diff --git a/.github/skills/startup-perf/SKILL.md b/.github/skills/startup-perf/SKILL.md index ee91301a425..8f53e1cc396 100644 --- a/.github/skills/startup-perf/SKILL.md +++ b/.github/skills/startup-perf/SKILL.md @@ -1,240 +1,204 @@ --- name: startup-perf -description: Measures Aspire application startup performance using dotnet-trace and the TraceAnalyzer tool. Use this when asked to measure impact of a code change on Aspire application startup performance. +description: Measures Aspire profiling with the OTEL startup harness, dashboard export, optional dotnet-trace traces, and optional MSBuild binlogs. --- -# Aspire Startup Performance Measurement +# Aspire Startup Profiling with OTEL -This skill provides patterns and practices for measuring .NET Aspire application startup performance using the `Measure-StartupPerformance.ps1` (Windows/PowerShell) or `measure-startup-performance.sh` (macOS/Linux) script and the companion `TraceAnalyzer` tool. +Use this skill when measuring, validating, or investigating Aspire startup performance with the OTEL profiling harness. -## Overview +The primary workflow is the PowerShell-free `eng/scripts/verify-startup-otel.sh` harness. A Windows PowerShell equivalent exists at `eng/scripts/verify-startup-otel.ps1` for parity checks. Both scripts run a throwaway AppHost through the Aspire CLI, enable profiling-only OTEL instrumentation, export dashboard telemetry, and validate that CLI, Hosting, and DCP startup spans are correlated in one trace. -The startup performance tooling collects `dotnet-trace` traces from an Aspire AppHost application and computes the startup duration from `AspireEventSource` events. Specifically, it measures the time between the `DcpModelCreationStart` (event ID 17) and `DcpModelCreationStop` (event ID 18) events emitted by the `Microsoft-Aspire-Hosting` EventSource provider. 
+## Current Profiling Model -**PowerShell Script (Windows)**: `tools/perf/Measure-StartupPerformance.ps1` -**Bash Script (macOS/Linux)**: `tools/perf/measure-startup-performance.sh` -**TraceAnalyzer Location**: `tools/perf/TraceAnalyzer/` -**Documentation**: `docs/getting-perf-traces.md` +Profiling is opt-in and separate from reported telemetry: -## Prerequisites +- Enable profiling with `ASPIRE_PROFILING_ENABLED=true` or `1`. +- CLI profiling spans use the `Aspire.Cli.Profiling` ActivitySource. +- Hosting profiling spans use the `Aspire.Hosting.Profiling` ActivitySource. +- DCP startup spans use the `dcp.startup` instrumentation scope. +- Reported telemetry must not carry profiling session IDs, high-cardinality profiling tags, or profiling spans. -### Windows +The older EventSource/dotnet-trace startup measurement scripts still exist, but they are a legacy fallback for explicit EventSource timing requests. Prefer the OTEL harness for current profiling work. -- PowerShell 7+ -- `dotnet-trace` global tool (`dotnet tool install -g dotnet-trace`) -- .NET SDK (restored via `./restore.cmd` or `./restore.sh`) +## Prerequisites -### macOS / Linux +The Bash harness runs on macOS/Linux with: -- Bash 4+ -- `dotnet-trace` global tool (`dotnet tool install -g dotnet-trace`) -- `python3` (for parsing `launchSettings.json`) -- .NET SDK (restored via `./restore.sh`) +- Bash +- `dotnet` +- `node` +- `curl` +- `unzip` +- `pgrep` and `ps` +- `dotnet-trace` only when using `--collect-dotnet-traces` -## Quick Start +From a clean checkout, the harness can restore and build the local CLI and bundle layout itself. Use `--skip-build` only when the CLI and bundle layout were already built in the same worktree. -### Single Measurement +The PowerShell harness runs on Windows with PowerShell 7+, `dotnet`, `node`/`npm`, and the usual Aspire CLI prerequisites. 
The Bash harness has the richer diagnostics path today (`--collect-dotnet-traces` and `--collect-dotnet-binlogs`); keep the shared validator in parity when updating either shell. -```powershell -# From repository root — measures the default TestShop.AppHost (Windows) -.\tools\perf\Measure-StartupPerformance.ps1 -``` +## Quick Start ```bash -# From repository root — measures the default TestShop.AppHost (macOS/Linux) -./tools/perf/measure-startup-performance.sh +# From repository root. Builds local CLI/layout if needed. +./eng/scripts/verify-startup-otel.sh ``` -### Multiple Iterations with Statistics - -```powershell -.\tools\perf\Measure-StartupPerformance.ps1 -Iterations 5 -``` +After a successful run, inspect the generated run root: ```bash -./tools/perf/measure-startup-performance.sh --iterations 5 +cat artifacts/tmp/startup-otel-harness/*/summary.json ``` -### Custom Project - -```powershell -.\tools\perf\Measure-StartupPerformance.ps1 -ProjectPath "path\to\MyApp.AppHost.csproj" -Iterations 3 -``` +For faster iteration after a successful local build: ```bash -./tools/perf/measure-startup-performance.sh --project-path path/to/MyApp.AppHost.csproj --iterations 3 +./eng/scripts/verify-startup-otel.sh --skip-build ``` -### Preserve Traces for Manual Analysis +Windows parity check: ```powershell -.\tools\perf\Measure-StartupPerformance.ps1 -Iterations 3 -PreserveTraces -TraceOutputDirectory "C:\traces" +.\eng\scripts\verify-startup-otel.ps1 -SkipBuild ``` +Collect sampled CPU traces and MSBuild binlogs: + ```bash -./tools/perf/measure-startup-performance.sh --iterations 3 --preserve-traces --trace-output-directory /tmp/traces +./eng/scripts/verify-startup-otel.sh --collect-dotnet-traces ``` -### Verbose Output - -```powershell -.\tools\perf\Measure-StartupPerformance.ps1 -Verbose -``` +Collect only MSBuild binlogs: ```bash -./tools/perf/measure-startup-performance.sh --verbose +./eng/scripts/verify-startup-otel.sh --collect-dotnet-binlogs ``` -## Parameters +Use a 
specific Aspire CLI, bundle layout, or DCP build: -| PowerShell Parameter | Bash Parameter | Default | Description | -|---------------------|----------------|---------|-------------| -| `-ProjectPath` | `--project-path` | TestShop.AppHost | Path to the AppHost `.csproj` to measure | -| `-Iterations` | `--iterations` | 1 | Number of measurement runs (1–100) | -| `-PreserveTraces` | `--preserve-traces` | false | Keep `.nettrace` files after analysis | -| `-TraceOutputDirectory` | `--trace-output-directory` | temp folder | Directory for preserved trace files | -| `-SkipBuild` | `--skip-build` | false | Skip `dotnet build` before running | -| `-TraceDurationSeconds` | `--trace-duration-seconds` | 60 | Maximum trace collection time (1–86400) | -| `-PauseBetweenIterationsSeconds` | `--pause-between-iterations-seconds` | 45 | Pause between iterations (0–3600) | -| `-Verbose` | `--verbose` | false | Show detailed output | - -## How It Works +```bash +./eng/scripts/verify-startup-otel.sh \ + --target-aspire-path artifacts/bin/Aspire.Cli/Debug/net10.0/aspire \ + --layout-path artifacts/bundle/osx-arm64 \ + --dcp-path path/to/dcp +``` -The script follows this sequence: +## Bash Harness Options -1. **Prerequisites check** — Verifies `dotnet-trace` is installed and the project exists. -2. **Build** — Builds the AppHost project in Release configuration (unless `-SkipBuild`). -3. **Build TraceAnalyzer** — Builds the companion `tools/perf/TraceAnalyzer` project. -4. **For each iteration:** - a. Locates the compiled executable (Arcade-style or traditional output paths). - b. Reads `launchSettings.json` for environment variables. - c. Launches the AppHost as a separate process. - d. Attaches `dotnet-trace` to the running process with the `Microsoft-Aspire-Hosting` provider. - e. Waits for the trace to complete (duration timeout or process exit). - f. Runs the TraceAnalyzer to extract the startup duration from the `.nettrace` file. - g. Cleans up processes. -5. 
**Reports results** — Prints per-iteration times and statistics (min, max, average, std dev). +| Option | Description | +| --- | --- | +| `--target-aspire-path PATH` | Aspire CLI under test. Alias: `--aspire-path`. | +| `--profiler-aspire-path PATH` | Aspire CLI used to host/export dashboard telemetry. Alias: `--dashboard-aspire-path`. | +| `--layout-path PATH` | Aspire bundle layout path. | +| `--dcp-path PATH` | DCP directory or binary path override. | +| `--output-root PATH` | Output root for harness artifacts. Defaults to `artifacts/tmp/startup-otel-harness`. | +| `--post-start-delay SECONDS` | Delay after AppHost start before stopping to allow extra telemetry to flush. | +| `--require-dcp-spans` | Require exported DCP process/resource spans in addition to CLI/Hosting spans. | +| `--collect-dotnet-traces` | Collect `.nettrace` files for the CLI and child .NET processes. Also enables MSBuild binlog collection. | +| `--collect-dotnet-binlogs` | Collect `.binlog` files for dotnet MSBuild commands. | +| `--skip-build` | Do not restore/build the local Aspire CLI or bundle layout. | -## TraceAnalyzer Tool +## PowerShell Harness Options -The `tools/perf/TraceAnalyzer` is a small .NET console app that parses `.nettrace` files using the `Microsoft.Diagnostics.Tracing.TraceEvent` library. +The PowerShell script intentionally mirrors the core validation knobs but does not duplicate the Bash-only process sampling path: -### What It Does +| Parameter | Description | +| --- | --- | +| `-TargetAspirePath PATH` | Aspire CLI under test. Alias: `-AspirePath`. | +| `-ProfilerAspirePath PATH` | Aspire CLI used to host/export dashboard telemetry. Alias: `-DashboardAspirePath`. | +| `-LayoutPath PATH` | Aspire bundle layout path. | +| `-DcpPath PATH` | DCP directory or binary path override. | +| `-OutputRoot PATH` | Output root for harness artifacts. Defaults to `artifacts\tmp\startup-otel-harness`. 
| +| `-PostStartDelaySeconds SECONDS` | Delay after AppHost start before stopping to allow extra telemetry to flush. | +| `-RequireDcpSpans` | Require exported DCP process/resource spans in addition to CLI/Hosting spans. | +| `-SkipBuild` | Do not restore/build the local Aspire CLI. | -- Opens the `.nettrace` file with `EventPipeEventSource` -- Listens for events from the `Microsoft-Aspire-Hosting` provider -- Extracts timestamps for `DcpModelCreationStart` (ID 17) and `DcpModelCreationStop` (ID 18) -- Outputs the duration in milliseconds (or `"null"` if events are not found) +## Output Artifacts -### Standalone Usage +Each run writes to: -```bash -dotnet run --project tools/perf/TraceAnalyzer -c Release -- +```text +artifacts/tmp/startup-otel-harness/<run-id>/ ``` -## Understanding Output +Important files: -### Successful Run +| Path | Description | +| --- | --- | +| `summary.json` | Run summary with `ProfilingSessionId`, `TraceId`, `CorrelatedSpanCount`, paths, and optional trace/binlog file lists. | +| `span-summary.json` | Flattened exported span summary for quick inspection. | +| `startup-otel-export.zip` | Dashboard export containing trace JSON. | +| `logs/` | stdout/stderr for harness commands and child processes. | +| `workspace/` | Generated throwaway AppHost fixture. | +| `dotnet-traces/` | Optional `.nettrace` files from `--collect-dotnet-traces`. | +| `binlogs/` | Optional MSBuild `.binlog` files from trace/binlog collection. | -``` -================================================== - Aspire Startup Performance Measurement -================================================== - -Project: TestShop.AppHost -Iterations: 3 -... - -Iteration 1 ----------------------------------------- -Starting TestShop.AppHost... -Attaching trace collection to PID 12345... -Collecting performance trace... -Trace collection completed. -Analyzing trace: ... -Startup time: 1234.56 ms - -... 
- -================================================== - Results Summary -================================================== - -Iteration StartupTimeMs ---------- ------------- - 1 1234.56 - 2 1189.23 - 3 1201.45 - -Statistics: - Successful iterations: 3 / 3 - Minimum: 1189.23 ms - Maximum: 1234.56 ms - Average: 1208.41 ms - Std Dev: 18.92 ms -``` +## What the Harness Validates -### Common Issues +The shared C# file-based validator (`tools/StartupOtelValidator/ValidateStartupOtelExport.cs`) reads the dashboard export and requires a profiling session with correlated spans from: -| Symptom | Cause | Fix | -|---------|-------|-----| -| `dotnet-trace is not installed` | Missing global tool | Run `dotnet tool install -g dotnet-trace` | -| `Could not find compiled executable` | Project not built | Remove `-SkipBuild` or build manually | -| `Could not find DcpModelCreation events` | Trace too short or events not emitted | Increase `-TraceDurationSeconds` | -| `Application exited immediately` | App crash on startup | Check app logs, ensure dependencies are available | -| `dotnet-trace exited with code != 0` | Trace collection error | Check verbose output; trace file may still be valid | +- CLI startup/launch spans, including `aspire/cli/start_apphost.spawn_child`. +- Child CLI spans such as `aspire/cli/run`, dotnet build/run spans, backchannel connect spans, and dashboard URL retrieval. +- Hosting spans such as DCP model work, resource creation, resource wait, and DCP resource observation. +- Hosting-to-DCP trace links for created DCP objects. +- Resource wait events, including observed and completed events. +- DCP process/resource spans when `--require-dcp-spans` is specified. -## Comparing Before/After Performance +If validation fails, inspect `span-summary.json` first. Then use `startup-otel-export.zip` for the full dashboard export and `logs/` for process output. 
-To measure the impact of a code change: +## Comparing Before/After Changes -```powershell -# Windows: Measure baseline (on main branch) -git checkout main -.\tools\perf\Measure-StartupPerformance.ps1 -Iterations 5 -PreserveTraces -TraceOutputDirectory "C:\traces\baseline" +Prefer separate worktrees for baseline and feature measurements so branch switching does not disturb a dirty worktree. -# Windows: Measure with changes -git checkout my-feature-branch -.\tools\perf\Measure-StartupPerformance.ps1 -Iterations 5 -PreserveTraces -TraceOutputDirectory "C:\traces\feature" +```bash +# Baseline worktree +./eng/scripts/verify-startup-otel.sh --output-root artifacts/tmp/startup-otel-baseline --collect-dotnet-binlogs -# Compare the reported averages and std devs +# Feature worktree +./eng/scripts/verify-startup-otel.sh --output-root artifacts/tmp/startup-otel-feature --collect-dotnet-binlogs ``` -```bash -# macOS/Linux: Measure baseline (on main branch) -git checkout main -./tools/perf/measure-startup-performance.sh --iterations 5 --preserve-traces --trace-output-directory /tmp/traces/baseline +Compare: -# macOS/Linux: Measure with changes -git checkout my-feature-branch -./tools/perf/measure-startup-performance.sh --iterations 5 --preserve-traces --trace-output-directory /tmp/traces/feature +- `summary.json` for correlated span count and artifact paths. +- `span-summary.json` for span names, durations, operation IDs, process IDs, and events. +- `binlogs/` for MSBuild cost. +- `.nettrace` files when CPU sampling was collected. -# Compare the reported averages and std devs -``` +For statistically meaningful wall-clock comparisons, run multiple iterations manually and keep the environment stable. The OTEL harness validates correlation and produces artifacts; it is not a statistical benchmark runner by itself. -Use enough iterations (5+) and a consistent pause between iterations for reliable comparisons. 
+## Instrumentation Guidance -## Collecting Traces for Manual Analysis +Keep profiling APIs coarse-grained and profiling-specific: -If you need to inspect trace files manually (e.g., in PerfView or Visual Studio): +- Centralize raw `Activity`, activity names, tag names, and event names in the profiling telemetry type for the area (`Aspire.Cli.Profiling` or `Aspire.Hosting.Profiling`). +- Do not expose one public/internal method per tag. Prefer operation/result-level methods that accept the data for a phase and set multiple tags/events internally. +- Good API shape examples: start a dotnet process span with command, project, working directory, and options; record a process start result with started/process ID; record process completion with exit code and output counts; start a Kubernetes API span with operation/resource type; record retry details as one event method. +- Call sites should describe the operation being profiled, not know tag/event names. +- Do not add profiling tags/events to `Activity.Current` unless the current activity is known to be a profiling activity or profiling has explicitly wrapped it. +- Keep high-cardinality data out of reported telemetry. -```powershell -.\tools\perf\Measure-StartupPerformance.ps1 -PreserveTraces -TraceOutputDirectory "C:\my-traces" -``` +## Common Issues -```bash -./tools/perf/measure-startup-performance.sh --preserve-traces --trace-output-directory /tmp/my-traces -``` +| Symptom | Cause | Fix | +| --- | --- | --- | +| `Required command 'dotnet-trace' was not found` | `--collect-dotnet-traces` was used without the global tool. | Run `dotnet tool install -g dotnet-trace`. | +| `Target Aspire CLI not found` | CLI was not built or `--target-aspire-path` is wrong. | Omit `--skip-build` or pass the correct CLI path. | +| `No exported spans contained aspire.profiling.session_id` | Profiling was not enabled or telemetry was not exported. | Confirm `ASPIRE_PROFILING_ENABLED=true` and inspect `logs/`. 
| +| `No profiling session contained correlated... spans` | CLI/Hosting/DCP spans did not land in one correlated trace. | Inspect `span-summary.json` for missing scopes or broken parent/trace IDs. | +| `No dotnet-trace files were collected` | Trace collection was requested but no traceable child process was found or attach failed. | Inspect `logs/dotnet-trace-*.stderr.txt`; rerun with a longer `--post-start-delay`. | +| `No dotnet MSBuild binlogs were collected` | Binlog collection was requested but no MSBuild-backed dotnet command ran. | Inspect `logs/start.*`; rerun without `--skip-build` if necessary. | -See `docs/getting-perf-traces.md` for guidance on analyzing traces with PerfView or `dotnet trace report`. +## Legacy EventSource Tooling -## EventSource Provider Details +Use the legacy perf scripts only when the task explicitly asks for `Microsoft-Aspire-Hosting` EventSource timing or the `DcpModelCreationStart`/`DcpModelCreationStop` duration: -The `Microsoft-Aspire-Hosting` EventSource emits events for key Aspire lifecycle milestones. The startup performance script focuses on: +```bash +./tools/perf/measure-startup-performance.sh +``` -| Event ID | Event Name | Description | -|----------|------------|-------------| -| 17 | `DcpModelCreationStart` | Marks the beginning of DCP model creation | -| 18 | `DcpModelCreationStop` | Marks the completion of DCP model creation | +```powershell +.\tools\perf\Measure-StartupPerformance.ps1 +``` -The measured startup time is the wall-clock difference between these two events, representing the time to create all application services and supporting dependencies. +These scripts collect `dotnet-trace` EventSource data and analyze it with `tools/perf/TraceAnalyzer`. They do not validate OTEL span correlation. 
diff --git a/eng/scripts/verify-startup-otel.ps1 b/eng/scripts/verify-startup-otel.ps1 new file mode 100644 index 00000000000..42eadc2f4e5 --- /dev/null +++ b/eng/scripts/verify-startup-otel.ps1 @@ -0,0 +1,445 @@ +param( + [Alias("AspirePath")] + [string]$TargetAspirePath, + + [Alias("DashboardAspirePath")] + [string]$ProfilerAspirePath, + + [string]$LayoutPath, + [string]$DcpPath, + [string]$OutputRoot, + [switch]$RequireDcpSpans, + [int]$PostStartDelaySeconds = 0, + [switch]$SkipBuild +) + +Set-StrictMode -Version 3.0 +$ErrorActionPreference = "Stop" + +$repoRoot = Resolve-Path (Join-Path $PSScriptRoot "..\..") + +if (-not $OutputRoot) { + $OutputRoot = Join-Path $repoRoot "artifacts\tmp\startup-otel-harness" +} + +$runId = Get-Date -Format "yyyyMMdd-HHmmss" +$runRoot = Join-Path $OutputRoot $runId +$workspace = Join-Path $runRoot "workspace" +$projectDir = Join-Path $workspace "StartupOtelHarness" +$logsDir = Join-Path $runRoot "logs" +$exportDir = Join-Path $runRoot "export" +$exportZip = Join-Path $runRoot "startup-otel-export.zip" +$spanSummaryPath = Join-Path $runRoot "span-summary.json" + +New-Item -ItemType Directory -Path $workspace, $logsDir, $exportDir -Force | Out-Null + +# Writes a "==> "-prefixed progress message to the host. +function Write-Step($message) { + Write-Host "==> $message" +} + +# Asks the OS for a free loopback TCP port (bind to port 0), releases the listener, and returns the port number. +function Get-FreeTcpPort { + $listener = [System.Net.Sockets.TcpListener]::new([System.Net.IPAddress]::Loopback, 0) + try { + $listener.Start() + return $listener.LocalEndpoint.Port + } + finally { + $listener.Stop() + } +} + +# Runs $FilePath with $Arguments from $WorkingDirectory, writing stdout and stderr to $Name.stdout.txt and $Name.stderr.txt under the script-level $logsDir. +# Throws with both captured streams when the exit code is non-zero unless -AllowFailure is set; otherwise returns the exit code and the two log paths. +function Invoke-LoggedCommand { + param( + [Parameter(Mandatory = $true)] + [string]$FilePath, + + [string[]]$Arguments = @(), + + [Parameter(Mandatory = $true)] + [string]$WorkingDirectory, + + [Parameter(Mandatory = $true)] + [string]$Name, + + [switch]$AllowFailure + ) + + $stdoutPath = Join-Path $logsDir "$Name.stdout.txt" + $stderrPath = Join-Path $logsDir "$Name.stderr.txt" + + Push-Location $WorkingDirectory + try { + & $FilePath @Arguments > $stdoutPath 2> $stderrPath + $exitCode = 
$LASTEXITCODE + } + finally { + Pop-Location + } + + if ($exitCode -ne 0 -and -not $AllowFailure) { + $stderr = if (Test-Path $stderrPath) { Get-Content $stderrPath -Raw } else { "" } + $stdout = if (Test-Path $stdoutPath) { Get-Content $stdoutPath -Raw } else { "" } + throw @" +Command failed ($exitCode): $FilePath $($Arguments -join ' ') +stdout: $stdoutPath +$stdout +stderr: $stderrPath +$stderr +"@ + } + + return [pscustomobject]@{ + ExitCode = $exitCode + Stdout = $stdoutPath + Stderr = $stderrPath + } +} + +# Polls $Url every 500 ms until it answers with a status from 200 up to (not including) 500, and throws once $TimeoutSeconds has elapsed. +# -SkipHttpErrorCheck (PowerShell 7 and later) stops 4xx responses from throwing, so they count as "the server is listening" as the status check intends. +function Wait-HttpReady { + param( + [Parameter(Mandatory = $true)] + [string]$Url, + + [int]$TimeoutSeconds = 60 + ) + + $deadline = [DateTimeOffset]::UtcNow.AddSeconds($TimeoutSeconds) + do { + try { + $response = Invoke-WebRequest -Uri $Url -UseBasicParsing -TimeoutSec 2 -SkipHttpErrorCheck + if ($response.StatusCode -ge 200 -and $response.StatusCode -lt 500) { + return + } + } + catch { + Write-Verbose "Waiting for ${Url}: $_" + } + + # Pause after every unsuccessful probe (connection failure or 5xx response) so the loop never spins. + Start-Sleep -Milliseconds 500 + } while ([DateTimeOffset]::UtcNow -lt $deadline) + + throw "Timed out waiting for $Url" +} + +# Emits the process IDs of every descendant of $ParentProcessId, deepest descendants first, by recursively querying Win32_Process. +function Get-ChildProcessIds { + param( + [Parameter(Mandatory = $true)] + [int]$ParentProcessId + ) + + $children = @(Get-CimInstance Win32_Process -Filter "ParentProcessId=$ParentProcessId" -ErrorAction SilentlyContinue) + foreach ($child in $children) { + Get-ChildProcessIds -ParentProcessId $child.ProcessId + $child.ProcessId + } +} + +# Force-stops the descendants of $Process and then $Process itself; a failure to stop one process is reported via Write-Verbose and does not abort the rest. +function Stop-ProcessTree { + param( + [Parameter(Mandatory = $true)] + [System.Diagnostics.Process]$Process + ) + + $processIds = @(Get-ChildProcessIds -ParentProcessId $Process.Id) + $Process.Id + foreach ($processId in $processIds | Select-Object -Unique) { + try { + $target = Get-Process -Id $processId -ErrorAction SilentlyContinue + if ($target) { + Stop-Process -Id $processId -Force + } + } + catch { + Write-Verbose "Failed to stop process ${processId}: $_" + } + } +} + +# Sets a process-scoped environment variable and returns its name together with the value it had before, for later restoration. +function Set-ProcessEnvironmentVariable { + param( + [Parameter(Mandatory = $true)] + [string]$Name, + + [string]$Value + ) + + $previous = 
[Environment]::GetEnvironmentVariable($Name, "Process") + [Environment]::SetEnvironmentVariable($Name, $Value, "Process") + return [pscustomobject]@{ + Name = $Name + Previous = $previous + } +} + +# Puts every captured process-scoped environment variable back to its previous value; an empty snapshot is accepted so cleanup cannot fail with a binding error when nothing was set. +function Restore-ProcessEnvironmentVariables { + param( + [Parameter(Mandatory = $true)] + [AllowEmptyCollection()] + [object[]]$Variables + ) + + foreach ($variable in $Variables) { + [Environment]::SetEnvironmentVariable($variable.Name, $variable.Previous, "Process") + } +} + +if (-not $TargetAspirePath) { + $TargetAspirePath = Join-Path $repoRoot "artifacts\bin\Aspire.Cli\Debug\net10.0\aspire.exe" +} + +if (-not $ProfilerAspirePath) { + $profilerAspireCommand = Get-Command aspire -ErrorAction SilentlyContinue + if ($profilerAspireCommand) { + $ProfilerAspirePath = $profilerAspireCommand.Source + } + else { + $ProfilerAspirePath = $TargetAspirePath + } +} + +if (-not $LayoutPath -and $ProfilerAspirePath) { + $profilerAspireDirectory = Split-Path $ProfilerAspirePath -Parent + $candidateLayoutPath = Split-Path $profilerAspireDirectory -Parent + if ($candidateLayoutPath -and (Test-Path (Join-Path $candidateLayoutPath "bundle"))) { + $LayoutPath = $candidateLayoutPath + } +} + +if (-not $SkipBuild) { + Write-Step "Building local Aspire CLI" + Invoke-LoggedCommand -FilePath (Join-Path $repoRoot "restore.cmd") -Arguments @() -WorkingDirectory $repoRoot -Name "restore" | Out-Null + Invoke-LoggedCommand -FilePath "dotnet" -Arguments @("build", "src\Aspire.Cli\Aspire.Cli.csproj", "--no-restore") -WorkingDirectory $repoRoot -Name "build-aspire-cli" | Out-Null +} + +if (-not (Test-Path $TargetAspirePath)) { + throw "Target Aspire CLI not found at $TargetAspirePath" +} + +if (-not (Test-Path $ProfilerAspirePath)) { + throw "Profiler Aspire CLI not found at $ProfilerAspirePath" +} + +$dashboardPort = Get-FreeTcpPort +$otlpGrpcPort = Get-FreeTcpPort +$otlpHttpPort = Get-FreeTcpPort +$dashboardUrl = "http://localhost:$dashboardPort" +$otlpGrpcUrl = "http://localhost:$otlpGrpcPort" +$otlpHttpUrl = 
"http://localhost:$otlpHttpPort" + +$dashboardStdout = Join-Path $logsDir "dashboard.stdout.txt" +$dashboardStderr = Join-Path $logsDir "dashboard.stderr.txt" +$dashboardArgs = @( + "dashboard", + "run", + "--frontend-url", + $dashboardUrl, + "--otlp-grpc-url", + $otlpGrpcUrl, + "--otlp-http-url", + $otlpHttpUrl, + "--allow-anonymous" +) + +$environmentSnapshot = @() +if ($LayoutPath) { + Write-Step "Using Aspire bundle layout at $LayoutPath" + $environmentSnapshot += Set-ProcessEnvironmentVariable -Name "ASPIRE_LAYOUT_PATH" -Value $LayoutPath +} + +Write-Step "Starting standalone dashboard at $dashboardUrl" +$dashboardProcess = Start-Process -FilePath $ProfilerAspirePath -ArgumentList $dashboardArgs -WorkingDirectory $runRoot -RedirectStandardOutput $dashboardStdout -RedirectStandardError $dashboardStderr -PassThru -WindowStyle Hidden + +try { + Wait-HttpReady -Url $dashboardUrl -TimeoutSeconds 90 + + Write-Step "Configuring CLI diagnostic OTLP export to $otlpGrpcUrl" + # Keep both forms: OTEL_* configures CLI/OpenTelemetry exporters, while ASPIRE_OTEL_* is + # projected into AppHost IConfiguration as OTEL_* by DistributedApplicationBuilder. 
+ $environmentSnapshot += Set-ProcessEnvironmentVariable -Name "ASPIRE_CLI_TELEMETRY_OPTOUT" -Value "true" + $environmentSnapshot += Set-ProcessEnvironmentVariable -Name "ASPIRE_PROFILING_ENABLED" -Value "true" + $environmentSnapshot += Set-ProcessEnvironmentVariable -Name "ASPIRE_STARTUP_PROFILING_ENABLED" -Value "true" + $environmentSnapshot += Set-ProcessEnvironmentVariable -Name "OTEL_EXPORTER_OTLP_ENDPOINT" -Value $otlpGrpcUrl + $environmentSnapshot += Set-ProcessEnvironmentVariable -Name "OTEL_EXPORTER_OTLP_PROTOCOL" -Value "grpc" + $environmentSnapshot += Set-ProcessEnvironmentVariable -Name "ASPIRE_OTEL_EXPORTER_OTLP_ENDPOINT" -Value $otlpGrpcUrl + $environmentSnapshot += Set-ProcessEnvironmentVariable -Name "ASPIRE_OTEL_EXPORTER_OTLP_PROTOCOL" -Value "grpc" + + if ($DcpPath) { + $environmentSnapshot += Set-ProcessEnvironmentVariable -Name "ASPIRE_DCP_PATH" -Value $DcpPath + } + + Write-Step "Creating TypeScript AppHost fixture" + $serviceDir = Join-Path $projectDir "service" + New-Item -ItemType Directory -Path $projectDir, $serviceDir -Force | Out-Null + $appHostDashboardPort = Get-FreeTcpPort + $appHostOtlpGrpcPort = Get-FreeTcpPort + $appHostResourceServicePort = Get-FreeTcpPort + + @" +{ + "appHost": { + "path": "apphost.ts", + "language": "typescript/nodejs" + }, + "profiles": { + "https": { + "applicationUrl": "http://localhost:$appHostDashboardPort", + "environmentVariables": { + "ASPIRE_ALLOW_UNSECURED_TRANSPORT": "true", + "ASPIRE_DASHBOARD_OTLP_ENDPOINT_URL": "http://localhost:$appHostOtlpGrpcPort", + "ASPIRE_RESOURCE_SERVICE_ENDPOINT_URL": "http://localhost:$appHostResourceServicePort", + "ASPIRE_PROFILING_ENABLED": "true", + "ASPIRE_STARTUP_PROFILING_ENABLED": "true", + "OTEL_EXPORTER_OTLP_ENDPOINT": "$otlpGrpcUrl", + "OTEL_EXPORTER_OTLP_PROTOCOL": "grpc", + "ASPIRE_OTEL_EXPORTER_OTLP_ENDPOINT": "$otlpGrpcUrl", + "ASPIRE_OTEL_EXPORTER_OTLP_PROTOCOL": "grpc" + } + } + } +} +"@ | Set-Content -Path (Join-Path $projectDir "aspire.config.json") 
-Encoding UTF8 + + @' +{ + "name": "startupotelharness", + "private": true, + "type": "module", + "dependencies": { + "vscode-jsonrpc": "^8.2.0" + }, + "devDependencies": { + "@types/node": "^22.0.0", + "tsx": "^4.21.0", + "typescript": "^5.9.3" + } +} +'@ | Set-Content -Path (Join-Path $projectDir "package.json") -Encoding UTF8 + + @' +{ + "compilerOptions": { + "target": "ES2022", + "module": "NodeNext", + "moduleResolution": "NodeNext", + "esModuleInterop": true, + "forceConsistentCasingInFileNames": true, + "strict": true, + "skipLibCheck": true, + "outDir": "./dist/apphost", + "rootDir": "." + }, + "include": ["apphost.ts", ".modules/**/*.ts"], + "exclude": ["node_modules"] +} +'@ | Set-Content -Path (Join-Path $projectDir "tsconfig.apphost.json") -Encoding UTF8 + + @' +import http from 'node:http'; + +const port = Number(process.env.PORT ?? '0'); +const server = http.createServer((request, response) => { + response.writeHead(200, { 'content-type': 'text/plain' }); + response.end('startup otel harness'); +}); + +server.listen(port, '127.0.0.1', () => { + console.log(`startup otel harness listening on ${port}`); +}); + +process.on('SIGTERM', () => { + server.close(() => process.exit(0)); +}); +'@ | Set-Content -Path (Join-Path $serviceDir "server.js") -Encoding UTF8 + + @' +import { createBuilder } from './.modules/aspire.js'; + +const builder = await createBuilder(); + +const worker = await builder.addExecutable("worker", "node", "./service", ["server.js"]); +await worker.withHttpEndpoint({ env: "PORT" }); + +const dependent = await builder.addExecutable("dependent", "node", "./service", ["server.js"]); +await dependent.withHttpEndpoint({ env: "PORT" }); +await dependent.waitFor(worker); + +await builder.build().run(); +'@ | Set-Content -Path (Join-Path $projectDir "apphost.ts") -Encoding UTF8 + + $appHostPath = Join-Path $projectDir "apphost.ts" + + Write-Step "Restoring TypeScript AppHost fixture" + Invoke-LoggedCommand -FilePath $TargetAspirePath -Arguments 
@("restore", "--apphost", $appHostPath) -WorkingDirectory $projectDir -Name "restore-ts-apphost" | Out-Null + + Write-Step "Starting TypeScript AppHost with telemetry export enabled" + $startResult = Invoke-LoggedCommand -FilePath $TargetAspirePath -Arguments @("start", "--isolated", "--format", "Json", "--apphost", $appHostPath) -WorkingDirectory $projectDir -Name "start" + + if ($PostStartDelaySeconds -gt 0) { + Write-Step "Waiting ${PostStartDelaySeconds}s for profiling telemetry to flush" + Start-Sleep -Seconds $PostStartDelaySeconds + } + + Write-Step "Stopping TypeScript AppHost" + Invoke-LoggedCommand -FilePath $TargetAspirePath -Arguments @("stop", "--apphost", $appHostPath) -WorkingDirectory $projectDir -Name "stop" | Out-Null + + Start-Sleep -Seconds 3 + + Write-Step "Exporting standalone dashboard telemetry" + Invoke-LoggedCommand -FilePath $TargetAspirePath -Arguments @("export", "--dashboard-url", $dashboardUrl, "--include-hidden", "--output", $exportZip) -WorkingDirectory $runRoot -Name "export" | Out-Null + + if (-not (Test-Path $exportZip)) { + throw "Export zip was not created: $exportZip" + } + + Expand-Archive -Path $exportZip -DestinationPath $exportDir -Force + + $requireDcpSpansValue = if ($RequireDcpSpans) { "true" } else { "false" } + $environmentSnapshot += Set-ProcessEnvironmentVariable -Name "REQUIRE_DCP_SPANS" -Value $requireDcpSpansValue + $environmentSnapshot += Set-ProcessEnvironmentVariable -Name "EXPORT_DIR" -Value $exportDir + $environmentSnapshot += Set-ProcessEnvironmentVariable -Name "SPAN_SUMMARY_PATH" -Value $spanSummaryPath + $environmentSnapshot += Set-ProcessEnvironmentVariable -Name "RUN_ROOT" -Value $runRoot + $environmentSnapshot += Set-ProcessEnvironmentVariable -Name "TARGET_ASPIRE_PATH" -Value (Resolve-Path $TargetAspirePath).Path + $environmentSnapshot += Set-ProcessEnvironmentVariable -Name "PROFILER_ASPIRE_PATH" -Value (Resolve-Path $ProfilerAspirePath).Path + $environmentSnapshot += Set-ProcessEnvironmentVariable 
-Name "POST_START_DELAY_SECONDS" -Value $PostStartDelaySeconds.ToString() + $environmentSnapshot += Set-ProcessEnvironmentVariable -Name "DASHBOARD_URL" -Value $dashboardUrl + $environmentSnapshot += Set-ProcessEnvironmentVariable -Name "OTLP_GRPC_URL" -Value $otlpGrpcUrl + $environmentSnapshot += Set-ProcessEnvironmentVariable -Name "OTLP_HTTP_URL" -Value $otlpHttpUrl + $environmentSnapshot += Set-ProcessEnvironmentVariable -Name "APPHOST_PATH" -Value $appHostPath + $environmentSnapshot += Set-ProcessEnvironmentVariable -Name "START_JSON_PATH" -Value $startResult.Stdout + $environmentSnapshot += Set-ProcessEnvironmentVariable -Name "EXPORT_ZIP" -Value $exportZip + if ($LayoutPath) { + $environmentSnapshot += Set-ProcessEnvironmentVariable -Name "LAYOUT_PATH" -Value $LayoutPath + } + if ($DcpPath) { + $environmentSnapshot += Set-ProcessEnvironmentVariable -Name "DCP_PATH" -Value $DcpPath + } + + Write-Step "Validating startup OTEL export" + $environmentSnapshot += Set-ProcessEnvironmentVariable -Name "MSBUILDTERMINALLOGGER" -Value "false" + & dotnet run (Join-Path $repoRoot "tools\StartupOtelValidator\ValidateStartupOtelExport.cs") + if ($LASTEXITCODE -ne 0) { + throw "Startup OTEL export validation failed. See $spanSummaryPath" + } + + Write-Step "Startup OTEL harness passed" +} +finally { + if ($environmentSnapshot.Count -gt 0) { + Restore-ProcessEnvironmentVariables -Variables $environmentSnapshot + } + + if ($dashboardProcess -and -not $dashboardProcess.HasExited) { + Write-Step "Stopping standalone dashboard" + Stop-ProcessTree -Process $dashboardProcess + } +} diff --git a/eng/scripts/verify-startup-otel.sh b/eng/scripts/verify-startup-otel.sh new file mode 100755 index 00000000000..d951e6460f0 --- /dev/null +++ b/eng/scripts/verify-startup-otel.sh @@ -0,0 +1,788 @@ +#!/usr/bin/env bash + +set -euo pipefail + +show_help() { + cat <<'EOF' +Usage: verify-startup-otel.sh [OPTIONS] + +Runs the startup OTEL verification harness without PowerShell. 
+ +Options: + --target-aspire-path PATH Aspire CLI under test. Alias: --aspire-path + --profiler-aspire-path PATH Aspire CLI used to host/export from the dashboard. + Alias: --dashboard-aspire-path + --layout-path PATH Aspire bundle layout path. + --dcp-path PATH DCP directory or binary path override. + --output-root PATH Output root for harness artifacts. + --post-start-delay SECONDS Delay after AppHost start before stopping. + --require-dcp-spans Require exported DCP process/resource spans. + --collect-dotnet-traces Collect dotnet-trace .nettrace files for the CLI and child .NET processes. + Also enables MSBuild .binlog collection. + --collect-dotnet-binlogs Collect MSBuild .binlog files for dotnet MSBuild commands. + --skip-build Do not restore/build the local Aspire CLI or bundle layout. + -h, --help Show this help. +EOF +} + +repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd -P)" + +target_aspire_path="" +profiler_aspire_path="" +layout_path="" +layout_path_explicit=false +dcp_path="" +output_root="" +post_start_delay_seconds=0 +require_dcp_spans=false +collect_dotnet_traces=false +collect_dotnet_binlogs=false +skip_build=false +dashboard_pid="" +dotnet_trace_dir="" +dotnet_trace_path="" +dotnet_binlog_dir="" +active_dotnet_trace_pids="" +active_dotnet_trace_target_pids="" + +write_step() { + echo "==> $1" +} + +fail() { + echo "error: $1" >&2 + exit 1 +} + +require_value() { + local option="$1" + local value="${2:-}" + if [[ -z "$value" ]]; then + fail "Option '$option' requires a non-empty value." + fi +} + +require_command() { + local command_name="$1" + if ! command -v "$command_name" >/dev/null 2>&1; then + fail "Required command '$command_name' was not found on PATH." 
+ fi +} + +resolve_dotnet_trace() { + if command -v dotnet-trace >/dev/null 2>&1; then + command -v dotnet-trace + elif [[ -x "$HOME/.dotnet/tools/dotnet-trace" ]]; then + printf '%s\n' "$HOME/.dotnet/tools/dotnet-trace" + else + fail "Required command 'dotnet-trace' was not found. Install it with: dotnet tool install -g dotnet-trace" + fi +} + +resolve_existing_path() { + local path="$1" + if [[ -d "$path" ]]; then + (cd "$path" && pwd -P) + else + local directory + local file_name + directory="$(dirname "$path")" + file_name="$(basename "$path")" + (cd "$directory" && printf '%s/%s\n' "$(pwd -P)" "$file_name") + fi +} + +get_free_tcp_port() { + node -e "const net = require('node:net'); const server = net.createServer(); server.listen(0, '127.0.0.1', () => { console.log(server.address().port); server.close(); });" +} + +get_current_rid() { + local os_name + local architecture + + case "$(uname -s)" in + Darwin) + os_name="osx" + ;; + Linux) + os_name="linux" + ;; + *) + return 1 + ;; + esac + + case "$(uname -m)" in + arm64|aarch64) + architecture="arm64" + ;; + x86_64|amd64) + architecture="x64" + ;; + *) + return 1 + ;; + esac + + printf '%s-%s\n' "$os_name" "$architecture" +} + +invoke_logged_command() { + local name="$1" + local working_directory="$2" + shift 2 + + local stdout_path="$logs_dir/$name.stdout.txt" + local stderr_path="$logs_dir/$name.stderr.txt" + local exit_code + + set +e + ( + cd "$working_directory" && "$@" + ) >"$stdout_path" 2>"$stderr_path" + exit_code=$? 
+ set -e + + if [[ $exit_code -ne 0 ]]; then + { + echo "Command failed ($exit_code): $*" + echo "stdout: $stdout_path" + cat "$stdout_path" + echo "stderr: $stderr_path" + cat "$stderr_path" + } >&2 + exit "$exit_code" + fi + + printf '%s\n' "$stdout_path" +} + +sanitize_file_name() { + printf '%s' "$1" | tr -cs '[:alnum:]_.-' '-' | sed -e 's/^-//' -e 's/-$//' | cut -c 1-80 +} + +contains_value() { + local value="$1" + shift + + local item + for item in "$@"; do + if [[ "$item" == "$value" ]]; then + return 0 + fi + done + + return 1 +} + +stop_active_dotnet_traces() { + if [[ -z "$active_dotnet_trace_pids" ]]; then + return + fi + + local trace_pid + for trace_pid in $active_dotnet_trace_pids; do + if [[ -n "$trace_pid" ]] && kill -0 "$trace_pid" 2>/dev/null; then + kill -TERM "$trace_pid" 2>/dev/null || true + fi + done + + for trace_pid in $active_dotnet_trace_pids; do + if [[ -n "$trace_pid" ]]; then + wait "$trace_pid" 2>/dev/null || true + fi + done + + active_dotnet_trace_pids="" + active_dotnet_trace_target_pids="" +} + +start_dotnet_trace_for_pid() { + local target_process_id="$1" + local name="$2" + + if ! 
kill -0 "$target_process_id" 2>/dev/null; then + return + fi + + if contains_value "$target_process_id" $active_dotnet_trace_target_pids; then + return + fi + + mkdir -p "$dotnet_trace_dir" + + local sanitized_name + sanitized_name="$(sanitize_file_name "$name")" + if [[ -z "$sanitized_name" ]]; then + sanitized_name="process" + fi + + local trace_path="$dotnet_trace_dir/$sanitized_name-$target_process_id.nettrace" + local stdout_path="$logs_dir/dotnet-trace-$sanitized_name-$target_process_id.stdout.txt" + local stderr_path="$logs_dir/dotnet-trace-$sanitized_name-$target_process_id.stderr.txt" + + "$dotnet_trace_path" collect \ + --process-id "$target_process_id" \ + --profile dotnet-sampled-thread-time \ + --format nettrace \ + --output "$trace_path" \ + >"$stdout_path" \ + 2>"$stderr_path" & + + active_dotnet_trace_pids="${active_dotnet_trace_pids:+$active_dotnet_trace_pids }$!" + active_dotnet_trace_target_pids="${active_dotnet_trace_target_pids:+$active_dotnet_trace_target_pids }$target_process_id" +} + +is_traceable_dotnet_process() { + local process_id="$1" + local command_name + local command_line + + command_name="$(ps -p "$process_id" -o comm= 2>/dev/null || true)" + command_line="$(ps -p "$process_id" -o command= 2>/dev/null || true)" + + case "$(basename "$command_name")" in + dotnet|dotnet.exe|StartupOtelHarness|StartupOtelHarness.exe) + return 0 + ;; + esac + + [[ "$command_line" == *"dotnet"* || "$command_line" == *"StartupOtelHarness"* ]] +} + +trace_descendant_dotnet_processes() { + local root_process_id="$1" + local process_id + + while IFS= read -r process_id; do + if [[ -z "$process_id" ]]; then + continue + fi + + if is_traceable_dotnet_process "$process_id"; then + local command_name + command_name="$(ps -p "$process_id" -o comm= 2>/dev/null || true)" + start_dotnet_trace_for_pid "$process_id" "child-$(basename "$command_name")" + fi + done < <(get_child_process_ids "$root_process_id") +} + +invoke_start_with_dotnet_traces() { + local 
stdout_path="$logs_dir/start.stdout.txt" + local stderr_path="$logs_dir/start.stderr.txt" + local exit_code + + dotnet_trace_dir="$run_root/dotnet-traces" + mkdir -p "$dotnet_trace_dir" + + write_step "Collecting dotnet-trace files in $dotnet_trace_dir" >&2 + + set +e + ( + cd "$project_dir" && exec env "${startup_env[@]}" "$target_aspire_path" start --format Json --apphost "$apphost_path" + ) >"$stdout_path" 2>"$stderr_path" & + local start_process_id=$! + set -e + + start_dotnet_trace_for_pid "$start_process_id" "aspire-start-cli" + + while kill -0 "$start_process_id" 2>/dev/null; do + trace_descendant_dotnet_processes "$start_process_id" + sleep 0.05 + done + + set +e + wait "$start_process_id" + exit_code=$? + set -e + + if [[ $exit_code -ne 0 ]]; then + stop_active_dotnet_traces + { + echo "Command failed ($exit_code): $target_aspire_path start --format Json --apphost $apphost_path" + echo "stdout: $stdout_path" + cat "$stdout_path" + echo "stderr: $stderr_path" + cat "$stderr_path" + } >&2 + exit "$exit_code" + fi + + local apphost_process_id + apphost_process_id="$(node -e "const fs = require('node:fs'); const p = process.argv[1]; const value = JSON.parse(fs.readFileSync(p, 'utf8')).appHostPid; if (value) console.log(value);" "$stdout_path" 2>/dev/null || true)" + if [[ -n "$apphost_process_id" ]] && kill -0 "$apphost_process_id" 2>/dev/null; then + start_dotnet_trace_for_pid "$apphost_process_id" "apphost" + fi + + start_stdout="$stdout_path" +} + +wait_http_ready() { + local url="$1" + local timeout_seconds="${2:-60}" + local deadline=$((SECONDS + timeout_seconds)) + + while (( SECONDS < deadline )); do + local status + status="$(curl -s -o /dev/null -w '%{http_code}' --max-time 2 "$url" 2>/dev/null || true)" + if [[ "$status" =~ ^[0-9]+$ ]] && (( status >= 200 && status < 500 )); then + return + fi + + sleep 0.5 + done + + fail "Timed out waiting for $url." 
+} + +get_child_process_ids() { + local parent_process_id="$1" + local child_process_id + + while IFS= read -r child_process_id; do + if [[ -n "$child_process_id" ]]; then + get_child_process_ids "$child_process_id" + printf '%s\n' "$child_process_id" + fi + done < <(pgrep -P "$parent_process_id" 2>/dev/null || true) +} + +stop_process_tree() { + local process_id="$1" + local process_ids=() + local child_process_id + + while IFS= read -r child_process_id; do + if [[ -n "$child_process_id" ]]; then + process_ids+=("$child_process_id") + fi + done < <(get_child_process_ids "$process_id") + + process_ids+=("$process_id") + + for process_id in "${process_ids[@]}"; do + if kill -0 "$process_id" 2>/dev/null; then + kill "$process_id" 2>/dev/null || true + fi + done + + sleep 2 + + for process_id in "${process_ids[@]}"; do + if kill -0 "$process_id" 2>/dev/null; then + kill -9 "$process_id" 2>/dev/null || true + fi + done +} + +cleanup() { + local exit_code=$? + + if [[ "$collect_dotnet_traces" == true ]]; then + stop_active_dotnet_traces + fi + + if [[ -n "${dashboard_pid:-}" ]] && kill -0 "$dashboard_pid" 2>/dev/null; then + write_step "Stopping standalone dashboard" + stop_process_tree "$dashboard_pid" + fi + + exit "$exit_code" +} + +trap cleanup EXIT + +while [[ $# -gt 0 ]]; do + case "$1" in + --target-aspire-path|--aspire-path) + require_value "$1" "${2:-}" + target_aspire_path="$2" + shift 2 + ;; + --profiler-aspire-path|--dashboard-aspire-path) + require_value "$1" "${2:-}" + profiler_aspire_path="$2" + shift 2 + ;; + --layout-path) + require_value "$1" "${2:-}" + layout_path="$2" + layout_path_explicit=true + shift 2 + ;; + --dcp-path) + require_value "$1" "${2:-}" + dcp_path="$2" + shift 2 + ;; + --output-root) + require_value "$1" "${2:-}" + output_root="$2" + shift 2 + ;; + --post-start-delay) + require_value "$1" "${2:-}" + post_start_delay_seconds="$2" + shift 2 + ;; + --require-dcp-spans) + require_dcp_spans=true + shift + ;; + --collect-dotnet-traces) + 
collect_dotnet_traces=true + shift + ;; + --collect-dotnet-binlogs) + collect_dotnet_binlogs=true + shift + ;; + --skip-build) + skip_build=true + shift + ;; + -h|--help) + show_help + exit 0 + ;; + *) + fail "Unknown option '$1'. Use --help for usage information." + ;; + esac +done + +require_command curl +require_command dotnet +require_command node +require_command pgrep +require_command ps +require_command unzip + +if [[ "$collect_dotnet_traces" == true ]]; then + collect_dotnet_binlogs=true + dotnet_trace_path="$(resolve_dotnet_trace)" +fi + +if [[ ! "$post_start_delay_seconds" =~ ^[0-9]+$ ]]; then + fail "--post-start-delay must be a non-negative integer." +fi + +current_rid="$(get_current_rid || true)" + +if [[ -z "$output_root" ]]; then + output_root="$repo_root/artifacts/tmp/startup-otel-harness" +fi + +run_id="$(date +%Y%m%d-%H%M%S)" +run_root="$output_root/$run_id" +workspace="$run_root/workspace" +project_dir="$workspace/StartupOtelHarness" +logs_dir="$run_root/logs" +export_dir="$run_root/export" +export_zip="$run_root/startup-otel-export.zip" +span_summary_path="$run_root/span-summary.json" + +mkdir -p "$workspace" "$logs_dir" "$export_dir" + +if [[ "$collect_dotnet_binlogs" == true ]]; then + dotnet_binlog_dir="$run_root/binlogs" + mkdir -p "$dotnet_binlog_dir" +fi + +if [[ -z "$target_aspire_path" ]]; then + target_aspire_path="$repo_root/artifacts/bin/Aspire.Cli/Debug/net10.0/aspire" +fi + +if [[ -z "$profiler_aspire_path" ]]; then + profiler_aspire_path="$target_aspire_path" +fi + +if [[ -z "$layout_path" && -n "$current_rid" && -d "$repo_root/artifacts/bundle/$current_rid" ]]; then + layout_path="$repo_root/artifacts/bundle/$current_rid" +fi + +if [[ -z "$layout_path" && -n "$profiler_aspire_path" ]]; then + profiler_aspire_directory="$(dirname "$profiler_aspire_path")" + candidate_layout_path="$(cd "$profiler_aspire_directory/.." 
2>/dev/null && pwd -P || true)" + if [[ -n "$candidate_layout_path" && ( -d "$candidate_layout_path/bundle" || ( -d "$candidate_layout_path/managed" && -d "$candidate_layout_path/dcp" ) ) ]]; then + layout_path="$candidate_layout_path" + fi +fi + +if [[ "$skip_build" == false ]]; then + if [[ -z "$current_rid" ]]; then + fail "Could not determine the current runtime identifier for bundle layout build." + fi + + write_step "Building local Aspire CLI" + invoke_logged_command "restore" "$repo_root" "$repo_root/restore.sh" >/dev/null + invoke_logged_command "build-bundle-layout" "$repo_root" dotnet msbuild "$repo_root/eng/Bundle.proj" /t:Build /p:TargetRid="$current_rid" /p:SkipNativeBuild=true >/dev/null + invoke_logged_command "build-aspire-cli" "$repo_root" dotnet build "$repo_root/src/Aspire.Cli/Aspire.Cli.csproj" --no-restore >/dev/null + + if [[ "$layout_path_explicit" == false && -d "$repo_root/artifacts/bundle/$current_rid" ]]; then + layout_path="$repo_root/artifacts/bundle/$current_rid" + fi +fi + +if [[ ! -f "$target_aspire_path" ]]; then + fail "Target Aspire CLI not found at $target_aspire_path." +fi + +if [[ ! -f "$profiler_aspire_path" ]]; then + fail "Profiler Aspire CLI not found at $profiler_aspire_path." +fi + +target_aspire_path="$(resolve_existing_path "$target_aspire_path")" +profiler_aspire_path="$(resolve_existing_path "$profiler_aspire_path")" + +if [[ -n "$layout_path" ]]; then + layout_path="$(resolve_existing_path "$layout_path")" + write_step "Using Aspire bundle layout at $layout_path" +fi + +if [[ -n "$dcp_path" ]]; then + dcp_path="$(resolve_existing_path "$dcp_path")" + if [[ -f "$dcp_path" ]]; then + dcp_path="$(dirname "$dcp_path")" + fi + + dcp_binary_path="$dcp_path/dcp" + if [[ ! -f "$dcp_binary_path" && -f "$dcp_binary_path.exe" ]]; then + dcp_binary_path="$dcp_binary_path.exe" + fi + if [[ ! -f "$dcp_binary_path" ]]; then + fail "DCP executable not found under $dcp_path." 
+ fi +fi + +dashboard_port="$(get_free_tcp_port)" +otlp_grpc_port="$(get_free_tcp_port)" +otlp_http_port="$(get_free_tcp_port)" +dashboard_url="http://localhost:$dashboard_port" +otlp_grpc_url="http://localhost:$otlp_grpc_port" +otlp_http_url="http://localhost:$otlp_http_port" + +dashboard_stdout="$logs_dir/dashboard.stdout.txt" +dashboard_stderr="$logs_dir/dashboard.stderr.txt" +dashboard_env=() +layout_dcp_path="" +layout_managed_path="" +if [[ -n "$layout_path" ]]; then + dashboard_env+=("ASPIRE_LAYOUT_PATH=$layout_path") + + if [[ -d "$layout_path/dcp" && -d "$layout_path/managed" ]]; then + layout_dcp_path="$layout_path/dcp" + layout_managed_path="$layout_path/managed/aspire-managed" + elif [[ -d "$layout_path/bundle/dcp" && -d "$layout_path/bundle/managed" ]]; then + layout_dcp_path="$layout_path/bundle/dcp" + layout_managed_path="$layout_path/bundle/managed/aspire-managed" + fi + + if [[ ! -f "$layout_managed_path" && -f "$layout_managed_path.exe" ]]; then + layout_managed_path="$layout_managed_path.exe" + fi +fi + +write_step "Starting standalone dashboard at $dashboard_url" +pushd "$run_root" >/dev/null +# dashboard_env stays empty when no bundle layout was resolved. Under 'set -u', bash older +# than 4.4 (including the macOS system bash) treats "${array[@]}" of an empty array as an +# unbound variable, so expand it through the ${array[@]+...} guard instead. +env ${dashboard_env[@]+"${dashboard_env[@]}"} "$profiler_aspire_path" \ + dashboard run \ + --frontend-url "$dashboard_url" \ + --otlp-grpc-url "$otlp_grpc_url" \ + --otlp-http-url "$otlp_http_url" \ + --allow-anonymous \ + >"$dashboard_stdout" \ + 2>"$dashboard_stderr" & +dashboard_pid=$! +popd >/dev/null + +wait_http_ready "$dashboard_url" 90 + +write_step "Configuring CLI diagnostic OTLP export to $otlp_grpc_url" +# Keep both forms: OTEL_* configures CLI/OpenTelemetry exporters, while ASPIRE_OTEL_* is +# projected into AppHost IConfiguration as OTEL_* by DistributedApplicationBuilder. 
+# The ${array[@]+...} guard keeps an empty dashboard_env from tripping 'set -u' on bash < 4.4. +startup_env=( + ${dashboard_env[@]+"${dashboard_env[@]}"} + "ASPIRE_CLI_TELEMETRY_OPTOUT=true" + "ASPIRE_PROFILING_ENABLED=true" + "ASPIRE_STARTUP_PROFILING_ENABLED=true" + "OTEL_EXPORTER_OTLP_ENDPOINT=$otlp_grpc_url" + "OTEL_EXPORTER_OTLP_PROTOCOL=grpc" + "ASPIRE_OTEL_EXPORTER_OTLP_ENDPOINT=$otlp_grpc_url" + "ASPIRE_OTEL_EXPORTER_OTLP_PROTOCOL=grpc" +) +if [[ -n "$dcp_path" ]]; then + startup_env+=("ASPIRE_DCP_PATH=$dcp_path") +elif [[ -n "$layout_dcp_path" ]]; then + startup_env+=("ASPIRE_DCP_PATH=$layout_dcp_path") +fi +if [[ -n "$layout_managed_path" && -f "$layout_managed_path" ]]; then + startup_env+=("ASPIRE_DASHBOARD_PATH=$layout_managed_path") +fi +if [[ "$collect_dotnet_binlogs" == true ]]; then + write_step "Collecting dotnet MSBuild binlogs in $dotnet_binlog_dir" + startup_env+=("ASPIRE_CLI_DOTNET_BINLOG_DIR=$dotnet_binlog_dir") +fi + +write_step "Creating C# AppHost fixture" +service_dir="$project_dir/service" +properties_dir="$project_dir/Properties" +mkdir -p "$project_dir" "$service_dir" "$properties_dir" +apphost_dashboard_port="$(get_free_tcp_port)" +apphost_otlp_grpc_port="$(get_free_tcp_port)" +apphost_resource_service_port="$(get_free_tcp_port)" + +cat >"$properties_dir/launchSettings.json" <"$project_dir/StartupOtelHarness.AppHost.csproj" < + + Exe + net10.0 + enable + enable + true + 13.4.0 + startup-otel-harness + + + + + + + <_Parameter1>AppHostProjectPath + <_Parameter2>\$(MSBuildProjectDirectory) + + + <_Parameter1>AppHostProjectName + <_Parameter2>\$(MSBuildProjectFile) + + + <_Parameter1>AppHostProjectBaseIntermediateOutputPath + <_Parameter2>\$(BaseIntermediateOutputPath) + + + +EOF + +cat >"$service_dir/server.js" <<'EOF' +import http from 'node:http'; + +const port = Number(process.env.PORT ?? 
'0'); +const server = http.createServer((request, response) => { + response.writeHead(200, { 'content-type': 'text/plain' }); + response.end('startup otel harness'); +}); + +server.listen(port, '127.0.0.1', () => { + console.log(`startup otel harness listening on ${port}`); +}); + +process.on('SIGTERM', () => { + server.close(() => process.exit(0)); +}); +EOF + +cat >"$project_dir/Program.cs" <<'EOF' +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using Aspire.Hosting; + +var builder = DistributedApplication.CreateBuilder(args); + +var serviceDirectory = Path.Combine(builder.AppHostDirectory, "service"); + +var worker = builder.AddExecutable("worker", "node", serviceDirectory, "server.js") + .WithHttpEndpoint(env: "PORT"); + +builder.AddExecutable("dependent", "node", serviceDirectory, "server.js") + .WithHttpEndpoint(env: "PORT") + .WaitFor(worker); + +builder.Build().Run(); +EOF + +apphost_path="$project_dir/StartupOtelHarness.AppHost.csproj" + +write_step "Starting C# AppHost with telemetry export enabled" +if [[ "$collect_dotnet_traces" == true ]]; then + invoke_start_with_dotnet_traces +else + start_stdout="$(invoke_logged_command "start" "$project_dir" env "${startup_env[@]}" "$target_aspire_path" start --format Json --apphost "$apphost_path")" +fi + +if (( post_start_delay_seconds > 0 )); then + write_step "Waiting ${post_start_delay_seconds}s for profiling telemetry to flush" + sleep "$post_start_delay_seconds" +fi + +write_step "Stopping C# AppHost" +invoke_logged_command "stop" "$project_dir" env "${startup_env[@]}" "$target_aspire_path" stop --apphost "$apphost_path" >/dev/null + +if [[ "$collect_dotnet_traces" == true ]]; then + stop_active_dotnet_traces + + if ! compgen -G "$dotnet_trace_dir/*.nettrace" >/dev/null; then + fail "No dotnet-trace files were collected under $dotnet_trace_dir." + fi +fi + +if [[ "$collect_dotnet_binlogs" == true ]]; then + if ! 
compgen -G "$dotnet_binlog_dir/*.binlog" >/dev/null; then + fail "No dotnet MSBuild binlogs were collected under $dotnet_binlog_dir." + fi +fi + +sleep 3 + +write_step "Exporting standalone dashboard telemetry" +invoke_logged_command "export" "$run_root" env "${startup_env[@]}" "$target_aspire_path" export --dashboard-url "$dashboard_url" --include-hidden --output "$export_zip" >/dev/null + +if [[ ! -f "$export_zip" ]]; then + fail "Export zip was not created: $export_zip." +fi + +unzip -q "$export_zip" -d "$export_dir" + +REQUIRE_DCP_SPANS="$require_dcp_spans" \ +EXPORT_DIR="$export_dir" \ +SPAN_SUMMARY_PATH="$span_summary_path" \ +RUN_ROOT="$run_root" \ +TARGET_ASPIRE_PATH="$target_aspire_path" \ +PROFILER_ASPIRE_PATH="$profiler_aspire_path" \ +LAYOUT_PATH="$layout_path" \ +DCP_PATH="$dcp_path" \ +POST_START_DELAY_SECONDS="$post_start_delay_seconds" \ +DASHBOARD_URL="$dashboard_url" \ +OTLP_GRPC_URL="$otlp_grpc_url" \ +OTLP_HTTP_URL="$otlp_http_url" \ +APPHOST_PATH="$apphost_path" \ +START_JSON_PATH="$start_stdout" \ +EXPORT_ZIP="$export_zip" \ +DOTNET_TRACE_DIR="$dotnet_trace_dir" \ +DOTNET_BINLOG_DIR="$dotnet_binlog_dir" \ +MSBUILDTERMINALLOGGER=false \ +dotnet run "$repo_root/tools/StartupOtelValidator/ValidateStartupOtelExport.cs" + +write_step "Startup OTEL harness passed" diff --git a/src/Aspire.Cli/Backchannel/AppHostAuxiliaryBackchannel.cs b/src/Aspire.Cli/Backchannel/AppHostAuxiliaryBackchannel.cs index 530e5e207b1..c512d0ec25a 100644 --- a/src/Aspire.Cli/Backchannel/AppHostAuxiliaryBackchannel.cs +++ b/src/Aspire.Cli/Backchannel/AppHostAuxiliaryBackchannel.cs @@ -5,6 +5,7 @@ using System.Net.Sockets; using System.Runtime.CompilerServices; using System.Text.Json; +using Aspire.Cli.Telemetry; using Aspire.Cli.Utils; using Microsoft.Extensions.Logging; using ModelContextProtocol.Protocol; @@ -22,6 +23,7 @@ internal sealed class AppHostAuxiliaryBackchannel : IAppHostAuxiliaryBackchannel private JsonRpc? 
_rpc; private bool _disposed; private readonly ImmutableHashSet _capabilities; + private readonly ProfilingTelemetry? _profilingTelemetry; /// /// Private constructor - use factory methods to create instances. @@ -33,7 +35,8 @@ private AppHostAuxiliaryBackchannel( AppHostInformation? appHostInfo, bool isInScope, ImmutableHashSet capabilities, - ILogger? logger) + ILogger? logger, + ProfilingTelemetry? profilingTelemetry) { Hash = hash; SocketPath = socketPath; @@ -43,6 +46,7 @@ private AppHostAuxiliaryBackchannel( _capabilities = capabilities; ConnectedAt = DateTimeOffset.UtcNow; _logger = logger; + _profilingTelemetry = profilingTelemetry; } /// @@ -54,7 +58,7 @@ internal AppHostAuxiliaryBackchannel( JsonRpc rpc, AppHostInformation? appHostInfo, bool isInScope) - : this(hash, socketPath, rpc, appHostInfo, isInScope, ImmutableHashSet.Empty, null) + : this(hash, socketPath, rpc, appHostInfo, isInScope, ImmutableHashSet.Empty, null, null) { } @@ -102,14 +106,16 @@ private JsonRpc EnsureConnected() /// The path to the Unix domain socket. /// Optional logger for diagnostic messages. /// Cancellation token. + /// Optional profiling service. /// A connected AppHostAuxiliaryBackchannel instance. public static Task ConnectAsync( string socketPath, ILogger? logger = null, - CancellationToken cancellationToken = default) + CancellationToken cancellationToken = default, + ProfilingTelemetry? profilingTelemetry = null) { var hash = AppHostHelper.ExtractHashFromSocketPath(socketPath) ?? string.Empty; - return CreateFromSocketAsync(hash, socketPath, isInScope: true, socket: null, logger, cancellationToken); + return CreateFromSocketAsync(hash, socketPath, isInScope: true, socket: null, logger, cancellationToken, profilingTelemetry); } /// @@ -123,6 +129,7 @@ public static Task ConnectAsync( /// Optional already-connected socket. If null, a new connection will be established. /// Optional logger. /// Cancellation token (only used when socket is null). 
+ /// Optional profiling service. /// A connected AppHostAuxiliaryBackchannel instance. internal static async Task CreateFromSocketAsync( string hash, @@ -130,7 +137,8 @@ internal static async Task CreateFromSocketAsync( bool isInScope, Socket? socket = null, ILogger? logger = null, - CancellationToken cancellationToken = default) + CancellationToken cancellationToken = default, + ProfilingTelemetry? profilingTelemetry = null) { // Connect if no socket provided if (socket is null) @@ -155,7 +163,7 @@ internal static async Task CreateFromSocketAsync( var capabilitiesSet = capabilities?.ToImmutableHashSet() ?? ImmutableHashSet.Create(AuxiliaryBackchannelCapabilities.V1); - return new AppHostAuxiliaryBackchannel(hash, socketPath, rpc, appHostInfo, isInScope, capabilitiesSet, logger); + return new AppHostAuxiliaryBackchannel(hash, socketPath, rpc, appHostInfo, isInScope, capabilitiesSet, logger, profilingTelemetry); } /// @@ -237,6 +245,9 @@ await rpc.InvokeWithCancellationAsync( var rpc = EnsureConnected(); _logger?.LogDebug("Requesting Dashboard URLs"); + // This method runs inside whichever activity is current, so avoid adding + // profiling-only events to reported telemetry unless profiling is on. + var activity = _profilingTelemetry?.StartAuxiliaryBackchannelGetDashboardUrls() ?? default; try { @@ -245,12 +256,16 @@ await rpc.InvokeWithCancellationAsync( [], cancellationToken).ConfigureAwait(false); + activity.SetAppHostDashboardUrls(dashboardUrls); + activity.AddAuxBackchannelGetDashboardUrlsResponseEvent(); + return dashboardUrls; } catch (RemoteMethodNotFoundException ex) { // The RPC method may not be available on older AppHost versions. _logger?.LogDebug(ex, "GetDashboardUrlsAsync RPC method not available on the remote AppHost. 
The AppHost may be running an older version."); + activity.AddAuxBackchannelGetDashboardUrlsNotFoundEvent(); return null; } } diff --git a/src/Aspire.Cli/Backchannel/AppHostCliBackchannel.cs b/src/Aspire.Cli/Backchannel/AppHostCliBackchannel.cs index 1fa62b33c99..e3c1238230f 100644 --- a/src/Aspire.Cli/Backchannel/AppHostCliBackchannel.cs +++ b/src/Aspire.Cli/Backchannel/AppHostCliBackchannel.cs @@ -27,7 +27,10 @@ internal interface IAppHostCliBackchannel Task GetPipelineStepsAsync(string? step, CancellationToken cancellationToken); } -internal sealed class AppHostCliBackchannel(ILogger logger, AspireCliTelemetry telemetry) : IAppHostCliBackchannel +internal sealed class AppHostCliBackchannel( + ILogger logger, + AspireCliTelemetry telemetry, + ProfilingTelemetry profilingTelemetry) : IAppHostCliBackchannel { private const string BaselineCapability = "baseline.v2"; private TaskCompletionSource _rpcTaskCompletionSource = new(); @@ -67,15 +70,20 @@ await rpc.InvokeWithCancellationAsync( public async Task GetDashboardUrlsAsync(CancellationToken cancellationToken) { - using var activity = telemetry.StartDiagnosticActivity(); + using var activity = profilingTelemetry.StartBackchannelGetDashboardUrls(); + activity.AddBackchannelWaitForRpcEvent(); var rpc = await GetRpcTaskAsync().WaitAsync(cancellationToken).ConfigureAwait(false); + activity.AddBackchannelRpcReadyEvent(); logger.LogDebug("Requesting dashboard URL"); + activity.AddBackchannelGetDashboardUrlsInvokeEvent(); var state = await rpc.InvokeWithCancellationAsync( "GetDashboardUrlsAsync", [], cancellationToken); + activity.SetAppHostDashboardUrls(state); + activity.AddBackchannelGetDashboardUrlsResponseEvent(); return state; } @@ -253,7 +261,7 @@ public async Task ConnectAsync(string socketPath, bool autoReconnect, int retryC { try { - using var activity = telemetry.StartDiagnosticActivity(); + using var activity = profilingTelemetry.StartBackchannelConnect(socketPath, autoReconnect, retryCount); lock (_lock) { @@ 
-271,7 +279,9 @@ public async Task ConnectAsync(string socketPath, bool autoReconnect, int retryC logger.Log(connectingLogLevel, "Connecting to AppHost backchannel at {SocketPath} (autoReconnect={AutoReconnect}, retryCount={RetryCount})", socketPath, autoReconnect, retryCount); var socket = new Socket(AddressFamily.Unix, SocketType.Stream, ProtocolType.Unspecified); var endpoint = new UnixDomainSocketEndPoint(socketPath); + activity.AddBackchannelSocketConnectStartEvent(); await socket.ConnectAsync(endpoint, cancellationToken); + activity.AddBackchannelSocketConnectedEvent(); logger.LogDebug("Connected to AppHost backchannel at {SocketPath} (retryCount={RetryCount})", socketPath, retryCount); var stream = new NetworkStream(socket, true); @@ -280,11 +290,15 @@ public async Task ConnectAsync(string socketPath, bool autoReconnect, int retryC { rpc = new JsonRpc(new HeaderDelimitedMessageHandler(stream, stream, BackchannelJsonSerializerContext.CreateRpcMessageFormatter())); rpc.StartListening(); + activity.AddBackchannelRpcListeningEvent(); + activity.AddBackchannelGetCapabilitiesStartEvent(); var capabilities = await rpc.InvokeWithCancellationAsync( "GetCapabilitiesAsync", [], cancellationToken); + activity.SetBackchannelCapabilitySummary(capabilities, BaselineCapability); + activity.AddBackchannelGetCapabilitiesResponseEvent(); if (!capabilities.Any(s => s == BaselineCapability)) { @@ -499,4 +513,3 @@ public async Task GetPipelineStepsAsync(string? 
step, } } - diff --git a/src/Aspire.Cli/Backchannel/AppHostConnectionResolver.cs b/src/Aspire.Cli/Backchannel/AppHostConnectionResolver.cs index 1342827372c..46e9135f32e 100644 --- a/src/Aspire.Cli/Backchannel/AppHostConnectionResolver.cs +++ b/src/Aspire.Cli/Backchannel/AppHostConnectionResolver.cs @@ -6,6 +6,7 @@ using Aspire.Cli.Interaction; using Aspire.Cli.Projects; using Aspire.Cli.Resources; +using Aspire.Cli.Telemetry; using Aspire.Cli.Utils; using Microsoft.Extensions.Logging; using Spectre.Console; @@ -42,7 +43,8 @@ internal sealed class AppHostConnectionResolver( IInteractionService interactionService, IProjectLocator projectLocator, CliExecutionContext executionContext, - ILogger logger) + ILogger logger, + ProfilingTelemetry? profilingTelemetry = null) { /// /// Resolves all running AppHost connections using socket-first discovery. @@ -143,7 +145,7 @@ public async Task ResolveConnectionAsync( try { var connection = await AppHostAuxiliaryBackchannel.ConnectAsync( - socketPath, logger, cancellationToken).ConfigureAwait(false); + socketPath, logger, cancellationToken, profilingTelemetry).ConfigureAwait(false); if (connection is not null) { return new AppHostConnectionResult { Connection = connection }; diff --git a/src/Aspire.Cli/Backchannel/AuxiliaryBackchannelMonitor.cs b/src/Aspire.Cli/Backchannel/AuxiliaryBackchannelMonitor.cs index e5eae36c7a3..ad72f746901 100644 --- a/src/Aspire.Cli/Backchannel/AuxiliaryBackchannelMonitor.cs +++ b/src/Aspire.Cli/Backchannel/AuxiliaryBackchannelMonitor.cs @@ -6,6 +6,7 @@ using System.Net.Sockets; using System.Runtime.CompilerServices; using Aspire.Cli.Commands; +using Aspire.Cli.Telemetry; using Aspire.Cli.Utils; using Microsoft.Extensions.FileProviders; using Microsoft.Extensions.Hosting; @@ -20,7 +21,8 @@ namespace Aspire.Cli.Backchannel; internal sealed class AuxiliaryBackchannelMonitor( ILogger logger, CliExecutionContext executionContext, - TimeProvider timeProvider) : BackgroundService, 
IAuxiliaryBackchannelMonitor + TimeProvider timeProvider, + ProfilingTelemetry profilingTelemetry) : BackgroundService, IAuxiliaryBackchannelMonitor { private static readonly TimeSpan s_maxRetryElapsed = TimeSpan.FromSeconds(3); private static readonly TimeSpan s_maxRetryDelay = TimeSpan.FromSeconds(1); @@ -406,7 +408,7 @@ private async Task TryConnectToSocketAsync(string socketPath, ConcurrentBag logger, TimeProvider timeProvider) { @@ -245,6 +246,13 @@ private async Task StopExistingInstancesAsync(FileInfo effectiveAppHostFile, Can internal static bool IsExtensionEnvironmentVariable(string name) => name.StartsWith(ExtensionEnvironmentVariablePrefix, StringComparison.OrdinalIgnoreCase); + internal static Dictionary CreateDetachedChildEnvironment(Activity? activity) + { + var environment = new Dictionary { [KnownConfigNames.CliRunDetached] = "true" }; + ProfilingTelemetry.AddActivityContextToEnvironment(activity, environment); + return environment; + } + private record LaunchResult(Process? ChildProcess, IAppHostAuxiliaryBackchannel? Backchannel, DashboardUrlsState? 
DashboardUrls, bool ChildExitedEarly, int ChildExitCode); private async Task LaunchAndWaitForBackchannelAsync( @@ -256,25 +264,32 @@ private async Task LaunchAndWaitForBackchannelAsync( { Process childProcess; - try + using (var spawnActivity = profilingTelemetry.StartDetachedSpawnChild(executablePath, childArgs.Count, "run")) { - childProcess = DetachedProcessLauncher.Start( - executablePath, - childArgs, - executionContext.WorkingDirectory.FullName, - IsExtensionEnvironmentVariable, - new Dictionary { [KnownConfigNames.CliRunDetached] = "true" }); - } - catch (Exception ex) - { - logger.LogError(ex, "Failed to start child CLI process"); - return new LaunchResult(null, null, null, false, 0); + try + { + childProcess = DetachedProcessLauncher.Start( + executablePath, + childArgs, + executionContext.WorkingDirectory.FullName, + IsExtensionEnvironmentVariable, + CreateDetachedChildEnvironment(Activity.Current)); + spawnActivity.SetProcessId(childProcess.Id); + } + catch (Exception ex) + { + spawnActivity.SetError(ex.Message); + logger.LogError(ex, "Failed to start child CLI process"); + return new LaunchResult(null, null, null, false, 0); + } } logger.LogDebug("Child CLI process started with PID: {PID}", childProcess.Id); var startTime = timeProvider.GetUtcNow(); var timeout = TimeSpan.FromSeconds(120); + using var waitForBackchannelActivity = profilingTelemetry.StartDetachedWaitForBackchannel(childProcess.Id, expectedHash, legacyHash is not null); + var scanCount = 0; while (timeProvider.GetUtcNow() - startTime < timeout) { @@ -283,24 +298,34 @@ private async Task LaunchAndWaitForBackchannelAsync( if (childProcess.HasExited) { var exitCode = childProcess.ExitCode; + waitForBackchannelActivity.SetProcessExitCode(exitCode); + waitForBackchannelActivity.SetError($"Child CLI exited with code {exitCode}."); logger.LogWarning("Child CLI process exited with code {ExitCode}", exitCode); return new LaunchResult(childProcess, null, null, true, exitCode); } await 
backchannelMonitor.ScanAsync(cancellationToken).ConfigureAwait(false); + scanCount++; var connection = backchannelMonitor.GetConnectionsByHash(expectedHash).FirstOrDefault() ?? (legacyHash is not null ? backchannelMonitor.GetConnectionsByHash(legacyHash).FirstOrDefault() : null); if (connection is not null) { + waitForBackchannelActivity.SetBackchannelScanCount(scanCount); + waitForBackchannelActivity.AddStartAppHostBackchannelConnectedEvent(); DashboardUrlsState? dashboardUrls = null; - try - { - dashboardUrls = await connection.GetDashboardUrlsAsync(cancellationToken).ConfigureAwait(false); - } - catch (Exception ex) + using (var getDashboardUrlsActivity = profilingTelemetry.StartDetachedGetDashboardUrls()) { - logger.LogDebug(ex, "Failed to retrieve dashboard URLs from backchannel connection. Continuing without dashboard URLs."); + try + { + dashboardUrls = await connection.GetDashboardUrlsAsync(cancellationToken).ConfigureAwait(false); + getDashboardUrlsActivity.SetAppHostDashboardUrls(dashboardUrls); + } + catch (Exception ex) + { + getDashboardUrlsActivity.SetError(ex.Message); + logger.LogDebug(ex, "Failed to retrieve dashboard URLs from backchannel connection. 
Continuing without dashboard URLs."); + } } return new LaunchResult(childProcess, connection, dashboardUrls, false, 0); @@ -316,6 +341,8 @@ private async Task LaunchAndWaitForBackchannelAsync( } } + waitForBackchannelActivity.SetBackchannelScanCount(scanCount); + waitForBackchannelActivity.SetError("Timed out waiting for AppHost backchannel."); return new LaunchResult(childProcess, null, null, false, 0); } diff --git a/src/Aspire.Cli/Commands/RunCommand.cs b/src/Aspire.Cli/Commands/RunCommand.cs index 1408cbd953c..052448a1b41 100644 --- a/src/Aspire.Cli/Commands/RunCommand.cs +++ b/src/Aspire.Cli/Commands/RunCommand.cs @@ -68,6 +68,7 @@ internal sealed class RunCommand : BaseCommand private readonly AppHostLauncher _appHostLauncher; private readonly FileLoggerProvider _fileLoggerProvider; private readonly ICliHostEnvironment _hostEnvironment; + private readonly ProfilingTelemetry _profilingTelemetry; private bool _isDetachMode; protected override bool UpdateNotificationsEnabled => !_isDetachMode; @@ -97,7 +98,8 @@ public RunCommand( IAppHostProjectFactory projectFactory, AppHostLauncher appHostLauncher, FileLoggerProvider fileLoggerProvider, - ICliHostEnvironment hostEnvironment) + ICliHostEnvironment hostEnvironment, + ProfilingTelemetry profilingTelemetry) : base("run", RunCommandStrings.Description, features, updateNotifier, executionContext, interactionService, telemetry) { _runner = runner; @@ -112,6 +114,7 @@ public RunCommand( _appHostLauncher = appHostLauncher; _fileLoggerProvider = fileLoggerProvider; _hostEnvironment = hostEnvironment; + _profilingTelemetry = profilingTelemetry; Options.Add(s_detachOption); Options.Add(s_noBuildOption); @@ -186,23 +189,27 @@ protected override async Task ExecuteAsync(ParseResult parseResult, Cancell try { - using var activity = Telemetry.StartDiagnosticActivity(this.Name); - // Start a reported telemetry activity for the app host run early so that // all failure paths (project not found, incompatible version, etc.) 
are captured. runActivity = Telemetry.StartReportedActivity(name: TelemetryConstants.Activities.RunAppHost); runActivity?.SetTag(TelemetryConstants.Tags.AppHostDetached, _configuration.GetBool(KnownConfigNames.CliRunDetached) is true); runActivity?.SetTag(TelemetryConstants.Tags.AppHostIsolated, isolated); + using var activity = _profilingTelemetry.StartRunCommand(); + var multipleAppHostBehavior = _hostEnvironment.SupportsInteractiveInput ? MultipleAppHostProjectsFoundBehavior.Prompt : MultipleAppHostProjectsFoundBehavior.Throw; - var searchResult = await _projectLocator.UseOrFindAppHostProjectFileAsync( - passedAppHostProjectFile, - multipleAppHostBehavior, - createSettingsFile: true, - cancellationToken); + AppHostProjectSearchResult searchResult; + using (var findAppHostActivity = _profilingTelemetry.StartRunAppHostFindAppHost(passedAppHostProjectFile)) + { + searchResult = await _projectLocator.UseOrFindAppHostProjectFileAsync( + passedAppHostProjectFile, + multipleAppHostBehavior, + createSettingsFile: true, + cancellationToken); + } var effectiveAppHostFile = searchResult.SelectedProjectFile; if (effectiveAppHostFile is null) @@ -225,7 +232,12 @@ protected override async Task ExecuteAsync(ParseResult parseResult, Cancell // Check for running instance — even if we fail to stop we won't // block the apphost starting to make sure we don't ever break flow. // It should mostly stop just fine though. 
- var runningInstanceResult = await project.FindAndStopRunningInstanceAsync(effectiveAppHostFile, ExecutionContext.HomeDirectory, cancellationToken); + RunningInstanceResult runningInstanceResult; + using (var stopRunningInstanceActivity = _profilingTelemetry.StartRunAppHostStopExistingInstance()) + { + runningInstanceResult = await project.FindAndStopRunningInstanceAsync(effectiveAppHostFile, ExecutionContext.HomeDirectory, cancellationToken); + stopRunningInstanceActivity.SetAppHostRunningInstanceResult(runningInstanceResult); + } // If in isolated mode and a running instance was stopped, warn the user if (isolated && runningInstanceResult == RunningInstanceResult.InstanceStopped) @@ -254,12 +266,22 @@ protected override async Task ExecuteAsync(ParseResult parseResult, Cancell BuildCompletionSource = buildCompletionSource, BackchannelCompletionSource = backchannelCompletionSource, }; + ProfilingTelemetry.AddCurrentContextToEnvironment(context.EnvironmentVariables); // Start the project run as a pending task - we'll handle UX while it runs - var pendingRun = project.RunAsync(context, cancellationToken); + Task pendingRun; + using (_profilingTelemetry.StartRunAppHostStartProject(project.LanguageId, noBuild, waitForDebugger)) + { + pendingRun = project.RunAsync(context, cancellationToken); + } // Wait for the build to complete first (project handles its own build status spinners) - var buildSuccess = await buildCompletionSource.Task.WaitAsync(cancellationToken); + bool buildSuccess; + using (var waitForBuildActivity = _profilingTelemetry.StartRunAppHostWaitForBuild()) + { + buildSuccess = await buildCompletionSource.Task.WaitAsync(cancellationToken); + waitForBuildActivity.SetAppHostBuildSuccess(buildSuccess); + } if (!buildSuccess) { runActivity?.SetTag(TelemetryConstants.Tags.ErrorType, "build_failed"); @@ -279,17 +301,27 @@ protected override async Task ExecuteAsync(ParseResult parseResult, Cancell } // Now wait for the backchannel to be established - var 
backchannel = await InteractionService.ShowStatusAsync( - RunCommandStrings.ConnectingToAppHost, - async () => await backchannelCompletionSource.Task.WaitAsync(cancellationToken)); + IAppHostCliBackchannel backchannel; + using (var waitForBackchannelActivity = _profilingTelemetry.StartRunAppHostWaitForBackchannel()) + { + backchannel = await InteractionService.ShowStatusAsync( + RunCommandStrings.ConnectingToAppHost, + async () => await backchannelCompletionSource.Task.WaitAsync(cancellationToken)); + waitForBackchannelActivity.SetAppHostBackchannelConnected(true); + } // Set up log capture - writes to unified CLI log file var pendingLogCapture = CaptureAppHostLogsAsync(_fileLoggerProvider, backchannel, _interactionService, cancellationToken); // Get dashboard URLs - var dashboardUrls = await InteractionService.ShowStatusAsync( - RunCommandStrings.StartingDashboard, - async () => await backchannel.GetDashboardUrlsAsync(cancellationToken)); + DashboardUrlsState dashboardUrls; + using (var getDashboardUrlsActivity = _profilingTelemetry.StartRunAppHostGetDashboardUrls()) + { + dashboardUrls = await InteractionService.ShowStatusAsync( + RunCommandStrings.StartingDashboard, + async () => await backchannel.GetDashboardUrlsAsync(cancellationToken)); + getDashboardUrlsActivity.SetAppHostDashboardHealthy(dashboardUrls.DashboardHealthy); + } if (dashboardUrls.DashboardHealthy is false) { @@ -391,8 +423,14 @@ await InteractionService.DisplayLiveAsync(BuildLiveRenderable(), async updateTar extInteractionService.NotifyAppHostStartupCompleted(); } - await pendingLogCapture; - return await pendingRun; + using (var lifetimeActivity = _profilingTelemetry.StartRunAppHostLifetime()) + { + runActivity?.Stop(); + await pendingLogCapture; + var exitCode = await pendingRun; + lifetimeActivity.SetProcessExitCode(exitCode); + return exitCode; + } } catch (OperationCanceledException ex) when (ex.CancellationToken == cancellationToken || ex is ExtensionOperationCanceledException) { diff 
--git a/src/Aspire.Cli/DotNet/DotNetCliRunner.cs b/src/Aspire.Cli/DotNet/DotNetCliRunner.cs index 50520d8b070..e3e05a68f5d 100644 --- a/src/Aspire.Cli/DotNet/DotNetCliRunner.cs +++ b/src/Aspire.Cli/DotNet/DotNetCliRunner.cs @@ -64,6 +64,7 @@ internal sealed class DotNetCliRunner( ILogger logger, IServiceProvider serviceProvider, AspireCliTelemetry telemetry, + ProfilingTelemetry profilingTelemetry, IConfiguration configuration, IDiskCache diskCache, IFeatures features, @@ -75,6 +76,7 @@ internal sealed class DotNetCliRunner( // Retry configuration for NuGet package search operations private const int MaxSearchRetries = 3; + private static long s_binlogSequence; private static readonly TimeSpan[] s_searchRetryDelays = [TimeSpan.FromSeconds(1), TimeSpan.FromSeconds(2)]; private string GetMsBuildServerValue() @@ -96,18 +98,30 @@ private async Task ExecuteAsync( ProcessInvocationOptions options, CancellationToken cancellationToken) { + var dotnetCommand = args.Length > 0 ? args[0] : "execute"; + using var processActivity = profilingTelemetry.StartDotNetProcess(dotnetCommand, projectFile, workingDirectory, options); + // Build the final environment variables by merging caller-provided env with dotnet-specific settings. var finalEnv = env?.ToDictionary() ?? new Dictionary(); ConfigureDotNetEnvironment(finalEnv); // Resolve the dotnet executable path, preferring the private SDK installation if available. var dotnetPath = ResolveDotNetPath(finalEnv); + processActivity.SetDotNetResolvedExecutable( + dotnetPath, + finalEnv.TryGetValue("DOTNET_CLI_USE_MSBUILD_SERVER", out var msBuildServerValue) ? 
msBuildServerValue : null); + + var effectiveArgs = AddBinlogArgumentIfConfigured(args, dotnetCommand, projectFile, workingDirectory, processActivity); + processActivity.SetDotNetArgsCount(effectiveArgs.Length); + + var outputCounters = new ProcessOutputCounters(); + var instrumentedOptions = CreateInstrumentedProcessOptions(options, processActivity, outputCounters); // Do not use 'using' here: StartBackchannelAsync runs fire-and-forget and // accesses execution.HasExited / ExitCode after this method returns. Disposing // the underlying Process while the backchannel task is still polling would // cause ObjectDisposedException. Let the GC handle cleanup instead. - var execution = executionFactory.CreateExecution(dotnetPath, args, finalEnv, workingDirectory, options); + var execution = executionFactory.CreateExecution(dotnetPath, effectiveArgs, finalEnv, workingDirectory, instrumentedOptions); // Get socket path from env if present string? socketPath = null; @@ -133,9 +147,11 @@ await extensionInteractionService.LaunchAppHostAsync( } var started = execution.Start(); + processActivity.AddDotNetProcessStartResult(started, started ? execution.ProcessId : null); if (!started) { + processActivity.SetError("Process failed to start."); return ExitCodeConstants.FailedToDotnetRunAppHost; } @@ -144,7 +160,111 @@ await extensionInteractionService.LaunchAppHostAsync( _ = StartBackchannelAsync(execution, socketPath, backchannelCompletionSource, cancellationToken); } - return await execution.WaitForExitAsync(cancellationToken); + var exitCode = await execution.WaitForExitAsync(cancellationToken); + processActivity.SetDotNetCompleted(exitCode, outputCounters.StdoutLineCount, outputCounters.StderrLineCount); + + return exitCode; + } + + private string[] AddBinlogArgumentIfConfigured( + string[] args, + string dotnetCommand, + FileInfo? 
projectFile, + DirectoryInfo workingDirectory, + ProfilingTelemetry.ActivityScope processActivity) + { + var binlogDirectory = configuration[KnownConfigNames.CliDotnetBinlogDirectory]; + if (string.IsNullOrWhiteSpace(binlogDirectory)) + { + return args; + } + + if (!SupportsBinlog(dotnetCommand)) + { + // Some dotnet subcommands are not MSBuild entry points and reject /bl. + processActivity.SetDotNetBinlogSkippedUnsupportedCommand(); + return args; + } + + var fullBinlogDirectory = Path.IsPathFullyQualified(binlogDirectory) + ? binlogDirectory + : Path.GetFullPath(Path.Combine(workingDirectory.FullName, binlogDirectory)); + + Directory.CreateDirectory(fullBinlogDirectory); + + var binlogPath = Path.Combine(fullBinlogDirectory, CreateBinlogFileName(dotnetCommand, projectFile, workingDirectory)); + processActivity.SetDotNetBinlogPath(binlogPath); + + return [.. args, $"/bl:{binlogPath}"]; + } + + private static bool SupportsBinlog(string dotnetCommand) + { + return dotnetCommand is "build" or "msbuild" or "restore" or "publish" or "test"; + } + + private static string CreateBinlogFileName(string dotnetCommand, FileInfo? projectFile, DirectoryInfo workingDirectory) + { + var sequence = Interlocked.Increment(ref s_binlogSequence); + var timestamp = DateTimeOffset.UtcNow.ToString("yyyyMMddTHHmmssfff", CultureInfo.InvariantCulture); + var scope = projectFile is not null ? Path.GetFileNameWithoutExtension(projectFile.Name) : workingDirectory.Name; + + return $"{timestamp}-{Environment.ProcessId}-{sequence:D4}-{SanitizeFileNamePart(dotnetCommand)}-{SanitizeFileNamePart(scope)}.binlog"; + } + + private static string SanitizeFileNamePart(string value) + { + var builder = new StringBuilder(value.Length); + foreach (var ch in value) + { + builder.Append(char.IsAsciiLetterOrDigit(ch) || ch is '-' or '_' or '.' ? ch : '-'); + } + + var sanitized = builder.ToString().Trim('-'); + if (string.IsNullOrEmpty(sanitized)) + { + return "dotnet"; + } + + return sanitized.Length <= 80 ? 
sanitized : sanitized[..80]; + } + + private static ProcessInvocationOptions CreateInstrumentedProcessOptions( + ProcessInvocationOptions options, + ProfilingTelemetry.ActivityScope activity, + ProcessOutputCounters outputCounters) + { + return new ProcessInvocationOptions + { + NoLaunchProfile = options.NoLaunchProfile, + StartDebugSession = options.StartDebugSession, + Debug = options.Debug, + SuppressLogging = options.SuppressLogging, + StandardOutputCallback = line => + { + var lineCount = Interlocked.Increment(ref outputCounters.StdoutLineCount); + if (lineCount == 1) + { + activity.AddDotNetFirstStdoutEvent(); + } + options.StandardOutputCallback?.Invoke(line); + }, + StandardErrorCallback = line => + { + var lineCount = Interlocked.Increment(ref outputCounters.StderrLineCount); + if (lineCount == 1) + { + activity.AddDotNetFirstStderrEvent(); + } + options.StandardErrorCallback?.Invoke(line); + } + }; + } + + private sealed class ProcessOutputCounters + { + public int StdoutLineCount; + public int StderrLineCount; } internal static int GetCurrentProcessId() => Environment.ProcessId; @@ -220,7 +340,7 @@ private string ResolveDotNetPath(IDictionary env) private async Task StartBackchannelAsync(IProcessExecution? execution, string socketPath, TaskCompletionSource backchannelCompletionSource, CancellationToken cancellationToken) { - using var activity = telemetry.StartDiagnosticActivity(); + using var activity = profilingTelemetry.StartBackchannelConnect(socketPath); using var timer = new PeriodicTimer(TimeSpan.FromMilliseconds(50)); @@ -236,7 +356,13 @@ private async Task StartBackchannelAsync(IProcessExecution? 
execution, string so try { logger.LogTrace("Attempting to connect to AppHost backchannel at {SocketPath} (attempt {Attempt})", socketPath, connectionAttempts); + if (connectionAttempts == 0 || connectionAttempts % 10 == 0) + { + activity.AddBackchannelConnectAttemptEvent(connectionAttempts); + } await backchannel.ConnectAsync(socketPath, connectionAttempts, cancellationToken).ConfigureAwait(false); + activity.SetBackchannelRetryCount(connectionAttempts); + activity.AddBackchannelConnectedEvent(); backchannelCompletionSource.SetResult(backchannel); // Note: We intentionally do not call Environment.Exit when the backchannel disconnects. // The CLI should complete normally and return the appropriate exit code based on the diff --git a/src/Aspire.Cli/DotNet/IProcessExecution.cs b/src/Aspire.Cli/DotNet/IProcessExecution.cs index 8dafaecbaf3..d8ff1cce5cf 100644 --- a/src/Aspire.Cli/DotNet/IProcessExecution.cs +++ b/src/Aspire.Cli/DotNet/IProcessExecution.cs @@ -29,6 +29,11 @@ internal interface IProcessExecution : IDisposable /// true if the process was started successfully; otherwise, false. bool Start(); + /// + /// Gets the process ID. Only valid after returns true. + /// + int ProcessId { get; } + /// /// Waits for the process to exit asynchronously. 
/// diff --git a/src/Aspire.Cli/DotNet/ProcessExecution.cs b/src/Aspire.Cli/DotNet/ProcessExecution.cs index 6fe7d518800..2f9eb4f5ba5 100644 --- a/src/Aspire.Cli/DotNet/ProcessExecution.cs +++ b/src/Aspire.Cli/DotNet/ProcessExecution.cs @@ -44,6 +44,9 @@ internal ProcessExecution(Process process, ILogger logger, ProcessInvocationOpti /// public int ExitCode => _process.ExitCode; + /// + public int ProcessId => _process.Id; + /// public bool Start() { diff --git a/src/Aspire.Cli/Npm/NpmRunner.cs b/src/Aspire.Cli/Npm/NpmRunner.cs index fbe946f8a2f..f768db40d2d 100644 --- a/src/Aspire.Cli/Npm/NpmRunner.cs +++ b/src/Aspire.Cli/Npm/NpmRunner.cs @@ -3,6 +3,7 @@ using System.Diagnostics; using System.Diagnostics.CodeAnalysis; +using Aspire.Cli.Telemetry; using Microsoft.Extensions.Logging; using Semver; @@ -11,7 +12,7 @@ namespace Aspire.Cli.Npm; /// /// Runs npm CLI commands for package management operations. /// -internal sealed class NpmRunner(ILogger logger) : INpmRunner +internal sealed class NpmRunner(ILogger logger, ProfilingTelemetry profilingTelemetry) : INpmRunner { /// /// The public npm registry URL. 
Commands that resolve packages from the registry @@ -249,9 +250,7 @@ public async Task InstallGlobalAsync(string tarballPath, CancellationToken private static string CreateIsolatedTempDirectory() { - var tempDir = Path.Combine(Path.GetTempPath(), $"aspire-npm-{Guid.NewGuid():N}"); - Directory.CreateDirectory(tempDir); - return tempDir; + return Directory.CreateTempSubdirectory("aspire-npm-").FullName; } private void CleanupTempDirectory(string tempDir) @@ -356,15 +355,19 @@ internal static bool TryExtractLastVersion(string npmOutput, [NotNullWhen(true)] var startInfo = CreateNpmProcessStartInfo(npmPath, args, workingDirectory); using var process = new Process { StartInfo = startInfo }; + using var activity = profilingTelemetry.StartNpmCommand(npmPath, args.Length, workingDirectory); process.Start(); + activity.SetProcessId(process.Id); var outputTask = process.StandardOutput.ReadToEndAsync(cancellationToken); var errorTask = process.StandardError.ReadToEndAsync(cancellationToken); await process.WaitForExitAsync(cancellationToken).ConfigureAwait(false); + activity.SetProcessExitCode(process.ExitCode); if (process.ExitCode != 0) { + activity.SetError($"npm exited with code {process.ExitCode}."); var errorOutput = await errorTask.ConfigureAwait(false); logger.LogDebug("npm {Args} returned non-zero exit code {ExitCode}: {Error}", argsString, process.ExitCode, errorOutput.Trim()); return null; diff --git a/src/Aspire.Cli/Program.cs b/src/Aspire.Cli/Program.cs index 92d7ce89319..6bb4334dd78 100644 --- a/src/Aspire.Cli/Program.cs +++ b/src/Aspire.Cli/Program.cs @@ -767,7 +767,8 @@ public static async Task Main(string[] args) EnableDefaultExceptionHandler = false }; - using var mainActivity = telemetry.StartReportedActivity(name: TelemetryConstants.Activities.Main, kind: ActivityKind.Internal); + app.Services.GetRequiredService(); + using var mainActivity = telemetry.StartReportedActivity(TelemetryConstants.Activities.Main, ActivityKind.Internal); if (mainActivity != null) 
{ diff --git a/src/Aspire.Cli/Projects/AppHostServerSession.cs b/src/Aspire.Cli/Projects/AppHostServerSession.cs index b26e84e191b..dc0b4598828 100644 --- a/src/Aspire.Cli/Projects/AppHostServerSession.cs +++ b/src/Aspire.Cli/Projects/AppHostServerSession.cs @@ -3,6 +3,7 @@ using System.Diagnostics; using Aspire.Cli.Configuration; +using Aspire.Cli.Telemetry; using Aspire.Cli.Utils; using Aspire.Hosting; using Microsoft.Extensions.Logging; @@ -19,6 +20,7 @@ internal sealed class AppHostServerSession : IAppHostServerSession private readonly Process _serverProcess; private readonly OutputCollector _output; private readonly string _socketPath; + private readonly ProfilingTelemetry.ActivityScope _activity; private IAppHostRpcClient? _rpcClient; private bool _disposed; @@ -27,13 +29,15 @@ internal AppHostServerSession( OutputCollector output, string socketPath, string authenticationToken, - ILogger logger) + ILogger logger, + ProfilingTelemetry.ActivityScope activity = default) { _serverProcess = serverProcess; _output = output; _socketPath = socketPath; _authenticationToken = authenticationToken; _logger = logger; + _activity = activity; } /// @@ -57,12 +61,14 @@ internal AppHostServerSession( /// The environment variables to pass to the server. /// Whether to enable debug logging for the server. /// The logger to use for lifecycle diagnostics. + /// Optional profiling telemetry for the server process lifetime. /// The started AppHost server session. internal static AppHostServerSession Start( IAppHostServerProject appHostServerProject, Dictionary? environmentVariables, bool debug, - ILogger logger) + ILogger logger, + ProfilingTelemetry? 
profilingTelemetry = null) { var currentPid = Environment.ProcessId; var serverEnvironmentVariables = environmentVariables is null @@ -72,17 +78,37 @@ internal static AppHostServerSession Start( var authenticationToken = TokenGenerator.GenerateToken(); serverEnvironmentVariables[KnownConfigNames.RemoteAppHostToken] = authenticationToken; - var (socketPath, serverProcess, serverOutput) = appHostServerProject.Run( - currentPid, - serverEnvironmentVariables, - debug: debug); + var activity = profilingTelemetry is null + ? default + : profilingTelemetry.StartAppHostServerLifetime(appHostServerProject.GetType().Name); + + string socketPath; + Process serverProcess; + OutputCollector serverOutput; + try + { + (socketPath, serverProcess, serverOutput) = appHostServerProject.Run( + currentPid, + serverEnvironmentVariables, + debug: debug); + } + catch (Exception ex) + { + activity.SetError(ex.Message); + activity.Dispose(); + throw; + } + + activity.SetProcessId(serverProcess.Id); + activity.SetProcessExecutableName(Path.GetFileName(serverProcess.StartInfo.FileName)); return new AppHostServerSession( serverProcess, serverOutput, socketPath, authenticationToken, - logger); + logger, + activity); } /// @@ -114,6 +140,7 @@ public async ValueTask DisposeAsync() try { _serverProcess.Kill(entireProcessTree: true); + _activity.SetError("AppHost server process was terminated during session disposal."); } catch (Exception ex) { @@ -121,7 +148,13 @@ public async ValueTask DisposeAsync() } } + if (_serverProcess.HasExited) + { + _activity.SetProcessExitCode(_serverProcess.ExitCode); + } + _serverProcess.Dispose(); + _activity.Dispose(); } } @@ -132,13 +165,16 @@ internal sealed class AppHostServerSessionFactory : IAppHostServerSessionFactory { private readonly IAppHostServerProjectFactory _projectFactory; private readonly ILogger _logger; + private readonly ProfilingTelemetry _profilingTelemetry; public AppHostServerSessionFactory( IAppHostServerProjectFactory projectFactory, - 
ILogger logger) + ILogger logger, + ProfilingTelemetry profilingTelemetry) { _projectFactory = projectFactory; _logger = logger; + _profilingTelemetry = profilingTelemetry; } /// @@ -167,7 +203,8 @@ public async Task CreateAsync( appHostServerProject, launchSettingsEnvVars, debug, - _logger); + _logger, + _profilingTelemetry); return new AppHostServerSessionResult( Success: true, diff --git a/src/Aspire.Cli/Projects/DotNetAppHostProject.cs b/src/Aspire.Cli/Projects/DotNetAppHostProject.cs index 30ed86658f2..d5d636eb3cf 100644 --- a/src/Aspire.Cli/Projects/DotNetAppHostProject.cs +++ b/src/Aspire.Cli/Projects/DotNetAppHostProject.cs @@ -26,6 +26,7 @@ internal sealed class DotNetAppHostProject : IAppHostProject private readonly IInteractionService _interactionService; private readonly ICertificateService _certificateService; private readonly AspireCliTelemetry _telemetry; + private readonly ProfilingTelemetry _profilingTelemetry; private readonly IFeatures _features; private readonly ILogger _logger; private readonly TimeProvider _timeProvider; @@ -44,6 +45,7 @@ public DotNetAppHostProject( IInteractionService interactionService, ICertificateService certificateService, AspireCliTelemetry telemetry, + ProfilingTelemetry profilingTelemetry, IFeatures features, IProjectUpdater projectUpdater, IDotNetSdkInstaller sdkInstaller, @@ -56,6 +58,7 @@ public DotNetAppHostProject( _interactionService = interactionService; _certificateService = certificateService; _telemetry = telemetry; + _profilingTelemetry = profilingTelemetry; _features = features; _projectUpdater = projectUpdater; _sdkInstaller = sdkInstaller; @@ -218,7 +221,7 @@ public async Task RunAsync(AppHostProjectContext context, CancellationToken (bool IsCompatibleAppHost, bool SupportsBackchannel, string? AspireHostingVersion)? 
appHostCompatibilityCheck = null; - using var activity = _telemetry.StartDiagnosticActivity("run"); + using var activity = _profilingTelemetry.StartAppHostRun(); var isSingleFileAppHost = effectiveAppHostFile.Extension != ".csproj"; @@ -228,8 +231,17 @@ public async Task RunAsync(AppHostProjectContext context, CancellationToken string? isolatedUserSecretsId = null; if (context.Isolated) { - isolatedUserSecretsId = await ConfigureIsolatedModeAsync(effectiveAppHostFile, env, cancellationToken); - _logger.LogInformation("Aspire run isolated. Isolated UserSecretsId: {IsolatedUserSecretsId}", isolatedUserSecretsId); + using var isolatedModeActivity = _profilingTelemetry.StartAppHostConfigureIsolatedMode(); + try + { + isolatedUserSecretsId = await ConfigureIsolatedModeAsync(effectiveAppHostFile, env, cancellationToken); + _logger.LogInformation("Aspire run isolated. Isolated UserSecretsId: {IsolatedUserSecretsId}", isolatedUserSecretsId); + } + catch (Exception ex) + { + isolatedModeActivity.SetError(ex.Message); + throw; + } } // Enable debug logging in the app host so that debug-level output is @@ -245,7 +257,12 @@ public async Task RunAsync(AppHostProjectContext context, CancellationToken try { - var certResult = await _certificateService.EnsureCertificatesTrustedAsync(cancellationToken); + EnsureCertificatesTrustedResult certResult; + using (var certActivity = _profilingTelemetry.StartAppHostEnsureDevCertificates()) + { + certResult = await _certificateService.EnsureCertificatesTrustedAsync(cancellationToken); + certActivity.SetDevCertificateEnvironmentVariables(certResult.EnvironmentVariables.Count); + } // Apply any environment variables returned by the certificate service (e.g., SSL_CERT_DIR on Linux) foreach (var kvp in certResult.EnvironmentVariables) @@ -271,6 +288,8 @@ public async Task RunAsync(AppHostProjectContext context, CancellationToken var shouldBuildInCli = !isExtensionHost || extensionHasBuildCapability; if (shouldBuildInCli) { + using var 
buildActivity = _profilingTelemetry.StartAppHostBuild(context.NoRestore, isExtensionHost, extensionHasBuildCapability); + var buildOptions = new ProcessInvocationOptions { StandardOutputCallback = buildOutputCollector.AppendOutput, @@ -278,6 +297,7 @@ public async Task RunAsync(AppHostProjectContext context, CancellationToken }; var buildExitCode = await AppHostHelper.BuildAppHostAsync(_runner, _interactionService, effectiveAppHostFile, context.NoRestore, buildOptions, context.WorkingDirectory, cancellationToken); + buildActivity.SetAppHostBuildExitCode(buildExitCode); if (buildExitCode != 0) { @@ -295,7 +315,12 @@ public async Task RunAsync(AppHostProjectContext context, CancellationToken } else { + using var compatibilityActivity = _profilingTelemetry.StartAppHostCheckCompatibility(); appHostCompatibilityCheck = await AppHostHelper.CheckAppHostCompatibilityAsync(_runner, _interactionService, effectiveAppHostFile, _telemetry, context.WorkingDirectory, _fileLoggerProvider.LogFilePath, cancellationToken); + compatibilityActivity.SetAppHostCompatibility( + appHostCompatibilityCheck.Value.IsCompatibleAppHost, + appHostCompatibilityCheck.Value.SupportsBackchannel, + appHostCompatibilityCheck.Value.AspireHostingVersion); } } catch @@ -318,6 +343,7 @@ public async Task RunAsync(AppHostProjectContext context, CancellationToken // Signal that build/preparation is complete context.BuildCompletionSource?.TrySetResult(true); + activity.AddAppHostBuildReadyEvent(); var runOptions = new ProcessInvocationOptions { @@ -343,6 +369,7 @@ public async Task RunAsync(AppHostProjectContext context, CancellationToken // dotnet watch does not support --no-build, so watch + context.NoBuild is invalid and will fail in the runner. 
// noRestore: only relevant when noBuild is false (since --no-build implies --no-restore) var noBuild = !watch || context.NoBuild; + using var runDotnetActivity = _profilingTelemetry.StartAppHostRunDotnetLifetime(watch, noBuild, context.NoRestore); return await _runner.RunAsync( effectiveAppHostFile, watch, diff --git a/src/Aspire.Cli/Projects/GuestAppHostProject.cs b/src/Aspire.Cli/Projects/GuestAppHostProject.cs index 0a514f55200..333053708ba 100644 --- a/src/Aspire.Cli/Projects/GuestAppHostProject.cs +++ b/src/Aspire.Cli/Projects/GuestAppHostProject.cs @@ -12,6 +12,7 @@ using Aspire.Cli.Interaction; using Aspire.Cli.Packaging; using Aspire.Cli.Resources; +using Aspire.Cli.Telemetry; using Aspire.Cli.Utils; using Aspire.Hosting; using Aspire.Shared.UserSecrets; @@ -41,6 +42,7 @@ internal sealed class GuestAppHostProject : IAppHostProject, IGuestAppHostSdkGen private readonly FileLoggerProvider _fileLoggerProvider; private readonly TimeProvider _timeProvider; private readonly RunningInstanceManager _runningInstanceManager; + private readonly ProfilingTelemetry _profilingTelemetry; // Language is always resolved via constructor private readonly LanguageInfo _resolvedLanguage; @@ -59,6 +61,7 @@ public GuestAppHostProject( ILanguageDiscovery languageDiscovery, ILogger logger, FileLoggerProvider fileLoggerProvider, + ProfilingTelemetry profilingTelemetry, TimeProvider? timeProvider = null) { _resolvedLanguage = language; @@ -73,6 +76,7 @@ public GuestAppHostProject( _languageDiscovery = languageDiscovery; _logger = logger; _fileLoggerProvider = fileLoggerProvider; + _profilingTelemetry = profilingTelemetry; _timeProvider = timeProvider ?? 
TimeProvider.System; _runningInstanceManager = new RunningInstanceManager(_logger, _interactionService, _timeProvider); } @@ -237,7 +241,8 @@ internal async Task BuildAndGenerateSdkAsync(DirectoryInfo directory, Canc appHostServerProject, environmentVariables: null, debug: false, - _logger); + _logger, + _profilingTelemetry); // Step 3: Connect to server var rpcClient = await serverSession.GetRpcClientAsync(cancellationToken); @@ -393,7 +398,8 @@ public async Task RunAsync(AppHostProjectContext context, CancellationToken appHostServerProject, launchSettingsEnvVars, context.Debug, - _logger); + _logger, + _profilingTelemetry); var socketPath = serverSession.SocketPath; var appHostServerProcess = serverSession.ServerProcess; var appHostServerOutputCollector = serverSession.Output; @@ -868,7 +874,8 @@ public async Task PublishAsync(PublishContext context, CancellationToken ca appHostServerProject, launchSettingsEnvVars, context.Debug, - _logger); + _logger, + _profilingTelemetry); var jsonRpcSocketPath = serverSession.SocketPath; var appHostServerProcess = serverSession.ServerProcess; var appHostServerOutputCollector = serverSession.Output; @@ -1377,7 +1384,7 @@ private async Task EnsureRuntimeCreatedAsync( runtimeSpec = TypeScriptAppHostToolchainResolver.ApplyToRuntimeSpec(runtimeSpec, toolchain); } - _guestRuntime = new GuestRuntime(runtimeSpec, _logger, _fileLoggerProvider); + _guestRuntime = new GuestRuntime(runtimeSpec, _logger, _fileLoggerProvider, profilingTelemetry: _profilingTelemetry); _logger.LogDebug("Created GuestRuntime for {RuntimeDisplayName}: Execute={Command} {Args}", runtimeSpec.DisplayName, diff --git a/src/Aspire.Cli/Projects/GuestRuntime.cs b/src/Aspire.Cli/Projects/GuestRuntime.cs index e8a0d601c18..5e9305c049d 100644 --- a/src/Aspire.Cli/Projects/GuestRuntime.cs +++ b/src/Aspire.Cli/Projects/GuestRuntime.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. 
using Aspire.Cli.Diagnostics; +using Aspire.Cli.Telemetry; using Aspire.Cli.Utils; using Aspire.TypeSystem; using Microsoft.Extensions.Logging; @@ -18,6 +19,7 @@ internal sealed class GuestRuntime private readonly ILogger _logger; private readonly FileLoggerProvider? _fileLoggerProvider; private readonly Func _commandResolver; + private readonly ProfilingTelemetry? _profilingTelemetry; /// /// Creates a new GuestRuntime for the given runtime specification. @@ -26,12 +28,14 @@ internal sealed class GuestRuntime /// Logger for debugging output. /// Optional file logger for writing output to disk. /// Optional command resolver used to locate executables on PATH. - public GuestRuntime(RuntimeSpec spec, ILogger logger, FileLoggerProvider? fileLoggerProvider = null, Func? commandResolver = null) + /// Optional profiling telemetry for child-process diagnostics. + public GuestRuntime(RuntimeSpec spec, ILogger logger, FileLoggerProvider? fileLoggerProvider = null, Func? commandResolver = null, ProfilingTelemetry? profilingTelemetry = null) { _spec = spec; _logger = logger; _fileLoggerProvider = fileLoggerProvider; _commandResolver = commandResolver ?? PathLookupHelper.FindFullPathFromPath; + _profilingTelemetry = profilingTelemetry; } /// @@ -73,14 +77,19 @@ public GuestRuntime(RuntimeSpec spec, ILogger logger, FileLoggerProvider? fileLo var environmentVariables = commandSpec.EnvironmentVariables ?? new Dictionary(); var launcher = CreateDefaultLauncher(); + using var activity = _profilingTelemetry is null + ? default + : _profilingTelemetry.StartGuestInitializeCommand(_spec.Language, _spec.DisplayName, commandSpec.Command, args.Length, directory); var (exitCode, output) = await launcher.LaunchAsync( commandSpec.Command, args, directory, environmentVariables, cancellationToken); + activity.SetProcessExitCode(exitCode); if (exitCode != 0) { + activity.SetError($"{_spec.DisplayName} initialization exited with code {exitCode}."); return (exitCode, output ?? 
outputCollector); } } @@ -108,12 +117,20 @@ public GuestRuntime(RuntimeSpec spec, ILogger logger, FileLoggerProvider? fileLo var environmentVariables = _spec.InstallDependencies.EnvironmentVariables ?? new Dictionary(); var launcher = CreateDefaultLauncher(); + using var activity = _profilingTelemetry is null + ? default + : _profilingTelemetry.StartGuestInstallDependencies(_spec.Language, _spec.DisplayName, _spec.InstallDependencies.Command, args.Length, directory); var (exitCode, output) = await launcher.LaunchAsync( _spec.InstallDependencies.Command, args, directory, environmentVariables, cancellationToken); + activity.SetProcessExitCode(exitCode); + if (exitCode != 0) + { + activity.SetError($"{_spec.DisplayName} dependency installation exited with code {exitCode}."); + } return (exitCode, output ?? outputCollector); } @@ -203,9 +220,14 @@ public GuestRuntime(RuntimeSpec spec, ILogger logger, FileLoggerProvider? fileLo var mergedEnvironment = MergeEnvironmentVariables(environmentVariables, commandSpec); _logger.LogDebug("Launching pre-execution command: {Command} {Args}", commandSpec.Command, string.Join(" ", args)); + using var activity = _profilingTelemetry is null + ? default + : _profilingTelemetry.StartGuestExecuteCommand(_spec.Language, _spec.DisplayName, commandSpec.Command, args.Length, directory); var (exitCode, output) = await preExecuteLauncher.LaunchAsync(commandSpec.Command, args, directory, mergedEnvironment, cancellationToken); + activity.SetProcessExitCode(exitCode); if (exitCode != 0) { + activity.SetError($"{_spec.DisplayName} pre-execution exited with code {exitCode}."); return (exitCode, output ?? new OutputCollector()); } } @@ -227,7 +249,17 @@ public GuestRuntime(RuntimeSpec spec, ILogger logger, FileLoggerProvider? 
fileLo var mergedEnvironment = MergeEnvironmentVariables(environmentVariables, commandSpec); _logger.LogDebug("Launching: {Command} {Args}", commandSpec.Command, string.Join(" ", args)); - return await launcher.LaunchAsync(commandSpec.Command, args, directory, mergedEnvironment, cancellationToken); + using var activity = _profilingTelemetry is null + ? default + : _profilingTelemetry.StartGuestExecuteCommand(_spec.Language, _spec.DisplayName, commandSpec.Command, args.Length, directory); + var (exitCode, output) = await launcher.LaunchAsync(commandSpec.Command, args, directory, mergedEnvironment, cancellationToken); + activity.SetProcessExitCode(exitCode); + if (exitCode != 0) + { + activity.SetError($"{_spec.DisplayName} execution exited with code {exitCode}."); + } + + return (exitCode, output); } private static Dictionary MergeEnvironmentVariables( diff --git a/src/Aspire.Cli/Telemetry/AspireCliTelemetry.cs b/src/Aspire.Cli/Telemetry/AspireCliTelemetry.cs index 9efb22e10df..35b1b390f9b 100644 --- a/src/Aspire.Cli/Telemetry/AspireCliTelemetry.cs +++ b/src/Aspire.Cli/Telemetry/AspireCliTelemetry.cs @@ -100,6 +100,14 @@ internal AspireCliTelemetry(ILogger logger, IMachineInformat return StartActivityCore(_reportedActivitySource, name, kind); } + /// + /// Starts a new activity for reported telemetry with an explicit parent context. + /// + public Activity? StartReportedActivity(string name, ActivityKind kind, ActivityContext parentContext) + { + return StartActivityCore(_reportedActivitySource, name, kind, parentContext); + } + /// /// Starts a new activity for diagnostic telemetry used for internal diagnostics only. /// Uses the caller member name if no name is provided. @@ -112,14 +120,29 @@ internal AspireCliTelemetry(ILogger logger, IMachineInformat return StartActivityCore(_diagnosticsActivitySource, name, kind); } + /// + /// Starts a new activity for diagnostic telemetry with an explicit parent context. + /// + public Activity? 
StartDiagnosticActivity(string name, ActivityKind kind, ActivityContext parentContext) + { + return StartActivityCore(_diagnosticsActivitySource, name, kind, parentContext); + } + private Activity? StartActivityCore(ActivitySource source, string name, ActivityKind kind) + { + return StartActivityCore(source, name, kind, parentContext: null); + } + + private Activity? StartActivityCore(ActivitySource source, string name, ActivityKind kind, ActivityContext? parentContext) { CheckInitialization(); // Activities must have a name. ArgumentException.ThrowIfNullOrWhiteSpace(name); - var activity = source.StartActivity(name, kind); + var activity = parentContext is { } context + ? source.StartActivity(name, kind, context) + : source.StartActivity(name, kind); if (activity is not null) { diff --git a/src/Aspire.Cli/Telemetry/ProfilingTelemetry.cs b/src/Aspire.Cli/Telemetry/ProfilingTelemetry.cs new file mode 100644 index 00000000000..cc7c624168a --- /dev/null +++ b/src/Aspire.Cli/Telemetry/ProfilingTelemetry.cs @@ -0,0 +1,733 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics; +using Aspire.Cli.Backchannel; +using Aspire.Cli.DotNet; +using Aspire.Hosting; +using Microsoft.Extensions.Configuration; + +namespace Aspire.Cli.Telemetry; + +/// +/// Creates profiling-only activities used by CLI diagnostics. 
+/// +internal sealed class ProfilingTelemetry(IConfiguration configuration) : IDisposable +{ + public const string ActivitySourceName = "Aspire.Cli.Profiling"; + + internal const string EnabledEnvironmentVariable = KnownConfigNames.ProfilingEnabled; + internal const string SessionIdEnvironmentVariable = KnownConfigNames.ProfilingSessionId; + internal const string TraceParentEnvironmentVariable = KnownConfigNames.ProfilingTraceParent; + internal const string TraceStateEnvironmentVariable = KnownConfigNames.ProfilingTraceState; + internal const string SessionIdBaggageName = "aspire.profiling.session_id"; + + private readonly ActivitySource _activitySource = new(ActivitySourceName); + + /// + /// Activity names for profiling spans. These names describe local diagnostic + /// work such as CLI orchestration and child-process lifetimes; they are not + /// exported through customer telemetry. + /// + internal static class Activities + { + public const string RunCommand = "aspire/cli/run"; + public const string RunAppHostFindAppHost = "aspire/cli/run_apphost.find_apphost"; + public const string RunAppHostStopExistingInstance = "aspire/cli/run_apphost.stop_existing_instance"; + public const string RunAppHostStartProject = "aspire/cli/run_apphost.start_project"; + public const string RunAppHostWaitForBuild = "aspire/cli/run_apphost.wait_for_build"; + public const string RunAppHostWaitForBackchannel = "aspire/cli/run_apphost.wait_for_backchannel"; + public const string RunAppHostGetDashboardUrls = "aspire/cli/run_apphost.get_dashboard_urls"; + public const string RunAppHostLifetime = "aspire/cli/run_apphost.lifetime"; + public const string StartAppHostSpawnChild = "aspire/cli/start_apphost.spawn_child"; + public const string StartAppHostWaitForBackchannel = "aspire/cli/start_apphost.wait_for_backchannel"; + public const string StartAppHostGetDashboardUrls = "aspire/cli/start_apphost.get_dashboard_urls"; + public const string BackchannelConnect = 
"aspire/cli/backchannel.connect"; + public const string BackchannelGetDashboardUrls = "aspire/cli/backchannel.get_dashboard_urls"; + public const string AppHostRun = "aspire/cli/apphost.run"; + public const string AppHostConfigureIsolatedMode = "aspire/cli/apphost.configure_isolated_mode"; + public const string AppHostEnsureDevCertificates = "aspire/cli/apphost.ensure_dev_certificates"; + public const string AppHostBuild = "aspire/cli/apphost.build"; + public const string AppHostCheckCompatibility = "aspire/cli/apphost.check_compatibility"; + public const string AppHostRunDotnetLifetime = "aspire/cli/apphost.run_dotnet.lifetime"; + public const string AppHostServerLifetime = "aspire/cli/apphost_server.lifetime"; + public const string DotNetRunLifetime = "aspire/cli/dotnet.run.lifetime"; + public const string GuestInitializeCommand = "aspire/cli/guest.initialize_command"; + public const string GuestInstallDependencies = "aspire/cli/guest.install_dependencies"; + public const string GuestExecuteCommand = "aspire/cli/guest.execute_command"; + public const string NpmCommand = "aspire/cli/npm.command"; + + public static string DotNetCommand(string command) => $"aspire/cli/dotnet.{command}"; + } + + /// + /// Tag names for profiling spans. Tags capture low-cardinality dimensions + /// and useful diagnostics such as process IDs, exit codes, command names, + /// output counts, and emitted artifact paths. 
+ /// + internal static class Tags + { + public const string ProfilingSessionId = "aspire.profiling.session_id"; + public const string LegacyStartupOperationId = "aspire.startup.operation_id"; + public const string DotNetCommand = "aspire.cli.dotnet.command"; + public const string DotNetProjectFile = "aspire.cli.dotnet.project_file"; + public const string DotNetWorkingDirectory = "aspire.cli.dotnet.working_directory"; + public const string DotNetNoLaunchProfile = "aspire.cli.dotnet.no_launch_profile"; + public const string DotNetStartDebugSession = "aspire.cli.dotnet.start_debug_session"; + public const string DotNetDebug = "aspire.cli.dotnet.debug"; + public const string DotNetMsBuildServer = "aspire.cli.dotnet.msbuild_server"; + public const string DotNetArgsCount = "aspire.cli.dotnet.args.count"; + public const string DotNetStdoutLines = "aspire.cli.dotnet.stdout_lines"; + public const string DotNetStderrLines = "aspire.cli.dotnet.stderr_lines"; + public const string DotNetBinlogEnabled = "aspire.cli.dotnet.binlog_enabled"; + public const string DotNetBinlogPath = "aspire.cli.dotnet.binlog_path"; + public const string DotNetBinlogArtifactType = "aspire.cli.dotnet.binlog_artifact_type"; + public const string DotNetBinlogSkipReason = "aspire.cli.dotnet.binlog_skip_reason"; + public const string AppHostProjectFileSpecified = "aspire.cli.apphost.project_file_specified"; + public const string AppHostRunningInstanceResult = "aspire.cli.apphost.running_instance_result"; + public const string AppHostLanguage = "aspire.cli.apphost.language"; + public const string AppHostNoBuild = "aspire.cli.apphost.no_build"; + public const string AppHostNoRestore = "aspire.cli.apphost.no_restore"; + public const string AppHostWaitForDebugger = "aspire.cli.apphost.wait_for_debugger"; + public const string AppHostBuildSuccess = "aspire.cli.apphost.build_success"; + public const string AppHostBackchannelConnected = "aspire.cli.apphost.backchannel_connected"; + public const string 
AppHostDashboardHealthy = "aspire.cli.apphost.dashboard_healthy"; + public const string AppHostDashboardHasUrl = "aspire.cli.apphost.dashboard_has_url"; + public const string AppHostDashboardHasCodespacesUrl = "aspire.cli.apphost.dashboard_has_codespaces_url"; + public const string AppHostExtensionHost = "aspire.cli.apphost.extension_host"; + public const string AppHostExtensionHasBuildCapability = "aspire.cli.apphost.extension_has_build_capability"; + public const string AppHostIsCompatible = "aspire.cli.apphost.is_compatible"; + public const string AppHostSupportsBackchannel = "aspire.cli.apphost.supports_backchannel"; + public const string AppHostAspireHostingVersion = "aspire.cli.apphost.aspire_hosting_version"; + public const string AppHostWatch = "aspire.cli.apphost.watch"; + public const string DevCertificateEnvironmentVariableCount = "aspire.cli.dev_cert.env_var_count"; + public const string BackchannelSocketFile = "aspire.cli.backchannel.socket_file"; + public const string BackchannelAutoReconnect = "aspire.cli.backchannel.auto_reconnect"; + public const string BackchannelRetryCount = "aspire.cli.backchannel.retry_count"; + public const string BackchannelExpectedHash = "aspire.cli.backchannel.expected_hash"; + public const string BackchannelHasLegacyHash = "aspire.cli.backchannel.has_legacy_hash"; + public const string BackchannelScanCount = "aspire.cli.backchannel.scan_count"; + public const string BackchannelCapabilityCount = "aspire.cli.backchannel.capability_count"; + public const string BackchannelHasBaselineCapability = "aspire.cli.backchannel.has_baseline_capability"; + public const string ChildCommand = "aspire.cli.child.command"; + public const string AppHostServerImplementation = "aspire.cli.apphost_server.implementation"; + public const string GuestRuntimeLanguage = "aspire.cli.guest.language"; + public const string GuestRuntimeDisplayName = "aspire.cli.guest.display_name"; + public const string GuestCommand = "aspire.cli.guest.command"; + 
public const string GuestWorkingDirectory = "aspire.cli.guest.working_directory"; + public const string NpmCommand = "aspire.cli.npm.command"; + public const string NpmWorkingDirectory = "aspire.cli.npm.working_directory"; + public const string ProcessCommandArgsCount = "process.command_args.count"; + } + + /// + /// Event names for profiling spans. Events mark meaningful points within a + /// span, such as process start, first output, retries, and readiness signals. + /// + internal static class Events + { + public const string DotNetProcessStarted = "aspire/cli/dotnet.process_started"; + public const string DotNetProcessStartFailed = "aspire/cli/dotnet.process_start_failed"; + public const string DotNetProcessExited = "aspire/cli/dotnet.process_exited"; + public const string DotNetFirstStdout = "aspire/cli/dotnet.first_stdout"; + public const string DotNetFirstStderr = "aspire/cli/dotnet.first_stderr"; + public const string BackchannelWaitForRpc = "aspire/cli/backchannel.wait_for_rpc"; + public const string BackchannelRpcReady = "aspire/cli/backchannel.rpc_ready"; + public const string BackchannelGetDashboardUrlsInvoke = "aspire/cli/backchannel.get_dashboard_urls.invoke"; + public const string BackchannelGetDashboardUrlsResponse = "aspire/cli/backchannel.get_dashboard_urls.response"; + public const string BackchannelConnectAttempt = "aspire/cli/backchannel.connect_attempt"; + public const string BackchannelConnected = "aspire/cli/backchannel.connected"; + public const string BackchannelSocketConnectStart = "aspire/cli/backchannel.socket_connect_start"; + public const string BackchannelSocketConnected = "aspire/cli/backchannel.socket_connected"; + public const string BackchannelRpcListening = "aspire/cli/backchannel.rpc_listening"; + public const string BackchannelGetCapabilitiesStart = "aspire/cli/backchannel.get_capabilities_start"; + public const string BackchannelGetCapabilitiesResponse = "aspire/cli/backchannel.get_capabilities_response"; + public const 
string StartAppHostBackchannelConnected = "aspire/cli/start_apphost.backchannel_connected"; + public const string RunAppHostStarted = "aspire/cli/run_apphost.started"; + public const string AuxBackchannelGetDashboardUrlsInvoke = "aspire/cli/aux_backchannel.get_dashboard_urls.invoke"; + public const string AuxBackchannelGetDashboardUrlsResponse = "aspire/cli/aux_backchannel.get_dashboard_urls.response"; + public const string AuxBackchannelGetDashboardUrlsNotFound = "aspire/cli/aux_backchannel.get_dashboard_urls.not_found"; + public const string AppHostBuildReady = "aspire/cli/apphost.build_ready"; + } + + /// + /// Common profiling tag values. Values should be stable strings so trace + /// queries can group by them across CLI versions. + /// + internal static class Values + { + public const string UnsupportedDotNetCommand = "unsupported_dotnet_command"; + public const string MsBuildBinlog = "msbuild.binlog"; + } + + public bool IsEnabled => IsProfilingEnabled(configuration); + + public ActivityScope CurrentActivity => IsEnabled ? new(Activity.Current, ownsActivity: false) : default; + + public static bool IsProfilingEnabled(IConfiguration configuration) + { + return IsTruthy(configuration[EnabledEnvironmentVariable]) || + IsTruthy(configuration[KnownConfigNames.Legacy.StartupProfilingEnabled]); + } + + public static void AddCurrentContextToEnvironment(IDictionary environment) + { + AddActivityContextToEnvironment(Activity.Current, environment); + } + + public static void AddActivityContextToEnvironment(Activity? 
activity, IDictionary environment) + { + if (activity is null) + { + return; + } + + environment[EnabledEnvironmentVariable] = "true"; + environment[KnownConfigNames.Legacy.StartupProfilingEnabled] = "true"; + + var sessionId = GetProfilingSessionId(activity); + if (!string.IsNullOrWhiteSpace(sessionId)) + { + environment[SessionIdEnvironmentVariable] = sessionId; + environment[KnownConfigNames.Legacy.StartupOperationId] = sessionId; + } + + if (!string.IsNullOrWhiteSpace(activity.Id)) + { + environment[TraceParentEnvironmentVariable] = activity.Id; + environment[KnownConfigNames.Legacy.StartupTraceParent] = activity.Id; + } + + if (!string.IsNullOrWhiteSpace(activity.TraceStateString)) + { + environment[TraceStateEnvironmentVariable] = activity.TraceStateString; + environment[KnownConfigNames.Legacy.StartupTraceState] = activity.TraceStateString; + } + } + + internal ActivityScope StartAppHostBuild(bool noRestore, bool extensionHost, bool extensionHasBuildCapability) + { + var activity = StartActivity(Activities.AppHostBuild); + activity.SetAppHostNoRestore(noRestore); + activity.SetAppHostExtensionHost(extensionHost); + activity.SetAppHostExtensionHasBuildCapability(extensionHasBuildCapability); + return activity; + } + + internal ActivityScope StartAppHostCheckCompatibility() + { + return StartActivity(Activities.AppHostCheckCompatibility); + } + + internal ActivityScope StartAppHostConfigureIsolatedMode() + { + return StartActivity(Activities.AppHostConfigureIsolatedMode); + } + + internal ActivityScope StartAppHostEnsureDevCertificates() + { + return StartActivity(Activities.AppHostEnsureDevCertificates); + } + + internal ActivityScope StartAppHostRun() + { + return StartActivity(Activities.AppHostRun); + } + + internal ActivityScope StartAppHostRunDotnetLifetime(bool watch, bool noBuild, bool noRestore) + { + var activity = StartActivity(Activities.AppHostRunDotnetLifetime); + activity.SetAppHostWatch(watch); + activity.SetAppHostNoBuild(noBuild); + 
activity.SetAppHostNoRestore(noRestore); + return activity; + } + + internal ActivityScope StartAuxiliaryBackchannelGetDashboardUrls() + { + var activity = CurrentActivity; + activity.AddAuxBackchannelGetDashboardUrlsInvokeEvent(); + return activity; + } + + internal ActivityScope StartBackchannelConnect(string socketPath) + { + var activity = StartActivity(Activities.BackchannelConnect); + activity.SetBackchannelSocketFile(socketPath); + return activity; + } + + internal ActivityScope StartBackchannelConnect(string socketPath, bool autoReconnect, int retryCount) + { + var activity = StartBackchannelConnect(socketPath); + activity.SetBackchannelAutoReconnect(autoReconnect); + activity.SetBackchannelRetryCount(retryCount); + return activity; + } + + internal ActivityScope StartBackchannelGetDashboardUrls() + { + return StartActivity(Activities.BackchannelGetDashboardUrls); + } + + internal ActivityScope StartDetachedGetDashboardUrls() + { + return StartActivity(Activities.StartAppHostGetDashboardUrls); + } + + internal ActivityScope StartDetachedSpawnChild(string executablePath, int argsCount, string childCommand) + { + var activity = StartActivity(Activities.StartAppHostSpawnChild); + activity.SetProcessExecutableName(Path.GetFileName(executablePath)); + activity.SetProcessCommandArgsCount(argsCount); + activity.SetChildCommand(childCommand); + return activity; + } + + internal ActivityScope StartDetachedWaitForBackchannel(int childProcessId, string expectedHash, bool hasLegacyHash) + { + var activity = StartActivity(Activities.StartAppHostWaitForBackchannel); + activity.SetProcessId(childProcessId); + activity.SetBackchannelExpectedHash(expectedHash); + activity.SetBackchannelHasLegacyHash(hasLegacyHash); + return activity; + } + + internal ActivityScope StartDotNetProcess(string dotnetCommand, FileInfo? 
projectFile, DirectoryInfo workingDirectory, ProcessInvocationOptions options) + { + var activityName = string.Equals(dotnetCommand, "run", StringComparison.Ordinal) + ? Activities.DotNetRunLifetime + : Activities.DotNetCommand(dotnetCommand); + var activity = StartActivity(activityName, ActivityKind.Client); + activity.SetDotNetInvocation(dotnetCommand, projectFile, workingDirectory, options); + return activity; + } + + internal ActivityScope StartAppHostServerLifetime(string implementationName) + { + var activity = StartActivity(Activities.AppHostServerLifetime, ActivityKind.Client); + activity.SetAppHostServerImplementation(implementationName); + return activity; + } + + internal ActivityScope StartGuestInitializeCommand(string languageId, string displayName, string command, int argsCount, DirectoryInfo workingDirectory) + { + var activity = StartGuestProcessActivity(Activities.GuestInitializeCommand, languageId, displayName, command, argsCount, workingDirectory); + return activity; + } + + internal ActivityScope StartGuestInstallDependencies(string languageId, string displayName, string command, int argsCount, DirectoryInfo workingDirectory) + { + var activity = StartGuestProcessActivity(Activities.GuestInstallDependencies, languageId, displayName, command, argsCount, workingDirectory); + return activity; + } + + internal ActivityScope StartGuestExecuteCommand(string languageId, string displayName, string command, int argsCount, DirectoryInfo workingDirectory) + { + var activity = StartGuestProcessActivity(Activities.GuestExecuteCommand, languageId, displayName, command, argsCount, workingDirectory); + return activity; + } + + internal ActivityScope StartNpmCommand(string command, int argsCount, string workingDirectory) + { + var activity = StartActivity(Activities.NpmCommand, ActivityKind.Client); + activity.SetNpmInvocation(command, argsCount, workingDirectory); + return activity; + } + + internal ActivityScope StartRunAppHostFindAppHost(FileInfo? 
passedAppHostProjectFile) + { + var activity = StartActivity(Activities.RunAppHostFindAppHost); + activity.SetAppHostProjectFileSpecified(passedAppHostProjectFile is not null); + return activity; + } + + internal ActivityScope StartRunAppHostGetDashboardUrls() + { + return StartActivity(Activities.RunAppHostGetDashboardUrls); + } + + internal ActivityScope StartRunAppHostLifetime() + { + var activity = StartActivity(Activities.RunAppHostLifetime); + activity.AddRunAppHostStartedEvent(); + return activity; + } + + internal ActivityScope StartRunAppHostStartProject(string languageId, bool noBuild, bool waitForDebugger) + { + var activity = StartActivity(Activities.RunAppHostStartProject); + activity.SetAppHostLanguage(languageId); + activity.SetAppHostNoBuild(noBuild); + activity.SetAppHostWaitForDebugger(waitForDebugger); + return activity; + } + + internal ActivityScope StartRunAppHostStopExistingInstance() + { + return StartActivity(Activities.RunAppHostStopExistingInstance); + } + + internal ActivityScope StartRunAppHostWaitForBackchannel() + { + return StartActivity(Activities.RunAppHostWaitForBackchannel); + } + + internal ActivityScope StartRunAppHostWaitForBuild() + { + return StartActivity(Activities.RunAppHostWaitForBuild); + } + + internal ActivityScope StartRunCommand() + { + return StartActivity(Activities.RunCommand, startWithRemoteParent: true); + } + + private ActivityScope StartActivity( + string name, + ActivityKind kind = ActivityKind.Internal, + bool startWithRemoteParent = false) + { + if (!IsEnabled) + { + return default; + } + + var ambientActivity = Activity.Current; + Activity? 
activity; + if (startWithRemoteParent && + TryGetConfiguredActivityContext(out var parentContext)) + { + activity = _activitySource.StartActivity(name, kind, parentContext); + } + else + { + activity = _activitySource.StartActivity(name, kind); + } + + AddProfilingSession(activity, ambientActivity); + return new ActivityScope(activity); + } + + private void AddProfilingSession(Activity? activity, Activity? ambientActivity) + { + if (activity is null) + { + return; + } + + // Profiling spans can be siblings under short-lived reported/diagnostic activities. + // Seed the ambient ancestor chain with baggage so later profiling siblings reuse the + // same session after an intermediate parent activity has ended. + var sessionId = GetProfilingSessionIdFromAncestors(ambientActivity) ?? GetProfilingSessionId(activity) ?? GetConfiguredSessionId() ?? Guid.NewGuid().ToString("N"); + AddProfilingSessionBaggage(ambientActivity, sessionId); + + // Keep profiling tags on profiling spans only. Reported/customer activities only + // carry the session as baggage so it can flow across async and process boundaries. + activity.SetBaggage(SessionIdBaggageName, sessionId); + activity.SetTag(Tags.ProfilingSessionId, sessionId); + activity.SetTag(Tags.LegacyStartupOperationId, sessionId); + } + + private bool TryGetConfiguredActivityContext(out ActivityContext activityContext) + { + var traceParent = GetConfigurationValue(configuration, TraceParentEnvironmentVariable, KnownConfigNames.Legacy.StartupTraceParent); + var traceState = GetConfigurationValue(configuration, TraceStateEnvironmentVariable, KnownConfigNames.Legacy.StartupTraceState); + if (!string.IsNullOrWhiteSpace(traceParent) && + ActivityContext.TryParse(traceParent, traceState, out activityContext)) + { + return true; + } + + activityContext = default; + return false; + } + + private string? 
GetConfiguredSessionId() + { + return GetConfigurationValue(configuration, SessionIdEnvironmentVariable, KnownConfigNames.Legacy.StartupOperationId); + } + + private static string? GetProfilingSessionId(Activity? activity) + { + return activity?.GetBaggageItem(SessionIdBaggageName) is { Length: > 0 } sessionId ? sessionId : null; + } + + private static string? GetProfilingSessionIdFromAncestors(Activity? activity) + { + for (var current = activity; current is not null; current = current.Parent) + { + if (GetProfilingSessionId(current) is { } sessionId) + { + return sessionId; + } + } + + return null; + } + + private static void AddProfilingSessionBaggage(Activity? activity, string sessionId) + { + for (var current = activity; current is not null; current = current.Parent) + { + if (GetProfilingSessionId(current) is null) + { + current.SetBaggage(SessionIdBaggageName, sessionId); + } + } + } + + private static string? GetConfigurationValue(IConfiguration configuration, string name, string legacyName) + { + return configuration[name] is { Length: > 0 } value ? value : configuration[legacyName]; + } + + private static bool IsTruthy(string? value) + { + return string.Equals(value, "true", StringComparison.OrdinalIgnoreCase) || value == "1"; + } + + private ActivityScope StartGuestProcessActivity(string activityName, string languageId, string displayName, string command, int argsCount, DirectoryInfo workingDirectory) + { + var activity = StartActivity(activityName, ActivityKind.Client); + activity.SetGuestInvocation(languageId, displayName, command, argsCount, workingDirectory); + return activity; + } + + public void Dispose() + { + _activitySource.Dispose(); + } + + internal readonly struct ActivityScope(Activity? 
activity, bool ownsActivity = true) : IDisposable + { + public bool IsRunning => activity is not null; + + public void AddAppHostBuildReadyEvent() => AddEvent(Events.AppHostBuildReady); + + public void AddAuxBackchannelGetDashboardUrlsInvokeEvent() => AddEvent(Events.AuxBackchannelGetDashboardUrlsInvoke); + + public void AddAuxBackchannelGetDashboardUrlsNotFoundEvent() => AddEvent(Events.AuxBackchannelGetDashboardUrlsNotFound); + + public void AddAuxBackchannelGetDashboardUrlsResponseEvent() => AddEvent(Events.AuxBackchannelGetDashboardUrlsResponse); + + public void AddBackchannelConnectedEvent() => AddEvent(Events.BackchannelConnected); + + public void AddBackchannelConnectAttemptEvent(int retryCount) + { + activity?.AddEvent(new ActivityEvent(Events.BackchannelConnectAttempt, tags: new ActivityTagsCollection + { + [Tags.BackchannelRetryCount] = retryCount + })); + } + + public void AddBackchannelGetCapabilitiesStartEvent() => AddEvent(Events.BackchannelGetCapabilitiesStart); + + public void AddBackchannelGetCapabilitiesResponseEvent() => AddEvent(Events.BackchannelGetCapabilitiesResponse); + + public void AddBackchannelGetDashboardUrlsInvokeEvent() => AddEvent(Events.BackchannelGetDashboardUrlsInvoke); + + public void AddBackchannelGetDashboardUrlsResponseEvent() => AddEvent(Events.BackchannelGetDashboardUrlsResponse); + + public void AddBackchannelRpcListeningEvent() => AddEvent(Events.BackchannelRpcListening); + + public void AddBackchannelRpcReadyEvent() => AddEvent(Events.BackchannelRpcReady); + + public void AddBackchannelSocketConnectedEvent() => AddEvent(Events.BackchannelSocketConnected); + + public void AddBackchannelSocketConnectStartEvent() => AddEvent(Events.BackchannelSocketConnectStart); + + public void AddBackchannelWaitForRpcEvent() => AddEvent(Events.BackchannelWaitForRpc); + + public void AddDotNetFirstStderrEvent() => AddEvent(Events.DotNetFirstStderr); + + public void AddDotNetFirstStdoutEvent() => AddEvent(Events.DotNetFirstStdout); + + 
public void AddDotNetProcessExitedEvent() => AddEvent(Events.DotNetProcessExited); + + public void AddDotNetProcessStartFailedEvent() => AddEvent(Events.DotNetProcessStartFailed); + + public void AddDotNetProcessStartedEvent(int processId) + { + SetProcessId(processId); + activity?.AddEvent(new ActivityEvent(Events.DotNetProcessStarted, tags: new ActivityTagsCollection + { + [TelemetryConstants.Tags.ProcessPid] = processId + })); + } + + public void AddDotNetProcessStartResult(bool started, int? processId) + { + if (started) + { + Debug.Assert(processId is not null); + AddDotNetProcessStartedEvent(processId.Value); + } + else + { + AddDotNetProcessStartFailedEvent(); + } + } + + public void AddRunAppHostStartedEvent() => AddEvent(Events.RunAppHostStarted); + + public void AddStartAppHostBackchannelConnectedEvent() => AddEvent(Events.StartAppHostBackchannelConnected); + + public void SetAppHostBackchannelConnected(bool connected) => SetTag(Tags.AppHostBackchannelConnected, connected); + + public void SetAppHostBuildSuccess(bool buildSuccess) => SetTag(Tags.AppHostBuildSuccess, buildSuccess); + + public void SetAppHostBuildExitCode(int exitCode) + { + SetProcessExitCode(exitCode); + if (exitCode != 0) + { + SetError($"Build exited with code {exitCode}."); + } + } + + public void SetAppHostCompatibility(bool isCompatible, bool supportsBackchannel, string? aspireHostingVersion) + { + SetTag(Tags.AppHostIsCompatible, isCompatible); + SetTag(Tags.AppHostSupportsBackchannel, supportsBackchannel); + SetTag(Tags.AppHostAspireHostingVersion, aspireHostingVersion); + } + + public void SetAppHostDashboardUrls(DashboardUrlsState? 
dashboardUrls) + { + SetTag(Tags.AppHostDashboardHealthy, dashboardUrls?.DashboardHealthy); + SetTag(Tags.AppHostDashboardHasUrl, !string.IsNullOrEmpty(dashboardUrls?.BaseUrlWithLoginToken)); + SetTag(Tags.AppHostDashboardHasCodespacesUrl, !string.IsNullOrEmpty(dashboardUrls?.CodespacesUrlWithLoginToken)); + } + + public void SetAppHostDashboardHealthy(bool? healthy) => SetTag(Tags.AppHostDashboardHealthy, healthy); + + public void SetAppHostServerImplementation(string implementationName) => SetTag(Tags.AppHostServerImplementation, implementationName); + + public void SetAppHostExtensionHasBuildCapability(bool hasCapability) => SetTag(Tags.AppHostExtensionHasBuildCapability, hasCapability); + + public void SetAppHostExtensionHost(bool extensionHost) => SetTag(Tags.AppHostExtensionHost, extensionHost); + + public void SetAppHostLanguage(string? languageId) => SetTag(Tags.AppHostLanguage, languageId); + + public void SetAppHostNoBuild(bool noBuild) => SetTag(Tags.AppHostNoBuild, noBuild); + + public void SetAppHostNoRestore(bool noRestore) => SetTag(Tags.AppHostNoRestore, noRestore); + + public void SetAppHostProjectFileSpecified(bool specified) => SetTag(Tags.AppHostProjectFileSpecified, specified); + + public void SetAppHostRunningInstanceResult(object? 
result) => SetTag(Tags.AppHostRunningInstanceResult, result?.ToString()); + + public void SetAppHostWatch(bool watch) => SetTag(Tags.AppHostWatch, watch); + + public void SetAppHostWaitForDebugger(bool waitForDebugger) => SetTag(Tags.AppHostWaitForDebugger, waitForDebugger); + + public void SetBackchannelAutoReconnect(bool autoReconnect) => SetTag(Tags.BackchannelAutoReconnect, autoReconnect); + + public void SetBackchannelCapabilitySummary(string[] capabilities, string baselineCapability) + { + SetTag(Tags.BackchannelCapabilityCount, capabilities.Length); + SetTag(Tags.BackchannelHasBaselineCapability, capabilities.Any(capability => capability == baselineCapability)); + } + + public void SetBackchannelExpectedHash(string expectedHash) => SetTag(Tags.BackchannelExpectedHash, expectedHash); + + public void SetBackchannelHasLegacyHash(bool hasLegacyHash) => SetTag(Tags.BackchannelHasLegacyHash, hasLegacyHash); + + public void SetBackchannelRetryCount(int retryCount) => SetTag(Tags.BackchannelRetryCount, retryCount); + + public void SetBackchannelScanCount(int scanCount) => SetTag(Tags.BackchannelScanCount, scanCount); + + public void SetBackchannelSocketFile(string socketPath) => SetTag(Tags.BackchannelSocketFile, Path.GetFileName(socketPath)); + + public void SetChildCommand(string command) => SetTag(Tags.ChildCommand, command); + + public void SetDevCertificateEnvironmentVariables(int count) => SetTag(Tags.DevCertificateEnvironmentVariableCount, count); + + public void SetDotNetArgsCount(int argsCount) => SetTag(Tags.DotNetArgsCount, argsCount); + + public void SetDotNetBinlogPath(string binlogPath) + { + SetTag(Tags.DotNetBinlogEnabled, true); + SetTag(Tags.DotNetBinlogPath, binlogPath); + SetTag(Tags.DotNetBinlogArtifactType, Values.MsBuildBinlog); + } + + public void SetDotNetBinlogSkippedUnsupportedCommand() + { + SetTag(Tags.DotNetBinlogEnabled, false); + SetTag(Tags.DotNetBinlogSkipReason, Values.UnsupportedDotNetCommand); + } + + public void 
SetDotNetInvocation(string dotnetCommand, FileInfo? projectFile, DirectoryInfo workingDirectory, ProcessInvocationOptions options) + { + SetTag(Tags.DotNetCommand, dotnetCommand); + SetTag(Tags.DotNetProjectFile, projectFile?.FullName); + SetTag(Tags.DotNetWorkingDirectory, workingDirectory.FullName); + SetTag(Tags.DotNetNoLaunchProfile, options.NoLaunchProfile); + SetTag(Tags.DotNetStartDebugSession, options.StartDebugSession); + SetTag(Tags.DotNetDebug, options.Debug); + } + + public void SetGuestInvocation(string languageId, string displayName, string command, int argsCount, DirectoryInfo workingDirectory) + { + SetTag(Tags.GuestRuntimeLanguage, languageId); + SetTag(Tags.GuestRuntimeDisplayName, displayName); + SetTag(Tags.GuestCommand, command); + SetTag(Tags.GuestWorkingDirectory, workingDirectory.FullName); + SetProcessExecutableName(Path.GetFileName(command)); + SetProcessCommandArgsCount(argsCount); + } + + public void SetNpmInvocation(string command, int argsCount, string workingDirectory) + { + SetTag(Tags.NpmCommand, command); + SetTag(Tags.NpmWorkingDirectory, workingDirectory); + SetProcessExecutableName(Path.GetFileName(command)); + SetProcessCommandArgsCount(argsCount); + } + + public void SetDotNetMsBuildServer(string? msBuildServer) => SetTag(Tags.DotNetMsBuildServer, msBuildServer); + + public void SetDotNetResolvedExecutable(string dotnetPath, string? 
msBuildServer) + { + SetProcessExecutableName(Path.GetFileName(dotnetPath)); + SetDotNetMsBuildServer(msBuildServer); + } + + public void SetDotNetCompleted(int exitCode, int stdoutLineCount, int stderrLineCount) + { + SetProcessExitCode(exitCode); + SetDotNetOutputLineCounts(stdoutLineCount, stderrLineCount); + AddDotNetProcessExitedEvent(); + } + + public void SetDotNetOutputLineCounts(int stdoutLineCount, int stderrLineCount) + { + SetTag(Tags.DotNetStdoutLines, stdoutLineCount); + SetTag(Tags.DotNetStderrLines, stderrLineCount); + } + + public void SetError(string description) => activity?.SetStatus(ActivityStatusCode.Error, description); + + public void SetProcessCommandArgsCount(int argsCount) => SetTag(Tags.ProcessCommandArgsCount, argsCount); + + public void SetProcessExecutableName(string? executableName) => SetTag(TelemetryConstants.Tags.ProcessExecutableName, executableName); + + public void SetProcessExitCode(int exitCode) => SetTag(TelemetryConstants.Tags.ProcessExitCode, exitCode); + + public void SetProcessId(int processId) => SetTag(TelemetryConstants.Tags.ProcessPid, processId); + + public void Dispose() + { + if (ownsActivity) + { + activity?.Dispose(); + } + } + + private void AddEvent(string name) => activity?.AddEvent(new ActivityEvent(name)); + + private void SetTag(string key, object? value) => activity?.SetTag(key, value); + } +} diff --git a/src/Aspire.Cli/Telemetry/TelemetryManager.cs b/src/Aspire.Cli/Telemetry/TelemetryManager.cs index 8747c601ffa..19888f91b7c 100644 --- a/src/Aspire.Cli/Telemetry/TelemetryManager.cs +++ b/src/Aspire.Cli/Telemetry/TelemetryManager.cs @@ -48,6 +48,9 @@ public TelemetryManager(IConfiguration configuration, string[]? 
args = null) #if DEBUG var useOtlpExporter = !string.IsNullOrEmpty(configuration[AspireCliTelemetry.OtlpExporterEndpointConfigKey]); var consoleExporterLevel = configuration.GetEnum(AspireCliTelemetry.ConsoleExporterLevelConfigKey, defaultValue: null); + var profilingEnabled = + configuration.GetBool(Aspire.Hosting.KnownConfigNames.ProfilingEnabled) ?? + configuration.GetBool(Aspire.Hosting.KnownConfigNames.Legacy.StartupProfilingEnabled, defaultValue: false); #else var useOtlpExporter = false; ConsoleExporterLevel? consoleExporterLevel = null; @@ -97,6 +100,11 @@ public TelemetryManager(IConfiguration configuration, string[]? args = null) .AddSource(AspireCliTelemetry.ReportedActivitySourceName) .SetResourceBuilder(resource); + if (profilingEnabled) + { + diagnosticBuilder.AddSource(ProfilingTelemetry.ActivitySourceName); + } + if (consoleExporterLevel == ConsoleExporterLevel.Diagnostic) { diagnosticBuilder.AddConsoleExporter(); diff --git a/src/Aspire.Cli/Telemetry/TelemetryServiceCollectionExtensions.cs b/src/Aspire.Cli/Telemetry/TelemetryServiceCollectionExtensions.cs index 74cbc17ebe4..ef96eef5319 100644 --- a/src/Aspire.Cli/Telemetry/TelemetryServiceCollectionExtensions.cs +++ b/src/Aspire.Cli/Telemetry/TelemetryServiceCollectionExtensions.cs @@ -37,6 +37,7 @@ public static IServiceCollection AddTelemetryServices(this IServiceCollection se services.AddSingleton(); services.AddSingleton(); + services.AddSingleton(); services.AddHostedService(sp => sp.GetRequiredService()); return services; diff --git a/src/Aspire.Hosting/ApplicationModel/ResourceNotificationService.cs b/src/Aspire.Hosting/ApplicationModel/ResourceNotificationService.cs index 808924758b9..c959737fd06 100644 --- a/src/Aspire.Hosting/ApplicationModel/ResourceNotificationService.cs +++ b/src/Aspire.Hosting/ApplicationModel/ResourceNotificationService.cs @@ -8,6 +8,8 @@ using System.Runtime.CompilerServices; using System.Threading.Channels; using Aspire.Dashboard.Model; +using 
Aspire.Hosting.Diagnostics; +using Microsoft.Extensions.Configuration; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Diagnostics.HealthChecks; using Microsoft.Extensions.Hosting; @@ -29,6 +31,7 @@ public class ResourceNotificationService : IDisposable private readonly ResourceLoggerService _resourceLoggerService; private Action? OnResourceUpdated { get; set; } + private IConfiguration? Configuration => _serviceProvider.GetService(); // This is for testing internal WaitBehavior DefaultWaitBehavior { get; set; } @@ -138,33 +141,45 @@ public async Task WaitForResourceAsync(string resourceName, IEnumerable< private async Task WaitUntilHealthyAsync(IResource resource, IResource dependency, WaitBehavior waitBehavior, CancellationToken cancellationToken) { - await WaitUntilStateAsync(resource, dependency, waitBehavior, async (resourceLogger, displayName, resourceId, resourceEvent) => + using var activity = ProfilingTelemetry.StartResourceWaitForDependency(Configuration, resource, dependency, WaitType.WaitUntilHealthy, waitBehavior); + + try { - // If our dependency resource has health check annotations we want to wait until they turn healthy - // otherwise we don't care about their health status. - if (dependency.TryGetAnnotationsOfType(out var _)) + await WaitUntilStateAsync(resource, dependency, waitBehavior, async (resourceLogger, displayName, resourceId, resourceEvent) => { - resourceLogger.LogInformation("Waiting for resource '{ResourceName}' to become healthy.", displayName); - await WaitForResourceCoreAsync( - dependency.Name, - re => re.ResourceId == resourceId && re.Snapshot.HealthStatus == HealthStatus.Healthy, - $"Resource '{displayName}' failed to become healthy before the operation was cancelled.", - cancellationToken).ConfigureAwait(false); - } + // If our dependency resource has health check annotations we want to wait until they turn healthy + // otherwise we don't care about their health status. 
+ if (dependency.TryGetAnnotationsOfType(out var _)) + { + resourceLogger.LogInformation("Waiting for resource '{ResourceName}' to become healthy.", displayName); + await WaitForResourceCoreAsync( + dependency.Name, + re => re.ResourceId == resourceId && re.Snapshot.HealthStatus == HealthStatus.Healthy, + $"Resource '{displayName}' failed to become healthy before the operation was cancelled.", + waitCondition: "healthy", + cancellationToken: cancellationToken).ConfigureAwait(false); + } - // Now wait for the resource ready event to be executed. - resourceLogger.LogInformation("Waiting for resource ready to execute for '{ResourceName}'.", displayName); - resourceEvent = await WaitForResourceCoreAsync( - dependency.Name, - re => re.ResourceId == resourceId && re.Snapshot.ResourceReadyEvent is not null, - $"Resource '{displayName}' failed to execute the resource ready event before the operation was cancelled.", - cancellationToken: cancellationToken).ConfigureAwait(false); + // Now wait for the resource ready event to be executed. 
+ resourceLogger.LogInformation("Waiting for resource ready to execute for '{ResourceName}'.", displayName); + resourceEvent = await WaitForResourceCoreAsync( + dependency.Name, + re => re.ResourceId == resourceId && re.Snapshot.ResourceReadyEvent is not null, + $"Resource '{displayName}' failed to execute the resource ready event before the operation was cancelled.", + waitCondition: "resource_ready", + cancellationToken: cancellationToken).ConfigureAwait(false); - // Observe the result of the resource ready event task - await resourceEvent.Snapshot.ResourceReadyEvent!.EventTask.WaitAsync(cancellationToken).ConfigureAwait(false); + // Observe the result of the resource ready event task + await resourceEvent.Snapshot.ResourceReadyEvent!.EventTask.WaitAsync(cancellationToken).ConfigureAwait(false); - resourceLogger.LogInformation("Finished waiting for resource '{ResourceName}'.", displayName); - }, cancellationToken).ConfigureAwait(false); + resourceLogger.LogInformation("Finished waiting for resource '{ResourceName}'.", displayName); + }, cancellationToken).ConfigureAwait(false); + } + catch (Exception ex) + { + activity.SetError(ex); + throw; + } } /// @@ -226,6 +241,7 @@ public async Task WaitForResourceHealthyAsync(string resourceName resourceName, re => ShouldYieldHealthyWait(waitBehavior, re.Snapshot), $"Resource '{resourceName}' failed to become healthy before the operation was cancelled.", + waitCondition: "healthy", cancellationToken: cancellationToken).ConfigureAwait(false); if (resourceEvent.Snapshot.HealthStatus != HealthStatus.Healthy) @@ -240,6 +256,7 @@ public async Task WaitForResourceHealthyAsync(string resourceName resourceName, re => re.ResourceId == resourceEvent.ResourceId && re.Snapshot.ResourceReadyEvent is not null, $"Resource '{resourceName}' failed to execute the resource ready event before the operation was cancelled.", + waitCondition: "resource_ready", cancellationToken: cancellationToken).ConfigureAwait(false); // Observe the result of 
the resource ready event task @@ -264,6 +281,9 @@ internal static bool ShouldYieldHealthyWait(WaitBehavior waitBehavior, CustomRes private async Task WaitUntilCompletionAsync(IResource resource, IResource dependency, int exitCode, CancellationToken cancellationToken) { + using var activity = ProfilingTelemetry.StartResourceWaitForDependency(Configuration, resource, dependency, WaitType.WaitForCompletion, waitBehavior: null); + activity.SetResourceWaitExpectedExitCode(exitCode); + var names = dependency.GetResolvedResourceNames(); var tasks = new Task[names.Length]; @@ -287,7 +307,15 @@ await PublishUpdateAsync(resource, s => tasks[i] = Core(displayName, names[i]); } - await Task.WhenAll(tasks).ConfigureAwait(false); + try + { + await Task.WhenAll(tasks).ConfigureAwait(false); + } + catch (Exception ex) + { + activity.SetError(ex); + throw; + } async Task Core(string displayName, string resourceId) { @@ -295,6 +323,7 @@ async Task Core(string displayName, string resourceId) dependency.Name, re => re.ResourceId == resourceId && IsKnownTerminalState(re.Snapshot), $"Resource '{displayName}' failed to reach a terminal state before the operation was cancelled.", + waitCondition: "terminal", cancellationToken: cancellationToken).ConfigureAwait(false); var snapshot = resourceEvent.Snapshot; @@ -364,6 +393,7 @@ async Task Core(string displayName, string resourceId) dependency.Name, re => re.ResourceId == resourceId && IsContinuableState(waitBehavior, re.Snapshot), $"Resource '{displayName}' failed to reach the 'Running' state before the operation was cancelled.", + waitCondition: "running", cancellationToken: cancellationToken).ConfigureAwait(false); var snapshot = resourceEvent.Snapshot; @@ -413,13 +443,23 @@ static bool IsContinuableState(WaitBehavior waitBehavior, CustomResourceSnapshot private async Task WaitUntilStartedAsync(IResource resource, IResource dependency, WaitBehavior waitBehavior, CancellationToken cancellationToken) { - await WaitUntilStateAsync(resource, 
dependency, waitBehavior, (resourceLogger, displayName, resourceId, resourceEvent) => + using var activity = ProfilingTelemetry.StartResourceWaitForDependency(Configuration, resource, dependency, WaitType.WaitUntilStarted, waitBehavior); + + try + { + await WaitUntilStateAsync(resource, dependency, waitBehavior, (resourceLogger, displayName, resourceId, resourceEvent) => + { + // Unlike WaitUntilHealthyAsync, we don't wait for health checks here. + // We only wait for the resource to reach the Running state. + resourceLogger.LogInformation("Finished waiting for resource '{ResourceName}' to start.", displayName); + return Task.CompletedTask; + }, cancellationToken).ConfigureAwait(false); + } + catch (Exception ex) { - // Unlike WaitUntilHealthyAsync, we don't wait for health checks here. - // We only wait for the resource to reach the Running state. - resourceLogger.LogInformation("Finished waiting for resource '{ResourceName}' to start.", displayName); - return Task.CompletedTask; - }, cancellationToken).ConfigureAwait(false); + activity.SetError(ex); + throw; + } } /// @@ -436,26 +476,42 @@ public async Task WaitForDependenciesAsync(IResource resource, CancellationToken return; } - var pendingDependencies = new List(); - foreach (var waitAnnotation in waitAnnotations) + var waitAnnotationList = waitAnnotations.ToArray(); + if (waitAnnotationList.Length == 0) { - if (waitAnnotation.Resource is IResourceWithoutLifetime) + return; + } + + using var activity = ProfilingTelemetry.StartResourceWaitForDependencies(Configuration, resource, waitAnnotationList.Length); + + try + { + var pendingDependencies = new List(); + foreach (var waitAnnotation in waitAnnotationList) { - // IResourceWithoutLifetime are inert and don't need to be waited on. - continue; + if (waitAnnotation.Resource is IResourceWithoutLifetime) + { + // IResourceWithoutLifetime are inert and don't need to be waited on. 
+ continue; + } + + var pendingDependency = waitAnnotation.WaitType switch + { + WaitType.WaitUntilHealthy => WaitUntilHealthyAsync(resource, waitAnnotation.Resource, waitAnnotation.WaitBehavior ?? DefaultWaitBehavior, cancellationToken), + WaitType.WaitForCompletion => WaitUntilCompletionAsync(resource, waitAnnotation.Resource, waitAnnotation.ExitCode, cancellationToken), + WaitType.WaitUntilStarted => WaitUntilStartedAsync(resource, waitAnnotation.Resource, waitAnnotation.WaitBehavior ?? DefaultWaitBehavior, cancellationToken), + _ => throw new DistributedApplicationException($"Unexpected wait type: {waitAnnotation.WaitType}") + }; + pendingDependencies.Add(pendingDependency); } - var pendingDependency = waitAnnotation.WaitType switch - { - WaitType.WaitUntilHealthy => WaitUntilHealthyAsync(resource, waitAnnotation.Resource, waitAnnotation.WaitBehavior ?? DefaultWaitBehavior, cancellationToken), - WaitType.WaitForCompletion => WaitUntilCompletionAsync(resource, waitAnnotation.Resource, waitAnnotation.ExitCode, cancellationToken), - WaitType.WaitUntilStarted => WaitUntilStartedAsync(resource, waitAnnotation.Resource, waitAnnotation.WaitBehavior ?? 
DefaultWaitBehavior, cancellationToken), - _ => throw new DistributedApplicationException($"Unexpected wait type: {waitAnnotation.WaitType}") - }; - pendingDependencies.Add(pendingDependency); + await Task.WhenAll(pendingDependencies).ConfigureAwait(false); + } + catch (Exception ex) + { + activity.SetError(ex); + throw; } - - await Task.WhenAll(pendingDependencies).ConfigureAwait(false); } /// @@ -485,29 +541,43 @@ public async Task WaitForResourceAsync(string resourceName, Func< return resourceEvent; } - private async Task WaitForResourceCoreAsync(string resourceName, Func predicate, string cancellationMessage, CancellationToken cancellationToken = default) + private async Task WaitForResourceCoreAsync(string resourceName, Func predicate, string cancellationMessage, CancellationToken cancellationToken = default, string waitCondition = "predicate") { + // Waits can run under non-profiling activities; don't attach high-cardinality + // resource wait tags/events unless profiling was explicitly enabled. 
+ var activity = ProfilingTelemetry.CurrentActivity(Configuration); + activity.SetResourceWaitTarget(resourceName, waitCondition); + try { using var watchCts = CancellationTokenSource.CreateLinkedTokenSource(_disposing.Token, cancellationToken); var watchToken = watchCts.Token; await foreach (var resourceEvent in WatchAsync(watchToken).ConfigureAwait(false)) { - if (string.Equals(resourceName, resourceEvent.Resource.Name, StringComparisons.ResourceName) && predicate(resourceEvent)) + if (!string.Equals(resourceName, resourceEvent.Resource.Name, StringComparisons.ResourceName)) + { + continue; + } + + activity.AddResourceWaitObserved(resourceEvent, waitCondition); + + if (predicate(resourceEvent)) { + activity.AddResourceWaitCompleted(resourceEvent, waitCondition); return resourceEvent; } } } catch (OperationCanceledException ex) { + activity.AddResourceWaitCancelled(resourceName, waitCondition); + var errorMessage = BuildCancellationErrorMessage(cancellationMessage, resourceName); throw new OperationCanceledException(errorMessage, ex, ex.CancellationToken); } throw new OperationCanceledException(BuildCancellationErrorMessage(cancellationMessage, resourceName)); } - private readonly object _onResourceUpdatedLock = new(); /// diff --git a/src/Aspire.Hosting/Aspire.Hosting.csproj b/src/Aspire.Hosting/Aspire.Hosting.csproj index adc80988fc4..8d87bfa7347 100644 --- a/src/Aspire.Hosting/Aspire.Hosting.csproj +++ b/src/Aspire.Hosting/Aspire.Hosting.csproj @@ -65,6 +65,8 @@ + + diff --git a/src/Aspire.Hosting/Dcp/DcpExecutor.cs b/src/Aspire.Hosting/Dcp/DcpExecutor.cs index e624e39759c..c263900cb98 100644 --- a/src/Aspire.Hosting/Dcp/DcpExecutor.cs +++ b/src/Aspire.Hosting/Dcp/DcpExecutor.cs @@ -12,6 +12,7 @@ using System.Text.RegularExpressions; using Aspire.Dashboard.Model; using Aspire.Hosting.ApplicationModel; +using Aspire.Hosting.Diagnostics; using Aspire.Hosting.Dcp.Model; using Aspire.Hosting.Eventing; using Aspire.Hosting.Utils; @@ -112,7 +113,7 @@ public 
DcpExecutor(ILogger logger, _executionContext = executionContext; _appResources = appResources; - _resourceWatcher = new DcpResourceWatcher(logger, kubernetesService, loggerService, executorEvents, model, _appResources, _shutdownCancellation.Token); + _resourceWatcher = new DcpResourceWatcher(logger, kubernetesService, loggerService, executorEvents, model, _appResources, _configuration, _shutdownCancellation.Token); DeleteResourceRetryPipeline = DcpPipelineBuilder.BuildDeleteRetryPipeline(logger); @@ -126,6 +127,8 @@ public DcpExecutor(ILogger logger, public async Task RunApplicationAsync(CancellationToken ct = default) { + using var activity = ProfilingTelemetry.StartDcpRunApplication(_configuration, _model.Resources.Count); + _dcpInfo = await _dcpDependencyCheckService.GetDcpInfoAsync(cancellationToken: ct).ConfigureAwait(false); Debug.Assert(_dcpInfo is not null, "DCP info should not be null at this point"); @@ -147,16 +150,40 @@ public async Task RunApplicationAsync(CancellationToken ct = default) AspireEventSource.Instance.DcpServiceObjectPreparationStart(); try { - PrepareServices(); + using var prepareServicesActivity = ProfilingTelemetry.StartDcpPrepareServices(_configuration); + try + { + PrepareServices(); + } + catch (Exception ex) + { + prepareServicesActivity.SetError(ex); + throw; + } } finally { AspireEventSource.Instance.DcpServiceObjectPreparationStop(); } - var containers = _containerCreator.PrepareObjects().ToArray(); - _containerCreator.PrepareContainerExecutables(); - var executables = _executableCreator.PrepareObjects().ToArray(); + RenderedModelResource[] containers; + RenderedModelResource[] executables; + using (var prepareResourcesActivity = ProfilingTelemetry.StartDcpPrepareResources(_configuration)) + { + try + { + containers = _containerCreator.PrepareObjects().ToArray(); + _containerCreator.PrepareContainerExecutables(); + executables = _executableCreator.PrepareObjects().ToArray(); + + 
prepareResourcesActivity.SetDcpPreparedResourceCounts(containers.Length, executables.Length); + } + catch (Exception ex) + { + prepareResourcesActivity.SetError(ex); + throw; + } + } await _executorEvents.PublishAsync(new OnResourcesPreparedContext(ct)).ConfigureAwait(false); @@ -227,6 +254,7 @@ Task createTunnelFunc(ContainerCreationContext cctx) => Task.Run(async () => } catch (Exception ex) { + activity.SetError(ex); _shutdownCancellation.Cancel(); _containerContextSource.TrySetException(ex); throw; @@ -455,6 +483,7 @@ private async Task UpdateWithEffectiveAddressInfo(IEnumerable services, var createServicePipeline = DcpPipelineBuilder.BuildObjectWatchRetryPipeline(_options.Value, _logger, timeout); var initialServiceCount = needAddressAllocated.Length; HashSet stillPending = [.. needAddressAllocated.Select(s => s.Metadata.Name)]; + using var activity = ProfilingTelemetry.StartDcpAllocateServiceAddresses(_configuration, initialServiceCount); try { @@ -470,6 +499,7 @@ private async Task UpdateWithEffectiveAddressInfo(IEnumerable services, } original.ApplyAddressInfoFrom(observed); + activity.AddDcpServiceAddressAllocated(original.Metadata.Name); AspireEventSource.Instance.DcpServiceAddressAllocated(original.Metadata.Name); return true; }, @@ -482,6 +512,7 @@ private async Task UpdateWithEffectiveAddressInfo(IEnumerable services, if (stillPending.Contains(sar.Metadata.Name)) { _distributedApplicationLogger.LogWarning("Unable to allocate a network port for service '{ServiceName}'; service may be unreachable and its clients may not work properly.", sar.Metadata.Name); + activity.AddDcpServiceAddressAllocationFailed(sar.Metadata.Name); AspireEventSource.Instance.DcpServiceAddressAllocationFailed(sar.Metadata.Name); } } @@ -509,9 +540,15 @@ private async Task UpdateWithEffectiveAddressInfo(IEnumerable services, cs.Service!.Status!.EffectiveAddress = ContainerHostName; } } + catch (Exception ex) + { + activity.SetError(ex); + throw; + } finally { 
AspireEventSource.Instance.DcpServiceAddressAllocationStop(initialServiceCount - stillPending.Count); + activity.SetDcpServiceAllocatedCount(initialServiceCount - stillPending.Count); } } @@ -580,6 +617,8 @@ private async Task CreateDcpObjectsAsync(IEnumerable objects, Cancellati return; } + using var activity = ProfilingTelemetry.StartDcpCreateObjects(_configuration, RT.ObjectKind, toCreate.Length); + AspireEventSource.Instance.DcpObjectSetCreationStart(RT.ObjectKind, toCreate.Length); try { @@ -590,8 +629,18 @@ private async Task CreateDcpObjectsAsync(IEnumerable objects, Cancellati { try { + using var createObjectActivity = ProfilingTelemetry.StartDcpCreateObject(_configuration, rtc.Kind, rtc.Metadata.Name); + createObjectActivity.AnnotateTraceContext(rtc.Annotate); AspireEventSource.Instance.DcpObjectCreationStart(rtc.Kind, rtc.Metadata.Name); - await _kubernetesService.CreateAsync(rtc, cancellationToken).ConfigureAwait(false); + try + { + await _kubernetesService.CreateAsync(rtc, cancellationToken).ConfigureAwait(false); + } + catch (Exception ex) + { + createObjectActivity.SetError(ex); + throw; + } } finally { @@ -604,10 +653,16 @@ private async Task CreateDcpObjectsAsync(IEnumerable objects, Cancellati } catch (OperationCanceledException ex) { + activity.SetError(ex); // We catch and suppress the OperationCancelledException because the user may CTRL-C // during start up of the resources. 
_logger.LogDebug(ex, "Cancellation during creation of resources."); } + catch (Exception ex) + { + activity.SetError(ex); + throw; + } finally { AspireEventSource.Instance.DcpObjectSetCreationStop(RT.ObjectKind, toCreate.Length); @@ -868,6 +923,7 @@ public async Task CreateRenderedResourcesAsync( var resourceKind = allResourceKinds.First(); var tasks = new List(); + using var activity = ProfilingTelemetry.StartDcpCreateRenderedResources(_configuration, resourceKind, allResources.Length); try { @@ -881,6 +937,11 @@ public async Task CreateRenderedResourcesAsync( await Task.WhenAll(tasks).WaitAsync(cancellationToken).ConfigureAwait(false); } + catch (Exception ex) + { + activity.SetError(ex); + throw; + } finally { AspireEventSource.Instance.DcpObjectSetCreationStop(resourceKind, allResources.Length); @@ -903,6 +964,7 @@ private async Task CreateResourceReplicasAsync( var resourceType = GetResourceType(replicaResources.First().DcpResource, modelResource); Debug.Assert(replicaResources.Any()); var replicas = replicaResources.ToArray(); + using var activity = ProfilingTelemetry.StartResourceCreate(_configuration, modelResource, resourceType, replicas.Length); try { @@ -956,11 +1018,17 @@ await _executorEvents.PublishAsync(new OnResourceChangedContext( { try { + using var replicaActivity = ProfilingTelemetry.StartDcpCreateResourceReplica(_configuration, er.ModelResource, er.DcpResourceKind, er.DcpResourceName); AspireEventSource.Instance.DcpObjectCreationStart(er.DcpResourceKind, er.DcpResourceName); try { await creator.CreateObjectAsync(er, context, resourceLogger, this, cancellationToken).ConfigureAwait(false); } + catch (Exception ex) + { + replicaActivity.SetError(ex); + throw; + } finally { AspireEventSource.Instance.DcpObjectCreationStop(er.DcpResourceKind, er.DcpResourceName); @@ -983,6 +1051,7 @@ await _executorEvents.PublishAsync(new OnResourceChangedContext( } catch (Exception ex) { + activity.SetError(ex); resourceLogger.LogError(ex, "Failed to create 
resource {ResourceName}", modelResource.Name); await _executorEvents.PublishAsync(new OnResourceFailedToStartContext(cancellationToken, resourceType, modelResource, DcpResourceName: null)).ConfigureAwait(false); } @@ -1066,6 +1135,7 @@ public async Task StopResourceAsync(IResourceReference resourceReference, Cancel var appResource = (IAppResource)resourceReference; bool stopped = false; + using var activity = ProfilingTelemetry.StartResourceStop(_configuration, resourceReference.ModelResource, appResource.DcpResourceKind, appResource.DcpResourceName); AspireEventSource.Instance.StopResourceStart(appResource.DcpResourceKind, appResource.DcpResourceName); try { @@ -1113,8 +1183,14 @@ public async Task StopResourceAsync(IResourceReference resourceReference, Cancel } }, resourceReference.DcpResourceName, cancellationToken).ConfigureAwait(false); } + catch (Exception ex) + { + activity.SetError(ex); + throw; + } finally { + activity.SetResourceStopped(stopped); AspireEventSource.Instance.StopResourceStop(appResource.DcpResourceKind, appResource.DcpResourceName); } @@ -1129,6 +1205,7 @@ public async Task StartResourceAsync(IResourceReference resourceReference, Cance var appResource = (IAppResource)resourceReference; var resourceType = GetResourceType(appResource.DcpResource, resourceReference.ModelResource); var resourceLogger = _loggerService.GetLogger(resourceReference.DcpResourceName); + using var activity = ProfilingTelemetry.StartResourceStart(_configuration, resourceReference.ModelResource, appResource.DcpResourceKind, appResource.DcpResourceName, resourceType); AspireEventSource.Instance.StartResourceStart(appResource.DcpResourceKind, appResource.DcpResourceName); try @@ -1181,6 +1258,7 @@ public async Task StartResourceAsync(IResourceReference resourceReference, Cance } catch (Exception ex) { + activity.SetError(ex); _logger.LogError(ex, "Failed to start resource {ResourceName}", resourceReference.ModelResource.Name); await _executorEvents.PublishAsync(new 
OnResourceFailedToStartContext(cancellationToken, resourceType, resourceReference.ModelResource, resourceReference.DcpResourceName)).ConfigureAwait(false); throw; diff --git a/src/Aspire.Hosting/Dcp/DcpResourceWatcher.cs b/src/Aspire.Hosting/Dcp/DcpResourceWatcher.cs index 6554bb6a06a..ed5c15efbb4 100644 --- a/src/Aspire.Hosting/Dcp/DcpResourceWatcher.cs +++ b/src/Aspire.Hosting/Dcp/DcpResourceWatcher.cs @@ -8,10 +8,12 @@ using System.Threading.Channels; using Aspire.Hosting.ApplicationModel; using Aspire.Hosting.Dashboard; +using Aspire.Hosting.Diagnostics; using Aspire.Hosting.Dcp.Model; using Aspire.Shared.ConsoleLogs; using k8s; using k8s.Autorest; +using Microsoft.Extensions.Configuration; using Microsoft.Extensions.Logging; using Polly; @@ -27,6 +29,7 @@ internal sealed class DcpResourceWatcher : IConsoleLogsService, IAsyncDisposable private readonly ResourceLoggerService _loggerService; private readonly DcpExecutorEvents _executorEvents; private readonly ILogger _logger; + private readonly IConfiguration _configuration; private readonly CancellationToken _shutdownToken; private readonly DcpResourceState _resourceState; @@ -51,12 +54,14 @@ public DcpResourceWatcher( DcpExecutorEvents executorEvents, DistributedApplicationModel model, DcpAppResourceStore appResources, + IConfiguration configuration, CancellationToken shutdownToken) { _kubernetesService = kubernetesService; _loggerService = loggerService; _executorEvents = executorEvents; _logger = logger; + _configuration = configuration; _shutdownToken = shutdownToken; _resourceState = new(model.Resources.ToDictionary(r => r.Name), appResources.Get()); @@ -262,6 +267,7 @@ private async Task ProcessResourceChange(WatchEventType watchEventType, T res var resourceType = DcpExecutor.GetResourceType(resource, appModelResource); var status = GetResourceStatus(resource); + AddDcpResourceObservedEvent(resource, appModelResource, resourceKind, status); await _executorEvents.PublishAsync(new 
OnResourceChangedContext(_shutdownToken, resourceType, appModelResource, resource.Metadata.Name, status, s => snapshotFactory(resource, s))).ConfigureAwait(false); if (resource is Container { LogsAvailable: true } || @@ -326,6 +332,19 @@ internal static ResourceStatus GetResourceStatus(CustomResource resource) return new(null, null, null); } + private void AddDcpResourceObservedEvent(CustomResource resource, IResource appModelResource, string resourceKind, ResourceStatus status) + { + using var activity = ProfilingTelemetry.StartDcpResourceObserved( + _configuration, + appModelResource, + resourceKind, + resource.Metadata.Name, + status.State, + status.StartupTimestamp, + status.FinishedTimestamp, + resource.Metadata.Annotations); + } + public async IAsyncEnumerable> GetAllLogsAsync(string resourceName, [EnumeratorCancellation] CancellationToken cancellationToken) { IAsyncEnumerable>? enumerable = null; diff --git a/src/Aspire.Hosting/Dcp/KubernetesService.cs b/src/Aspire.Hosting/Dcp/KubernetesService.cs index d5472421291..20d36c2c7e3 100644 --- a/src/Aspire.Hosting/Dcp/KubernetesService.cs +++ b/src/Aspire.Hosting/Dcp/KubernetesService.cs @@ -4,12 +4,14 @@ using System.Diagnostics; using System.Globalization; using System.Runtime.CompilerServices; +using Aspire.Hosting.Diagnostics; using Aspire.Hosting.Dcp.Model; using Aspire.Hosting.Utils; using k8s; using k8s.Autorest; using k8s.Exceptions; using k8s.Models; +using Microsoft.Extensions.Configuration; using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; using Polly; @@ -76,7 +78,7 @@ Task GetLogStreamAsync( Task CleanupResourcesAsync(CancellationToken cancellationToken = default); } -internal sealed class KubernetesService(ILogger logger, IOptions dcpOptions, Locations locations) : IKubernetesService, IDisposable +internal sealed class KubernetesService(ILogger logger, IOptions dcpOptions, Locations locations, IConfiguration configuration) : IKubernetesService, IDisposable { // A 
pseudo-resource type used for log operations on the DCP execution document. private const string DcpExecutionResourceType = "DcpExecution"; @@ -473,7 +475,10 @@ private async Task ExecuteWithRetry( var delay = s_initialRetryDelay; AspireEventSource.Instance.DcpApiCallStart(operationType, resourceType); - var resiliencePipeline = CreateKubernetesCallResiliencePipeline(operationType, resourceType, isRetryable); + using var activity = ProfilingTelemetry.StartDcpKubernetesApi(configuration, operationType, resourceType); + var retryCount = 0; + + var resiliencePipeline = CreateKubernetesCallResiliencePipeline(operationType, resourceType, isRetryable, activity, () => retryCount++); try { @@ -485,6 +490,7 @@ private async Task ExecuteWithRetry( } finally { + activity.SetDcpApiRetryCount(retryCount); AspireEventSource.Instance.DcpApiCallStop(operationType, resourceType); } } @@ -498,7 +504,9 @@ ex is KubeConfigException || private ResiliencePipeline CreateKubernetesCallResiliencePipeline( DcpApiOperationType operationType, string resourceType, - Func isRetryable) + Func isRetryable, + ProfilingTelemetry.ActivityScope activity, + Action recordRetry) { var resiliencePipeline = new ResiliencePipelineBuilder() .AddTimeout(new TimeoutStrategyOptions @@ -506,6 +514,7 @@ private ResiliencePipeline CreateKubernetesCallResiliencePipeline( Timeout = MaxRetryDuration, OnTimeout = (_) => { + activity.AddKubernetesApiTimeout(); AspireEventSource.Instance.DcpApiCallTimeout(operationType, resourceType); return ValueTask.CompletedTask; } @@ -519,6 +528,8 @@ private ResiliencePipeline CreateKubernetesCallResiliencePipeline( MaxDelay = TimeSpan.FromSeconds(5), OnRetry = (retry) => { + recordRetry(); + activity.AddKubernetesApiRetry(retry.AttemptNumber, retry.RetryDelay, retry.Outcome.Exception); AspireEventSource.Instance.DcpApiCallRetry(operationType, resourceType); return ValueTask.CompletedTask; } @@ -566,13 +577,20 @@ private async Task EnsureKubernetesAsync(CancellationToken 
cancellationToken = d return; } + using var activity = ProfilingTelemetry.StartDcpEnsureKubernetesClient(configuration, File.Exists(locations.DcpKubeconfigPath)); + + var lockWaitStopwatch = Stopwatch.StartNew(); await _kubeconfigReadSemaphore.WaitAsync(-1, cancellationToken).ConfigureAwait(false); + lockWaitStopwatch.Stop(); + activity.SetDcpKubeconfigLockWait(lockWaitStopwatch.ElapsedMilliseconds); + activity.AddKubeconfigLockAcquired(); try { // Second chance shortcut if multiple threads got caught. if (_kubernetes != null) { + activity.SetDcpKubernetesClientAlreadyInitialized(); return; } @@ -600,9 +618,12 @@ private async Task EnsureKubernetesAsync(CancellationToken cancellationToken = d locations.DcpKubeconfigPath, readStopwatch.ElapsedMilliseconds ); + activity.SetDcpKubeconfigReadDuration(readStopwatch.ElapsedMilliseconds); + activity.AddKubeconfigReadComplete(); return new DcpKubernetesClient(config); }, cancellationToken).ConfigureAwait(false); + activity.AddKubernetesClientCreated(); } finally { diff --git a/src/Aspire.Hosting/Diagnostics/ProfilingTelemetry.cs b/src/Aspire.Hosting/Diagnostics/ProfilingTelemetry.cs new file mode 100644 index 00000000000..2e827dc8796 --- /dev/null +++ b/src/Aspire.Hosting/Diagnostics/ProfilingTelemetry.cs @@ -0,0 +1,641 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics; +using System.Globalization; +using Aspire.Hosting.ApplicationModel; +using Aspire.Hosting.Dcp; +using Microsoft.Extensions.Configuration; + +namespace Aspire.Hosting.Diagnostics; + +internal static class ProfilingTelemetry +{ + public const string ActivitySourceName = "Aspire.Hosting.Profiling"; + + internal static class Activities + { + // Activity names describe AppHost/DCP orchestration work. Keep names stable + // because profiling exports are queried across CLI and AppHost versions. 
+ public const string DcpRunApplication = "aspire.hosting.dcp.run_application"; + public const string DcpPrepareServices = "aspire.hosting.dcp.prepare_services"; + public const string DcpPrepareResources = "aspire.hosting.dcp.prepare_resources"; + public const string DcpAllocateServiceAddresses = "aspire.hosting.dcp.allocate_service_addresses"; + public const string DcpCreateObjects = "aspire.hosting.dcp.create_objects"; + public const string DcpCreateObject = "aspire.hosting.dcp.create_object"; + public const string DcpCreateRenderedResources = "aspire.hosting.dcp.create_rendered_resources"; + public const string ResourceCreate = "aspire.hosting.resource.create"; + public const string DcpCreateResourceReplica = "aspire.hosting.dcp.create_resource_replica"; + public const string DcpKubernetesApi = "aspire.hosting.dcp.kubernetes_api"; + public const string DcpEnsureKubernetesClient = "aspire.hosting.dcp.ensure_kubernetes_client"; + public const string DcpResourceObserved = "aspire.hosting.dcp.resource_observed"; + public const string ResourceBeforeStartWait = "aspire.hosting.resource.before_start_wait"; + public const string ResourceWaitForDependency = "aspire.hosting.resource.wait_for_dependency"; + public const string ResourceWaitForDependencies = "aspire.hosting.resource.wait_for_dependencies"; + public const string ResourceStop = "aspire.hosting.resource.stop"; + public const string ResourceStart = "aspire.hosting.resource.start"; + } + + internal static class Tags + { + // Tags capture dimensions and diagnostics for spans/events, such as resource + // identity, DCP object identity, wait conditions, exit codes, and timing data. 
+ public const string ProfilingSessionId = "aspire.profiling.session_id"; + public const string LegacyStartupOperationId = "aspire.startup.operation_id"; + public const string AppHostName = "aspire.apphost.name"; + public const string AppHostOperation = "aspire.apphost.operation"; + public const string ResourceName = "aspire.resource.name"; + public const string ResourceId = "aspire.resource.id"; + public const string ResourceType = "aspire.resource.type"; + public const string ResourceKind = "aspire.resource.kind"; + public const string ResourceCount = "aspire.resource.count"; + public const string ResourceReplicaCount = "aspire.resource.replica_count"; + public const string ResourceStopped = "aspire.resource.stopped"; + public const string ResourceState = "aspire.resource.state"; + public const string ResourceHealthStatus = "aspire.resource.health_status"; + public const string ResourceExitCode = "aspire.resource.exit_code"; + public const string ResourceReady = "aspire.resource.ready"; + public const string ResourceSnapshotVersion = "aspire.resource.snapshot.version"; + public const string ResourceStartTime = "aspire.resource.start_time"; + public const string ResourceStopTime = "aspire.resource.stop_time"; + public const string ResourceWaitExpectedExitCode = "aspire.resource.wait.expected_exit_code"; + public const string ResourceWaitDependencyCount = "aspire.resource.wait.dependency_count"; + public const string ResourceWaitType = "aspire.resource.wait.type"; + public const string ResourceWaitDependencyName = "aspire.resource.wait.dependency.name"; + public const string ResourceWaitDependencyType = "aspire.resource.wait.dependency.type"; + public const string ResourceWaitBehavior = "aspire.resource.wait.behavior"; + public const string ResourceWaitTargetName = "aspire.resource.wait.target.name"; + public const string ResourceWaitCondition = "aspire.resource.wait.condition"; + public const string DcpResourceName = "aspire.dcp.resource.name"; + public const 
string DcpResourceKind = "aspire.dcp.resource.kind"; + public const string DcpResourceCount = "aspire.dcp.resource.count"; + public const string DcpContainerCount = "aspire.dcp.container.count"; + public const string DcpExecutableCount = "aspire.dcp.executable.count"; + public const string DcpServiceCount = "aspire.dcp.service.count"; + public const string DcpServiceAllocatedCount = "aspire.dcp.service.allocated_count"; + public const string DcpServiceName = "aspire.dcp.service.name"; + public const string DcpApiOperation = "aspire.dcp.api.operation"; + public const string DcpApiRetryCount = "aspire.dcp.api.retry_count"; + public const string DcpApiRetryAttempt = "aspire.dcp.api.retry_attempt"; + public const string DcpApiRetryDelayMilliseconds = "aspire.dcp.api.retry_delay_ms"; + public const string DcpKubeconfigExists = "aspire.dcp.kubeconfig.exists"; + public const string DcpKubeconfigLockWaitMilliseconds = "aspire.dcp.kubeconfig.lock_wait_ms"; + public const string DcpKubeconfigReadDurationMilliseconds = "aspire.dcp.kubeconfig.read_duration_ms"; + public const string DcpKubernetesClientAlreadyInitialized = "aspire.dcp.kubernetes_client_already_initialized"; + public const string DcpCreateObjectId = "aspire.hosting.dcp.create_object.id"; + public const string DcpCreateObjectKind = "aspire.hosting.dcp.create_object.kind"; + public const string DcpCreateObjectName = "aspire.hosting.dcp.create_object.name"; + public const string DcpCreateObjectTraceId = "aspire.hosting.dcp.create_object.trace_id"; + public const string DcpCreateObjectSpanId = "aspire.hosting.dcp.create_object.span_id"; + public const string ExceptionType = "exception.type"; + public const string ExceptionMessage = "exception.message"; + } + + internal static class Events + { + // Events mark important moments within longer spans, for example retries, + // readiness observations, resource wait completions, and exception details. 
+ public const string DcpServiceAddressAllocated = "aspire.dcp.service_address_allocated"; + public const string DcpServiceAddressAllocationFailed = "aspire.dcp.service_address_allocation_failed"; + public const string KubernetesApiTimeout = "aspire.hosting.dcp.kubernetes_api.timeout"; + public const string KubernetesApiRetry = "aspire.hosting.dcp.kubernetes_api.retry"; + public const string KubeconfigLockAcquired = "aspire.hosting.dcp.kubeconfig_lock_acquired"; + public const string KubeconfigReadComplete = "aspire.hosting.dcp.kubeconfig_read_complete"; + public const string KubernetesClientCreated = "aspire.hosting.dcp.kubernetes_client_created"; + public const string ResourceWaitObserved = "aspire.resource.wait.observed"; + public const string ResourceWaitCompleted = "aspire.resource.wait.completed"; + public const string ResourceWaitCancelled = "aspire.resource.wait.cancelled"; + public const string Exception = "exception"; + } + + internal static class Annotations + { + // DCP annotations carry profiling trace context through rendered resources so + // later watch/reconcile notifications can reconnect to the resource creation span. + public const string ProfilingSessionId = "aspire-profiling-session-id"; + public const string TraceParent = "aspire-profiling-traceparent"; + public const string TraceState = "aspire-profiling-tracestate"; + public const string LegacyStartupOperationId = "aspire-startup-operation-id"; + public const string LegacyStartupTraceParent = "aspire-startup-traceparent"; + public const string LegacyStartupTraceState = "aspire-startup-tracestate"; + } + + private static readonly ActivitySource s_activitySource = new(ActivitySourceName); + + public static ActivityScope CurrentActivity(IConfiguration? configuration) => + IsEnabled(configuration) ? 
new(Activity.Current, configuration, ownsActivity: false) : default; + + public static IEnumerable> CreateAppHostResourceAttributes(string appHostPath, string operation) + { + return + [ + new(Tags.AppHostName, Path.GetFileName(appHostPath)), + new(Tags.AppHostOperation, operation) + ]; + } + + public static ActivityScope StartDcpRunApplication(IConfiguration? configuration, int resourceCount) + { + var activity = StartActivity(configuration, Activities.DcpRunApplication); + activity.SetResourceCount(resourceCount); + return activity; + } + + public static ActivityScope StartDcpPrepareServices(IConfiguration? configuration) + { + return StartActivity(configuration, Activities.DcpPrepareServices); + } + + public static ActivityScope StartDcpPrepareResources(IConfiguration? configuration) + { + return StartActivity(configuration, Activities.DcpPrepareResources); + } + + public static ActivityScope StartDcpAllocateServiceAddresses(IConfiguration? configuration, int serviceCount) + { + var activity = StartActivity(configuration, Activities.DcpAllocateServiceAddresses); + activity.SetDcpServiceCount(serviceCount); + return activity; + } + + public static ActivityScope StartDcpCreateObjects(IConfiguration? configuration, string resourceKind, int resourceCount) + { + var activity = StartActivity(configuration, Activities.DcpCreateObjects); + activity.SetDcpResourceSet(resourceKind, resourceCount); + return activity; + } + + public static ActivityScope StartDcpCreateObject(IConfiguration? configuration, string resourceKind, string resourceName) + { + var activity = StartActivity(configuration, Activities.DcpCreateObject); + activity.SetDcpResource(resourceKind, resourceName); + activity.SetDcpCreateObject(resourceKind, resourceName); + return activity; + } + + public static ActivityScope StartDcpCreateRenderedResources(IConfiguration? 
configuration, string resourceKind, int resourceCount) + { + var activity = StartActivity(configuration, Activities.DcpCreateRenderedResources); + activity.SetDcpResourceSet(resourceKind, resourceCount); + return activity; + } + + public static ActivityScope StartDcpCreateResourceReplica(IConfiguration? configuration, IResource resource, string resourceKind, string resourceName) + { + var activity = StartActivity(configuration, Activities.DcpCreateResourceReplica); + activity.SetResource(resource); + activity.SetDcpResource(resourceKind, resourceName); + return activity; + } + + public static ActivityScope StartDcpEnsureKubernetesClient(IConfiguration? configuration, bool kubeconfigExists) + { + var activity = StartActivity(configuration, Activities.DcpEnsureKubernetesClient); + activity.SetDcpKubeconfigExists(kubeconfigExists); + return activity; + } + + public static ActivityScope StartDcpKubernetesApi(IConfiguration? configuration, DcpApiOperationType operationType, string resourceType) + { + var activity = StartActivity(configuration, Activities.DcpKubernetesApi); + activity.SetDcpKubernetesApi(operationType, resourceType); + return activity; + } + + public static ActivityScope StartDcpResourceObserved( + IConfiguration? configuration, + IResource appModelResource, + string resourceKind, + string resourceName, + string? state, + DateTime? startupTimestamp, + DateTime? finishedTimestamp, + IDictionary? annotations) + { + // Resource observations arrive from DCP watch notifications after the create-object span has ended, + // so use a short child activity from the annotated trace context instead of an event on Activity.Current. 
+ var activity = StartActivityFromTraceAnnotations(configuration, Activities.DcpResourceObserved, annotations); + activity.SetResource(appModelResource); + activity.SetDcpResource(resourceKind, resourceName); + activity.SetDcpCreateObjectFromTraceAnnotations(resourceKind, resourceName, annotations); + activity.SetResourceObserved(state, startupTimestamp, finishedTimestamp); + return activity; + } + + public static ActivityScope StartResourceBeforeStartWait(IConfiguration? configuration, IResource resource) + { + var activity = StartActivity(configuration, Activities.ResourceBeforeStartWait); + activity.SetResource(resource); + return activity; + } + + public static ActivityScope StartResourceCreate(IConfiguration? configuration, IResource resource, string resourceKind, int replicaCount) + { + var activity = StartActivity(configuration, Activities.ResourceCreate); + activity.SetResource(resource); + activity.SetResourceCreate(resourceKind, replicaCount); + return activity; + } + + public static ActivityScope StartResourceStart(IConfiguration? configuration, IResource resource, string resourceKind, string resourceName, string resourceType) + { + var activity = StartActivity(configuration, Activities.ResourceStart); + activity.SetResource(resource); + activity.SetDcpResource(resourceKind, resourceName); + activity.SetResourceKind(resourceType); + return activity; + } + + public static ActivityScope StartResourceStop(IConfiguration? configuration, IResource resource, string resourceKind, string resourceName) + { + var activity = StartActivity(configuration, Activities.ResourceStop); + activity.SetResource(resource); + activity.SetDcpResource(resourceKind, resourceName); + return activity; + } + + public static ActivityScope StartResourceWaitForDependencies(IConfiguration? 
configuration, IResource resource, int dependencyCount) + { + var activity = StartActivity(configuration, Activities.ResourceWaitForDependencies); + activity.SetResource(resource); + activity.SetResourceWaitDependencyCount(dependencyCount); + return activity; + } + + public static ActivityScope StartResourceWaitForDependency(IConfiguration? configuration, IResource resource, IResource dependency, WaitType waitType, WaitBehavior? waitBehavior) + { + var activity = StartActivity(configuration, Activities.ResourceWaitForDependency); + activity.SetDependencyWait(resource, dependency, waitType, waitBehavior); + return activity; + } + + private static ActivityScope StartActivity(IConfiguration? configuration, string name) + { + if (!IsEnabled(configuration)) + { + return default; + } + + var activity = Activity.Current is null && TryGetProfilingParentContext(configuration, out var parentContext) + ? s_activitySource.StartActivity(name, ActivityKind.Internal, parentContext) + : s_activitySource.StartActivity(name, ActivityKind.Internal); + + AddProfilingSessionId(activity, configuration); + return new ActivityScope(activity, configuration); + } + + private static ActivityScope StartActivityFromTraceAnnotations(IConfiguration? configuration, string name, IDictionary? annotations) + { + if (!IsEnabled(configuration)) + { + return default; + } + + Activity? activity = null; + if (annotations is not null && + TryGetAnnotation(annotations, Annotations.TraceParent, Annotations.LegacyStartupTraceParent, out var traceParent)) + { + // DCP annotations carry the create_object trace context to later watch/reconcile spans. 
+ TryGetAnnotation(annotations, Annotations.TraceState, Annotations.LegacyStartupTraceState, out var traceState); + if (ActivityContext.TryParse(traceParent, traceState, out var parentContext)) + { + activity = s_activitySource.StartActivity(name, ActivityKind.Internal, parentContext); + } + } + + if (activity is null) + { + return StartActivity(configuration, name); + } + + AddProfilingSessionId(activity, configuration, annotations); + + return new ActivityScope(activity, configuration); + } + + private static void SetDcpCreateObjectTags(Activity activity, string resourceKind, string resourceName, string traceId, string spanId) + { + activity.SetTag(Tags.DcpCreateObjectId, $"{resourceKind}/{resourceName}"); + activity.SetTag(Tags.DcpCreateObjectKind, resourceKind); + activity.SetTag(Tags.DcpCreateObjectName, resourceName); + activity.SetTag(Tags.DcpCreateObjectTraceId, traceId); + activity.SetTag(Tags.DcpCreateObjectSpanId, spanId); + } + + private static void AddProfilingSessionId(Activity? activity, IConfiguration? configuration, IDictionary? annotations = null) + { + if (activity is null) + { + return; + } + + var sessionId = annotations is not null && TryGetAnnotation(annotations, Annotations.ProfilingSessionId, Annotations.LegacyStartupOperationId, out var annotationSessionId) + ? annotationSessionId + : GetConfigurationValue(configuration, KnownConfigNames.ProfilingSessionId, KnownConfigNames.Legacy.StartupOperationId); + if (!string.IsNullOrEmpty(sessionId)) + { + activity.SetTag(Tags.ProfilingSessionId, sessionId); + activity.SetTag(Tags.LegacyStartupOperationId, sessionId); + } + } + + private static bool TryGetProfilingParentContext(IConfiguration? 
configuration, out ActivityContext parentContext) + { + var traceParent = GetConfigurationValue(configuration, KnownConfigNames.ProfilingTraceParent, KnownConfigNames.Legacy.StartupTraceParent); + var traceState = GetConfigurationValue(configuration, KnownConfigNames.ProfilingTraceState, KnownConfigNames.Legacy.StartupTraceState); + if (string.IsNullOrEmpty(traceParent)) + { + parentContext = default; + return false; + } + + return ActivityContext.TryParse(traceParent, traceState, out parentContext); + } + + internal static bool IsEnabled(IConfiguration? configuration) + { + return IsTruthy(configuration?[KnownConfigNames.ProfilingEnabled]) || + IsTruthy(configuration?[KnownConfigNames.Legacy.StartupProfilingEnabled]); + } + + private static bool TryGetAnnotation(IDictionary annotations, string name, string legacyName, out string? value) + { + if (annotations.TryGetValue(name, out value) && !string.IsNullOrEmpty(value)) + { + return true; + } + + return annotations.TryGetValue(legacyName, out value) && !string.IsNullOrEmpty(value); + } + + private static string? GetConfigurationValue(IConfiguration? configuration, string name, string legacyName) + { + return configuration?[name] is { Length: > 0 } value + ? value + : configuration?[legacyName]; + } + + private static bool IsTruthy(string? value) + { + return string.Equals(value, "true", StringComparison.OrdinalIgnoreCase) || value == "1"; + } + + internal readonly struct ActivityScope(Activity? activity, IConfiguration? 
configuration = null, bool ownsActivity = true) : IDisposable + { + public void AddDcpServiceAddressAllocated(string serviceName) + { + activity?.AddEvent(new ActivityEvent(Events.DcpServiceAddressAllocated, tags: new ActivityTagsCollection + { + [Tags.DcpServiceName] = serviceName + })); + } + + public void AddDcpServiceAddressAllocationFailed(string serviceName) + { + activity?.AddEvent(new ActivityEvent(Events.DcpServiceAddressAllocationFailed, tags: new ActivityTagsCollection + { + [Tags.DcpServiceName] = serviceName + })); + } + + public void AddKubeconfigLockAcquired() => AddEvent(Events.KubeconfigLockAcquired); + + public void AddKubeconfigReadComplete() => AddEvent(Events.KubeconfigReadComplete); + + public void AddKubernetesApiRetry(int attemptNumber, TimeSpan retryDelay, Exception? exception) + { + activity?.AddEvent(new ActivityEvent(Events.KubernetesApiRetry, tags: new ActivityTagsCollection + { + [Tags.DcpApiRetryAttempt] = attemptNumber, + [Tags.DcpApiRetryDelayMilliseconds] = retryDelay.TotalMilliseconds, + [Tags.ExceptionType] = exception?.GetType().FullName, + [Tags.ExceptionMessage] = exception?.Message + })); + } + + public void AddKubernetesApiTimeout() => AddEvent(Events.KubernetesApiTimeout); + + public void AddKubernetesClientCreated() => AddEvent(Events.KubernetesClientCreated); + + public void AddResourceWaitCancelled(string resourceName, string waitCondition) + { + activity?.AddEvent(new ActivityEvent(Events.ResourceWaitCancelled, tags: new ActivityTagsCollection + { + [Tags.ResourceWaitTargetName] = resourceName, + [Tags.ResourceWaitCondition] = waitCondition + })); + } + + public void AddResourceWaitCompleted(ResourceEvent resourceEvent, string waitCondition) => + AddResourceWaitEvent(Events.ResourceWaitCompleted, resourceEvent, waitCondition); + + public void AddResourceWaitObserved(ResourceEvent resourceEvent, string waitCondition) => + AddResourceWaitEvent(Events.ResourceWaitObserved, resourceEvent, waitCondition); + + public void 
AnnotateTraceContext(Action annotate) + { + if (!IsEnabled(configuration)) + { + return; + } + + var sessionId = GetConfigurationValue(configuration, KnownConfigNames.ProfilingSessionId, KnownConfigNames.Legacy.StartupOperationId); + if (!string.IsNullOrEmpty(sessionId)) + { + annotate(Annotations.ProfilingSessionId, sessionId); + annotate(Annotations.LegacyStartupOperationId, sessionId); + } + + var traceParent = activity?.Id ?? GetConfigurationValue(configuration, KnownConfigNames.ProfilingTraceParent, KnownConfigNames.Legacy.StartupTraceParent); + if (!string.IsNullOrEmpty(traceParent)) + { + annotate(Annotations.TraceParent, traceParent); + annotate(Annotations.LegacyStartupTraceParent, traceParent); + } + + var traceState = activity?.TraceStateString ?? GetConfigurationValue(configuration, KnownConfigNames.ProfilingTraceState, KnownConfigNames.Legacy.StartupTraceState); + if (!string.IsNullOrEmpty(traceState)) + { + annotate(Annotations.TraceState, traceState); + annotate(Annotations.LegacyStartupTraceState, traceState); + } + } + + public void SetDcpCreateObject(string resourceKind, string resourceName) + { + if (activity is null) + { + return; + } + + SetDcpCreateObjectTags(activity, resourceKind, resourceName, activity.TraceId.ToString(), activity.SpanId.ToString()); + } + + public void SetDcpCreateObjectFromTraceAnnotations(string resourceKind, string resourceName, IDictionary? annotations) + { + if (activity is null) + { + return; + } + + if (annotations is not null && + TryGetAnnotation(annotations, Annotations.TraceParent, Annotations.LegacyStartupTraceParent, out var traceParent) && + ActivityContext.TryParse( + traceParent, + TryGetAnnotation(annotations, Annotations.TraceState, Annotations.LegacyStartupTraceState, out var traceState) ? 
traceState : null, + out var createObjectContext)) + { + SetDcpCreateObjectTags(activity, resourceKind, resourceName, createObjectContext.TraceId.ToString(), createObjectContext.SpanId.ToString()); + } + else + { + SetDcpCreateObject(resourceKind, resourceName); + } + } + + public void SetDcpKubeconfigExists(bool exists) => SetTag(Tags.DcpKubeconfigExists, exists); + + public void SetDcpKubeconfigLockWait(long elapsedMilliseconds) => SetTag(Tags.DcpKubeconfigLockWaitMilliseconds, elapsedMilliseconds); + + public void SetDcpKubeconfigReadDuration(long elapsedMilliseconds) => SetTag(Tags.DcpKubeconfigReadDurationMilliseconds, elapsedMilliseconds); + + public void SetDcpKubernetesApi(DcpApiOperationType operationType, string resourceType) + { + SetTag(Tags.DcpApiOperation, operationType.ToString()); + SetTag(Tags.DcpResourceKind, resourceType); + } + + public void SetDcpKubernetesClientAlreadyInitialized() => SetTag(Tags.DcpKubernetesClientAlreadyInitialized, true); + + public void SetDcpPreparedResourceCounts(int containerCount, int executableCount) + { + SetTag(Tags.DcpContainerCount, containerCount); + SetTag(Tags.DcpExecutableCount, executableCount); + } + + public void SetDcpResource(string resourceKind, string resourceName) + { + SetTag(Tags.DcpResourceKind, resourceKind); + SetTag(Tags.DcpResourceName, resourceName); + } + + public void SetDcpResourceSet(string resourceKind, int resourceCount) + { + SetTag(Tags.DcpResourceKind, resourceKind); + SetTag(Tags.DcpResourceCount, resourceCount); + } + + public void SetDcpServiceAllocatedCount(int count) => SetTag(Tags.DcpServiceAllocatedCount, count); + + public void SetDcpServiceCount(int count) => SetTag(Tags.DcpServiceCount, count); + + public void SetDcpApiRetryCount(int retryCount) => SetTag(Tags.DcpApiRetryCount, retryCount); + + public void SetDependencyWait(IResource resource, IResource dependency, WaitType waitType, WaitBehavior? 
waitBehavior) + { + SetResource(resource); + SetTag(Tags.ResourceWaitType, waitType.ToString()); + SetTag(Tags.ResourceWaitDependencyName, dependency.Name); + SetTag(Tags.ResourceWaitDependencyType, dependency.GetType().Name); + SetTag(Tags.ResourceWaitBehavior, waitBehavior?.ToString()); + } + + public void SetError(Exception exception) + { + if (activity is null) + { + return; + } + + activity.SetStatus(ActivityStatusCode.Error, exception.Message); + activity.AddEvent(new ActivityEvent(Events.Exception, tags: new ActivityTagsCollection + { + [Tags.ExceptionType] = exception.GetType().FullName, + [Tags.ExceptionMessage] = exception.Message + })); + } + + public void SetResource(IResource resource) + { + SetTag(Tags.ResourceName, resource.Name); + SetTag(Tags.ResourceType, resource.GetType().Name); + } + + public void SetResourceCount(int count) => SetTag(Tags.ResourceCount, count); + + public void SetResourceCreate(string resourceKind, int replicaCount) + { + SetTag(Tags.ResourceKind, resourceKind); + SetTag(Tags.ResourceReplicaCount, replicaCount); + } + + public void SetResourceKind(string resourceKind) => SetTag(Tags.ResourceKind, resourceKind); + + public void SetResourceObserved(string? state, DateTime? startupTimestamp, DateTime? 
finishedTimestamp) + { + SetTag(Tags.ResourceState, state); + SetTag(Tags.ResourceStartTime, startupTimestamp?.ToString("O", CultureInfo.InvariantCulture)); + SetTag(Tags.ResourceStopTime, finishedTimestamp?.ToString("O", CultureInfo.InvariantCulture)); + } + + public void SetResourceStopped(bool stopped) => SetTag(Tags.ResourceStopped, stopped); + + public void SetResourceWaitDependencyCount(int count) => SetTag(Tags.ResourceWaitDependencyCount, count); + + public void SetResourceWaitExpectedExitCode(int exitCode) => SetTag(Tags.ResourceWaitExpectedExitCode, exitCode); + + public void SetResourceWaitTarget(string resourceName, string waitCondition) + { + SetTag(Tags.ResourceWaitTargetName, resourceName); + SetTag(Tags.ResourceWaitCondition, waitCondition); + } + + public void Dispose() + { + if (ownsActivity) + { + activity?.Dispose(); + } + } + + private void AddEvent(string name) => activity?.AddEvent(new ActivityEvent(name)); + + private void AddResourceWaitEvent(string eventName, ResourceEvent resourceEvent, string waitCondition) + { + if (activity is null) + { + return; + } + + var snapshot = resourceEvent.Snapshot; + var tags = new ActivityTagsCollection + { + [Tags.ResourceName] = resourceEvent.Resource.Name, + [Tags.ResourceId] = resourceEvent.ResourceId, + [Tags.ResourceType] = snapshot.ResourceType, + [Tags.ResourceWaitCondition] = waitCondition, + [Tags.ResourceSnapshotVersion] = snapshot.Version, + [Tags.ResourceReady] = snapshot.ResourceReadyEvent is not null + }; + + if (snapshot.State?.Text is { } state) + { + tags[Tags.ResourceState] = state; + } + + if (snapshot.HealthStatus is { } healthStatus) + { + tags[Tags.ResourceHealthStatus] = healthStatus.ToString(); + } + + if (snapshot.ExitCode is { } exitCode) + { + tags[Tags.ResourceExitCode] = exitCode; + } + + activity.AddEvent(new ActivityEvent(eventName, tags: tags)); + } + + private void SetTag(string key, object? 
value) => activity?.SetTag(key, value); + } +} diff --git a/src/Aspire.Hosting/DistributedApplicationBuilder.cs b/src/Aspire.Hosting/DistributedApplicationBuilder.cs index 9a27861ea11..a18e96ca491 100644 --- a/src/Aspire.Hosting/DistributedApplicationBuilder.cs +++ b/src/Aspire.Hosting/DistributedApplicationBuilder.cs @@ -20,6 +20,7 @@ using Aspire.Hosting.Dcp; using Aspire.Hosting.Devcontainers; using Aspire.Hosting.Devcontainers.Codespaces; +using Aspire.Hosting.Diagnostics; using Aspire.Hosting.Eventing; using Aspire.Hosting.Exec; using Aspire.Hosting.Health; @@ -39,6 +40,9 @@ using Microsoft.Extensions.Hosting; using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; +using OpenTelemetry.Exporter; +using OpenTelemetry.Resources; +using OpenTelemetry.Trace; namespace Aspire.Hosting; @@ -489,6 +493,8 @@ public DistributedApplicationBuilder(DistributedApplicationOptions options) _innerBuilder.Services.TryAddEventingSubscriber(); } + ConfigureProfilingTelemetry(); + if (ExecutionContext.IsRunMode) { // Orchestrator @@ -624,6 +630,82 @@ private void ConfigureHealthChecks() } } + private void ConfigureProfilingTelemetry() + { + if (!ShouldConfigureProfilingTelemetry()) + { + return; + } + + var resourceBuilder = OpenTelemetry.Resources.ResourceBuilder.CreateDefault() + .AddService( + serviceName: "aspire-apphost", + serviceVersion: GetAppHostServiceVersion()) + .AddAttributes(ProfilingTelemetry.CreateAppHostResourceAttributes(AppHostPath, ExecutionContext.Operation.ToString())); + + _innerBuilder.Services.AddOpenTelemetry() + .WithTracing(builder => + { + builder + .AddSource(ProfilingTelemetry.ActivitySourceName) + .SetResourceBuilder(resourceBuilder); + + if (!string.IsNullOrEmpty(_innerBuilder.Configuration[KnownOtelConfigNames.ExporterOtlpEndpoint])) + { + builder.AddOtlpExporter(); + } + else + { + var (url, protocol) = OtlpEndpointResolver.ResolveOtlpEndpoint(_innerBuilder.Configuration); + + builder.AddOtlpExporter(options => + { + 
options.Endpoint = new Uri(url); + options.Protocol = protocol switch + { + "http/protobuf" => OtlpExportProtocol.HttpProtobuf, + _ => OtlpExportProtocol.Grpc + }; + + if (_innerBuilder.Configuration["AppHost:OtlpApiKey"] is { } otlpApiKey) + { + options.Headers = $"x-otlp-api-key={otlpApiKey}"; + } + }); + } + }); + } + + private bool ShouldConfigureProfilingTelemetry() + { + // Dashboard OTLP is normally configured for app telemetry. Profiling + // spans are high-cardinality diagnostics, so only export them when requested. + // This intentionally supports publish/deploy/inspect operations as well as + // run so profiling can follow the full CLI/AppHost/pipeline operation. + var profilingEnabled = + _innerBuilder.Configuration.GetBool(KnownConfigNames.ProfilingEnabled) ?? + _innerBuilder.Configuration.GetBool(KnownConfigNames.Legacy.StartupProfilingEnabled); + if (profilingEnabled is not true) + { + return false; + } + + if (!string.IsNullOrEmpty(_innerBuilder.Configuration[KnownOtelConfigNames.ExporterOtlpEndpoint])) + { + return true; + } + + var dashboardOtlpGrpcUrl = _innerBuilder.Configuration.GetString(KnownConfigNames.DashboardOtlpGrpcEndpointUrl, KnownConfigNames.Legacy.DashboardOtlpGrpcEndpointUrl); + var dashboardOtlpHttpUrl = _innerBuilder.Configuration.GetString(KnownConfigNames.DashboardOtlpHttpEndpointUrl, KnownConfigNames.Legacy.DashboardOtlpHttpEndpointUrl); + + return !string.IsNullOrEmpty(dashboardOtlpGrpcUrl) || !string.IsNullOrEmpty(dashboardOtlpHttpUrl); + } + + private static string? 
GetAppHostServiceVersion() + { + return typeof(DistributedApplication).Assembly.GetCustomAttribute()?.InformationalVersion; + } + private void MapTransportOptionsFromCustomKeys(TransportOptions options) { if (Configuration.GetBool(KnownConfigNames.AllowUnsecuredTransport) is { } allowUnsecuredTransport) @@ -845,7 +927,7 @@ private static void ValidateResourceName(IResource resource) policy = NameValidationPolicyAnnotation.Default; } - ModelName.ValidateName(nameof(Resource), resource.Name, policy.MaxLength, policy.ValidateStartsWithLetter, policy.ValidateAllowedCharacters, policy.ValidateNoConsecutiveHyphens, policy.ValidateNoTrailingHyphen); + ModelName.ValidateName(nameof(Aspire.Hosting.ApplicationModel.Resource), resource.Name, policy.MaxLength, policy.ValidateStartsWithLetter, policy.ValidateAllowedCharacters, policy.ValidateNoConsecutiveHyphens, policy.ValidateNoTrailingHyphen); } private static bool PathsEqual(string left, string right) => diff --git a/src/Aspire.Hosting/Orchestrator/ApplicationOrchestrator.cs b/src/Aspire.Hosting/Orchestrator/ApplicationOrchestrator.cs index 93709b1596d..c6679b4c520 100644 --- a/src/Aspire.Hosting/Orchestrator/ApplicationOrchestrator.cs +++ b/src/Aspire.Hosting/Orchestrator/ApplicationOrchestrator.cs @@ -9,9 +9,12 @@ using Aspire.Dashboard.Model; using Aspire.Hosting.ApplicationModel; using Aspire.Hosting.Dashboard; +using Aspire.Hosting.Diagnostics; using Aspire.Hosting.Dcp; using Aspire.Hosting.Eventing; using Aspire.Hosting.Lifecycle; +using Microsoft.Extensions.Configuration; +using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; using Microsoft.Extensions.Options; @@ -34,6 +37,7 @@ internal sealed class ApplicationOrchestrator private readonly DistributedApplicationExecutionContext _executionContext; private readonly ParameterProcessor _parameterProcessor; private readonly CancellationTokenSource _shutdownCancellation = new(); + private IConfiguration? 
Configuration => _serviceProvider.GetService(); public ApplicationOrchestrator(DistributedApplicationModel model, IDcpExecutor dcpExecutor, @@ -93,6 +97,7 @@ await _notificationService.PublishUpdateAsync(resourceWithConnectionString, stat private async Task WaitForInBeforeResourceStartedEvent(BeforeResourceStartedEvent @event, CancellationToken cancellationToken) { + using var activity = ProfilingTelemetry.StartResourceBeforeStartWait(Configuration, @event.Resource); using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken); var waitForDependenciesTask = _notificationService.WaitForDependenciesAsync(@event.Resource, cts.Token); @@ -118,6 +123,11 @@ private async Task WaitForInBeforeResourceStartedEvent(BeforeResourceStartedEven await completedTask.ConfigureAwait(false); } } + catch (Exception ex) + { + activity.SetError(ex); + throw; + } finally { // Ensure both wait tasks are cancelled. diff --git a/src/Shared/KnownConfigNames.cs b/src/Shared/KnownConfigNames.cs index 7f18e0e034b..19ff0db1ac1 100644 --- a/src/Shared/KnownConfigNames.cs +++ b/src/Shared/KnownConfigNames.cs @@ -44,6 +44,24 @@ internal static class KnownConfigNames public const string DotnetCliUiLanguage = "DOTNET_CLI_UI_LANGUAGE"; public const string MsBuildTerminalLogger = "MSBUILDTERMINALLOGGER"; + // Enables Aspire's local profiling telemetry. This is diagnostic telemetry used to correlate + // CLI, AppHost, DCP, and child-process spans, and is separate from customer telemetry. + public const string ProfilingEnabled = "ASPIRE_PROFILING_ENABLED"; + + // Stable identifier shared by every process participating in one profiling capture. + public const string ProfilingSessionId = "ASPIRE_PROFILING_SESSION_ID"; + + // W3C trace context propagated from the launching process to child processes so their spans + // attach to the same profiling trace. + public const string ProfilingTraceParent = "traceparent"; + + // Optional W3C tracestate companion value for traceparent. 
+ public const string ProfilingTraceState = "tracestate"; + + // When set, the CLI adds MSBuild binary log arguments to supported dotnet commands and records + // the emitted binlog path on the profiling span. + public const string CliDotnetBinlogDirectory = "ASPIRE_CLI_DOTNET_BINLOG_DIR"; + public const string ExtensionEndpoint = "ASPIRE_EXTENSION_ENDPOINT"; public const string ExtensionPromptEnabled = "ASPIRE_EXTENSION_PROMPT_ENABLED"; public const string ExtensionToken = "ASPIRE_EXTENSION_TOKEN"; @@ -80,5 +98,17 @@ public static class Legacy public const string ContainerRuntime = "DOTNET_ASPIRE_CONTAINER_RUNTIME"; public const string DependencyCheckTimeout = "DOTNET_ASPIRE_DEPENDENCY_CHECK_TIMEOUT"; public const string ServiceStartupWatchTimeout = "DOTNET_ASPIRE_SERVICE_STARTUP_WATCH_TIMEOUT"; + + // Legacy startup-profiling names are still read and written because DCP consumes them + // when correlating AppHost resource lifecycle spans. Keep them until DCP and older + // Aspire tools no longer need startup-named profiling correlation. + public const string StartupProfilingEnabled = "ASPIRE_STARTUP_PROFILING_ENABLED"; + + // Legacy profiling session identifier, formerly named for startup-only profiling. + public const string StartupOperationId = "ASPIRE_STARTUP_OPERATION_ID"; + + // Startup-named W3C trace context propagated to DCP for resource lifecycle correlation. + public const string StartupTraceParent = "ASPIRE_STARTUP_TRACEPARENT"; + public const string StartupTraceState = "ASPIRE_STARTUP_TRACESTATE"; } } diff --git a/tests/Aspire.Cli.Tests/Commands/RunCommandTests.cs b/tests/Aspire.Cli.Tests/Commands/RunCommandTests.cs index bc3479c14c8..4ccd4a3939d 100644 --- a/tests/Aspire.Cli.Tests/Commands/RunCommandTests.cs +++ b/tests/Aspire.Cli.Tests/Commands/RunCommandTests.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. 
using System.Runtime.CompilerServices; +using System.Diagnostics; using System.Text.Json; using Aspire.Cli.Backchannel; using Aspire.Cli.Commands; @@ -1737,6 +1738,38 @@ public void DetachedChildEnvironmentFilter_PreservesDebugSessionVariables() Assert.False(AppHostLauncher.IsExtensionEnvironmentVariable(KnownConfigNames.DcpInstanceIdPrefix)); } + [Fact] + public void DetachedChildEnvironment_IncludesProfilingTelemetryContext() + { + using var listener = CreateActivityListener("test-detached-child-environment"); + using var source = new ActivitySource("test-detached-child-environment"); + using var activity = source.StartActivity("parent"); + Assert.NotNull(activity); + activity.SetBaggage(ProfilingTelemetry.SessionIdBaggageName, "session-1"); + activity.TraceStateString = "state-1"; + + var environment = AppHostLauncher.CreateDetachedChildEnvironment(activity); + + Assert.Equal("true", environment[KnownConfigNames.CliRunDetached]); + Assert.Equal("true", environment[ProfilingTelemetry.EnabledEnvironmentVariable]); + Assert.Equal("session-1", environment[ProfilingTelemetry.SessionIdEnvironmentVariable]); + Assert.Equal("session-1", environment[KnownConfigNames.Legacy.StartupOperationId]); + Assert.Equal(activity.Id, environment[ProfilingTelemetry.TraceParentEnvironmentVariable]); + Assert.Equal("state-1", environment[ProfilingTelemetry.TraceStateEnvironmentVariable]); + } + + [Fact] + public void DetachedChildEnvironment_AllowsMissingProfilingTelemetryContext() + { + var environment = AppHostLauncher.CreateDetachedChildEnvironment(null); + + Assert.Equal("true", environment[KnownConfigNames.CliRunDetached]); + Assert.False(environment.ContainsKey(ProfilingTelemetry.EnabledEnvironmentVariable)); + Assert.False(environment.ContainsKey(ProfilingTelemetry.SessionIdEnvironmentVariable)); + Assert.False(environment.ContainsKey(ProfilingTelemetry.TraceParentEnvironmentVariable)); + Assert.False(environment.ContainsKey(ProfilingTelemetry.TraceStateEnvironmentVariable)); + 
} + [Theory] [InlineData(false, false)] [InlineData(true, false)] @@ -1789,4 +1822,14 @@ public async Task RunCommand_RecordsRunAppHostTelemetryActivity(bool detached, b Assert.Equal("certificate_trust_failed", tags[TelemetryConstants.Tags.ErrorType]); } + private static ActivityListener CreateActivityListener(string sourceName) + { + var listener = new ActivityListener + { + ShouldListenTo = source => source.Name == sourceName, + Sample = (ref ActivityCreationOptions _) => ActivitySamplingResult.AllDataAndRecorded + }; + ActivitySource.AddActivityListener(listener); + return listener; + } } diff --git a/tests/Aspire.Cli.Tests/DotNet/DotNetCliRunnerTests.cs b/tests/Aspire.Cli.Tests/DotNet/DotNetCliRunnerTests.cs index 4e03b603973..4599368db02 100644 --- a/tests/Aspire.Cli.Tests/DotNet/DotNetCliRunnerTests.cs +++ b/tests/Aspire.Cli.Tests/DotNet/DotNetCliRunnerTests.cs @@ -6,6 +6,7 @@ using Aspire.Cli.Interaction; using Aspire.Cli.Tests.TestServices; using Aspire.Cli.Tests.Utils; +using Aspire.Hosting; using Microsoft.AspNetCore.InternalTesting; using Microsoft.Extensions.Configuration; using Microsoft.Extensions.DependencyInjection; @@ -219,6 +220,79 @@ public async Task BuildAsyncDoesNotIncludeNoRestoreFlagWhenNoRestoreIsFalse() Assert.Equal(0, exitCode); } + [Fact] + public async Task BuildAsyncAddsBinlogWhenBinlogDirectoryIsConfigured() + { + using var workspace = TemporaryWorkspace.Create(outputHelper); + var projectFile = new FileInfo(Path.Combine(workspace.WorkspaceRoot.FullName, "AppHost.csproj")); + await File.WriteAllTextAsync(projectFile.FullName, "Not a real project file."); + + var binlogDirectory = workspace.WorkspaceRoot.CreateSubdirectory("binlogs"); + var configuration = new ConfigurationBuilder() + .AddInMemoryCollection(new Dictionary + { + [KnownConfigNames.CliDotnetBinlogDirectory] = binlogDirectory.FullName + }) + .Build(); + + var services = CliTestHelper.CreateServiceCollection(workspace, outputHelper); + using var provider = 
services.BuildServiceProvider(); + + var options = new ProcessInvocationOptions(); + + var executionContext = CreateExecutionContext(workspace.WorkspaceRoot); + var runner = DotNetCliRunnerTestHelper.Create( + provider, + executionContext, + (args, _, _, _) => + { + var binlogArgument = Assert.Single(args, arg => arg.StartsWith("/bl:", StringComparison.Ordinal)); + var binlogPath = binlogArgument["/bl:".Length..]; + Assert.Equal(binlogDirectory.FullName, Path.GetDirectoryName(binlogPath)); + Assert.EndsWith(".binlog", binlogPath); + Assert.Contains("build", Path.GetFileName(binlogPath)); + Assert.Contains("AppHost", Path.GetFileName(binlogPath)); + }, + 0, + configuration: configuration); + + var exitCode = await runner.BuildAsync(projectFile, noRestore: false, options, CancellationToken.None).DefaultTimeout(); + + Assert.Equal(0, exitCode); + } + + [Fact] + public async Task NewProjectAsyncDoesNotAddBinlogWhenBinlogDirectoryIsConfigured() + { + using var workspace = TemporaryWorkspace.Create(outputHelper); + var configuration = new ConfigurationBuilder() + .AddInMemoryCollection(new Dictionary + { + [KnownConfigNames.CliDotnetBinlogDirectory] = workspace.WorkspaceRoot.CreateSubdirectory("binlogs").FullName + }) + .Build(); + + var services = CliTestHelper.CreateServiceCollection(workspace, outputHelper); + using var provider = services.BuildServiceProvider(); + + var options = new ProcessInvocationOptions(); + + var executionContext = CreateExecutionContext(workspace.WorkspaceRoot); + var runner = DotNetCliRunnerTestHelper.Create( + provider, + executionContext, + (args, _, _, _) => + { + Assert.DoesNotContain(args, arg => arg.StartsWith("/bl:", StringComparison.Ordinal)); + }, + 0, + configuration: configuration); + + var exitCode = await runner.NewProjectAsync("aspire", "TestProject", "/tmp/test", [], options, CancellationToken.None).DefaultTimeout(); + + Assert.Equal(0, exitCode); + } + [Fact] public async Task 
RunAsyncInjectsDotnetCliUseMsBuildServerWhenNoBuildIsFalse() { diff --git a/tests/Aspire.Cli.Tests/Projects/GuestAppHostProjectTests.cs b/tests/Aspire.Cli.Tests/Projects/GuestAppHostProjectTests.cs index 5f8e9ccebf2..efb91fccf64 100644 --- a/tests/Aspire.Cli.Tests/Projects/GuestAppHostProjectTests.cs +++ b/tests/Aspire.Cli.Tests/Projects/GuestAppHostProjectTests.cs @@ -4,6 +4,7 @@ using Aspire.Cli.Configuration; using Aspire.Cli.Diagnostics; using Aspire.Cli.Projects; +using Aspire.Cli.Telemetry; using Aspire.Cli.Tests.TestServices; using Aspire.Cli.Tests.Utils; using Microsoft.Extensions.Configuration; @@ -11,14 +12,24 @@ namespace Aspire.Cli.Tests.Projects; -public class GuestAppHostProjectTests(ITestOutputHelper outputHelper) : IDisposable +public class GuestAppHostProjectTests : IDisposable { private const string AspNetCoreEnvironmentVariableName = "ASPNETCORE_ENVIRONMENT"; - private readonly TemporaryWorkspace _workspace = TemporaryWorkspace.Create(outputHelper); + private readonly TemporaryWorkspace _workspace; + private readonly IConfiguration _configuration; + private readonly ProfilingTelemetry _profilingTelemetry; + + public GuestAppHostProjectTests(ITestOutputHelper outputHelper) + { + _workspace = TemporaryWorkspace.Create(outputHelper); + _configuration = new ConfigurationBuilder().Build(); + _profilingTelemetry = new ProfilingTelemetry(_configuration); + } public void Dispose() { + _profilingTelemetry.Dispose(); _workspace.Dispose(); GC.SuppressFinalize(this); } @@ -530,7 +541,7 @@ public void CreateGuestEnvironmentVariables_AspireEnvironmentTakesPrecedenceOver Assert.Equal("Testing", envVars["ASPIRE_ENVIRONMENT"]); } - private static GuestAppHostProject CreateGuestAppHostProject() + private GuestAppHostProject CreateGuestAppHostProject() { var language = new LanguageInfo( LanguageId: "typescript/nodejs", @@ -539,9 +550,7 @@ private static GuestAppHostProject CreateGuestAppHostProject() DetectionPatterns: ["apphost.ts"], CodeGenerator: "TypeScript"); 
- var configuration = new ConfigurationBuilder().Build(); - - var logFilePath = Path.Combine(Path.GetTempPath(), $"test-guest-{Guid.NewGuid()}.log"); + var logFilePath = Path.Combine(_workspace.WorkspaceRoot.FullName, $"test-guest-{Guid.NewGuid()}.log"); return new GuestAppHostProject( language: language, @@ -551,10 +560,11 @@ private static GuestAppHostProject CreateGuestAppHostProject() certificateService: new TestCertificateService(), runner: new TestDotNetCliRunner(), packagingService: new TestPackagingService(), - configuration: configuration, - features: new Features(configuration, NullLogger.Instance), + configuration: _configuration, + features: new Features(_configuration, NullLogger.Instance), languageDiscovery: new TestLanguageDiscovery(), logger: NullLogger.Instance, - fileLoggerProvider: new FileLoggerProvider(logFilePath, new TestStartupErrorWriter())); + fileLoggerProvider: new FileLoggerProvider(logFilePath, new TestStartupErrorWriter()), + profilingTelemetry: _profilingTelemetry); } } diff --git a/tests/Aspire.Cli.Tests/Telemetry/AspireCliTelemetryTests.cs b/tests/Aspire.Cli.Tests/Telemetry/AspireCliTelemetryTests.cs index 9d273642a25..d39fc709f6d 100644 --- a/tests/Aspire.Cli.Tests/Telemetry/AspireCliTelemetryTests.cs +++ b/tests/Aspire.Cli.Tests/Telemetry/AspireCliTelemetryTests.cs @@ -24,6 +24,19 @@ public void StartReportedActivity_CreatesActivityWithCorrectName() Assert.Equal(ActivityKind.Internal, activity.Kind); } + [Fact] + public void StartReportedActivity_WithParentContext_CreatesChildActivity() + { + using var fixture = new TelemetryFixture(sampleResult: ActivitySamplingResult.AllData); + var parentContext = ActivityContext.Parse("00-0102030405060708090a0b0c0d0e0f10-1112131415161718-01", null); + + using var activity = fixture.Telemetry.StartReportedActivity("test-activity", ActivityKind.Internal, parentContext); + + Assert.NotNull(activity); + Assert.Equal(parentContext.TraceId, activity.TraceId); + Assert.Equal(parentContext.SpanId, 
activity.ParentSpanId); + } + [Fact] public void StartDiagnosticActivity_CreatesActivityWithCorrectNameAndDefaultTags() { @@ -56,6 +69,19 @@ public void StartDiagnosticActivity_WithKind_CreatesActivityWithCorrectKind() Assert.Equal(ActivityKind.Client, activity.Kind); } + [Fact] + public void StartDiagnosticActivity_WithParentContext_CreatesChildActivity() + { + using var fixture = new TelemetryFixture(sampleResult: ActivitySamplingResult.AllData); + var parentContext = ActivityContext.Parse("00-1112131415161718191a1b1c1d1e1f20-2122232425262728-01", null); + + using var activity = fixture.Telemetry.StartDiagnosticActivity("test-activity", ActivityKind.Internal, parentContext); + + Assert.NotNull(activity); + Assert.Equal(parentContext.TraceId, activity.TraceId); + Assert.Equal(parentContext.SpanId, activity.ParentSpanId); + } + [Fact] public void StartDiagnosticActivity_UsesCallerMemberName_WhenNoNameProvided() { diff --git a/tests/Aspire.Cli.Tests/Telemetry/ProfilingTelemetryContextTests.cs b/tests/Aspire.Cli.Tests/Telemetry/ProfilingTelemetryContextTests.cs new file mode 100644 index 00000000000..00ce0d8fb59 --- /dev/null +++ b/tests/Aspire.Cli.Tests/Telemetry/ProfilingTelemetryContextTests.cs @@ -0,0 +1,120 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System.Diagnostics; +using Aspire.Cli.Telemetry; +using Aspire.Hosting; +using Microsoft.Extensions.Configuration; + +namespace Aspire.Cli.Tests.Telemetry; + +public class ProfilingTelemetryContextTests +{ + [Fact] + public void AddActivityContextToEnvironment_EmitsActivityValues() + { + using var listener = CreateActivityListener("test-profiling-context"); + using var source = new ActivitySource("test-profiling-context"); + using var activity = source.StartActivity("parent"); + Assert.NotNull(activity); + + activity.SetBaggage(ProfilingTelemetry.SessionIdBaggageName, "session-1"); + activity.TraceStateString = "state-1"; + + var environment = new Dictionary(); + ProfilingTelemetry.AddActivityContextToEnvironment(activity, environment); + + Assert.Equal("true", environment[ProfilingTelemetry.EnabledEnvironmentVariable]); + Assert.Equal("session-1", environment[ProfilingTelemetry.SessionIdEnvironmentVariable]); + Assert.Equal(activity.Id, environment[ProfilingTelemetry.TraceParentEnvironmentVariable]); + Assert.Equal("state-1", environment[ProfilingTelemetry.TraceStateEnvironmentVariable]); + Assert.Equal("true", environment[KnownConfigNames.Legacy.StartupProfilingEnabled]); + Assert.Equal("session-1", environment[KnownConfigNames.Legacy.StartupOperationId]); + Assert.Equal(activity.Id, environment[KnownConfigNames.Legacy.StartupTraceParent]); + Assert.Equal("state-1", environment[KnownConfigNames.Legacy.StartupTraceState]); + } + + [Fact] + public void AddActivityContextToEnvironment_AllowsMissingActivity() + { + var environment = new Dictionary(); + + ProfilingTelemetry.AddActivityContextToEnvironment(null, environment); + + Assert.Empty(environment); + } + + [Fact] + public void StartRunCommand_ContinuesConfiguredRemoteParentAndSession() + { + Activity? 
startedActivity = null; + using var listener = CreateActivityListener(ProfilingTelemetry.ActivitySourceName, activity => startedActivity = activity); + using var profilingTelemetry = new ProfilingTelemetry(CreateConfiguration( + (ProfilingTelemetry.EnabledEnvironmentVariable, "true"), + (ProfilingTelemetry.SessionIdEnvironmentVariable, "session-1"), + (ProfilingTelemetry.TraceParentEnvironmentVariable, "00-0102030405060708090a0b0c0d0e0f10-1112131415161718-01"), + (ProfilingTelemetry.TraceStateEnvironmentVariable, "state-1"))); + + using var activity = profilingTelemetry.StartRunCommand(); + + Assert.True(activity.IsRunning); + Assert.NotNull(startedActivity); + Assert.Equal("0102030405060708090a0b0c0d0e0f10", startedActivity.TraceId.ToString()); + Assert.Equal("session-1", startedActivity.GetBaggageItem(ProfilingTelemetry.SessionIdBaggageName)); + Assert.Equal("session-1", startedActivity.GetTagItem(ProfilingTelemetry.Tags.ProfilingSessionId)); + } + + [Theory] + [InlineData(null, false)] + [InlineData("", false)] + [InlineData("false", false)] + [InlineData("0", false)] + [InlineData("true", true)] + [InlineData("TRUE", true)] + [InlineData("1", true)] + public void IsEnabled_ReturnsExpectedValue(string? enabled, bool expected) + { + var isEnabled = ProfilingTelemetry.IsProfilingEnabled(CreateConfiguration( + (ProfilingTelemetry.EnabledEnvironmentVariable, enabled))); + + Assert.Equal(expected, isEnabled); + } + + [Fact] + public void StartRunCommand_ReadsLegacyStartupNames() + { + Activity? 
startedActivity = null; + using var listener = CreateActivityListener(ProfilingTelemetry.ActivitySourceName, activity => startedActivity = activity); + using var profilingTelemetry = new ProfilingTelemetry(CreateConfiguration( + (KnownConfigNames.Legacy.StartupProfilingEnabled, "true"), + (KnownConfigNames.Legacy.StartupOperationId, "session-1"), + (KnownConfigNames.Legacy.StartupTraceParent, "00-0102030405060708090a0b0c0d0e0f10-1112131415161718-01"), + (KnownConfigNames.Legacy.StartupTraceState, "state-1"))); + + using var activity = profilingTelemetry.StartRunCommand(); + + Assert.True(activity.IsRunning); + Assert.NotNull(startedActivity); + Assert.Equal("0102030405060708090a0b0c0d0e0f10", startedActivity.TraceId.ToString()); + Assert.Equal("session-1", startedActivity.GetBaggageItem(ProfilingTelemetry.SessionIdBaggageName)); + } + + private static ActivityListener CreateActivityListener(string sourceName, Action? activityStarted = null) + { + var listener = new ActivityListener + { + ShouldListenTo = source => source.Name == sourceName, + Sample = (ref ActivityCreationOptions _) => ActivitySamplingResult.AllDataAndRecorded, + ActivityStarted = activityStarted + }; + ActivitySource.AddActivityListener(listener); + return listener; + } + + private static IConfiguration CreateConfiguration(params (string Key, string? Value)[] values) + { + return new ConfigurationBuilder() + .AddInMemoryCollection(values.Select(value => new KeyValuePair(value.Key, value.Value))) + .Build(); + } +} diff --git a/tests/Aspire.Cli.Tests/Telemetry/ProfilingTelemetryTests.cs b/tests/Aspire.Cli.Tests/Telemetry/ProfilingTelemetryTests.cs new file mode 100644 index 00000000000..1655a64429a --- /dev/null +++ b/tests/Aspire.Cli.Tests/Telemetry/ProfilingTelemetryTests.cs @@ -0,0 +1,132 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System.Diagnostics; +using Aspire.Cli.Telemetry; +using Microsoft.Extensions.Configuration; + +namespace Aspire.Cli.Tests.Telemetry; + +public class ProfilingTelemetryTests +{ + [Fact] + public void StartRunCommand_ReturnsInactiveScopeWhenProfilingIsDisabled() + { + Activity? startedActivity = null; + using var listener = CreateProfilingActivityListener(activity => startedActivity = activity); + using var profilingTelemetry = new ProfilingTelemetry(CreateConfiguration()); + + using var activity = profilingTelemetry.StartRunCommand(); + + Assert.False(activity.IsRunning); + Assert.Null(startedActivity); + } + + [Fact] + public void StartRunCommand_UsesDedicatedProfilingActivitySource() + { + Activity? startedActivity = null; + using var listener = CreateProfilingActivityListener(activity => startedActivity = activity); + using var profilingTelemetry = new ProfilingTelemetry(CreateConfiguration( + (ProfilingTelemetry.EnabledEnvironmentVariable, "true"), + (ProfilingTelemetry.SessionIdEnvironmentVariable, "session-1"))); + + using var activity = profilingTelemetry.StartRunCommand(); + + Assert.True(activity.IsRunning); + Assert.NotNull(startedActivity); + Assert.Equal(ProfilingTelemetry.ActivitySourceName, startedActivity.Source.Name); + Assert.Equal("session-1", startedActivity.GetTagItem(ProfilingTelemetry.Tags.ProfilingSessionId)); + Assert.Equal("session-1", startedActivity.GetTagItem(ProfilingTelemetry.Tags.LegacyStartupOperationId)); + Assert.Equal("session-1", startedActivity.GetBaggageItem(ProfilingTelemetry.SessionIdBaggageName)); + } + + [Fact] + public void ProfilingSpansReuseSessionFromAmbientActivityBaggage() + { + var startedActivities = new List(); + using var parentListener = CreateActivityListener("test-parent", _ => { }); + using var listener = CreateProfilingActivityListener(startedActivities.Add); + using var parentSource = new ActivitySource("test-parent"); + using var parentActivity = parentSource.StartActivity("parent"); + using var 
profilingTelemetry = new ProfilingTelemetry(CreateConfiguration( + (ProfilingTelemetry.EnabledEnvironmentVariable, "true"))); + Assert.NotNull(parentActivity); + + parentActivity.SetBaggage(ProfilingTelemetry.SessionIdBaggageName, "session-1"); + + using (profilingTelemetry.StartDetachedSpawnChild("aspire", argsCount: 1, childCommand: "start")) + { + } + + using (profilingTelemetry.StartDetachedWaitForBackchannel(childProcessId: 1, expectedHash: "hash", hasLegacyHash: false)) + { + } + + Assert.Equal(2, startedActivities.Count); + Assert.All(startedActivities, activity => + { + Assert.Equal("session-1", activity.GetBaggageItem(ProfilingTelemetry.SessionIdBaggageName)); + Assert.Equal("session-1", activity.GetTagItem(ProfilingTelemetry.Tags.ProfilingSessionId)); + }); + } + + [Fact] + public void ProfilingSpansStoreGeneratedSessionOnAmbientAncestorsForSiblings() + { + var startedActivities = new List(); + using var parentListener = CreateActivityListener("test-parent", _ => { }); + using var diagnosticListener = CreateActivityListener("test-diagnostic", _ => { }); + using var listener = CreateProfilingActivityListener(startedActivities.Add); + using var parentSource = new ActivitySource("test-parent"); + using var diagnosticSource = new ActivitySource("test-diagnostic"); + using var parentActivity = parentSource.StartActivity("parent"); + using var profilingTelemetry = new ProfilingTelemetry(CreateConfiguration( + (ProfilingTelemetry.EnabledEnvironmentVariable, "true"))); + Assert.NotNull(parentActivity); + + using (diagnosticSource.StartActivity("diagnostic")) + { + using (profilingTelemetry.StartDetachedSpawnChild("aspire", argsCount: 1, childCommand: "start")) + { + } + } + + using (profilingTelemetry.StartDetachedWaitForBackchannel(childProcessId: 1, expectedHash: "hash", hasLegacyHash: false)) + { + } + + Assert.Equal(2, startedActivities.Count); + var sessionId = parentActivity.GetBaggageItem(ProfilingTelemetry.SessionIdBaggageName); + 
Assert.NotNull(sessionId); + Assert.All(startedActivities, activity => + { + Assert.Equal(sessionId, activity.GetBaggageItem(ProfilingTelemetry.SessionIdBaggageName)); + Assert.Equal(sessionId, activity.GetTagItem(ProfilingTelemetry.Tags.ProfilingSessionId)); + }); + } + + private static ActivityListener CreateProfilingActivityListener(Action activityStarted) + { + return CreateActivityListener(ProfilingTelemetry.ActivitySourceName, activityStarted); + } + + private static ActivityListener CreateActivityListener(string sourceName, Action activityStarted) + { + var listener = new ActivityListener + { + ShouldListenTo = source => source.Name == sourceName, + Sample = (ref ActivityCreationOptions _) => ActivitySamplingResult.AllDataAndRecorded, + ActivityStarted = activityStarted + }; + ActivitySource.AddActivityListener(listener); + return listener; + } + + private static IConfiguration CreateConfiguration(params (string Key, string? Value)[] values) + { + return new ConfigurationBuilder() + .AddInMemoryCollection(values.Select(value => new KeyValuePair(value.Key, value.Value))) + .Build(); + } +} diff --git a/tests/Aspire.Cli.Tests/TestServices/TestProcessExecutionFactory.cs b/tests/Aspire.Cli.Tests/TestServices/TestProcessExecutionFactory.cs index c9a0f208d4e..16ebc2097b7 100644 --- a/tests/Aspire.Cli.Tests/TestServices/TestProcessExecutionFactory.cs +++ b/tests/Aspire.Cli.Tests/TestServices/TestProcessExecutionFactory.cs @@ -104,6 +104,8 @@ public TestProcessExecution( public int ExitCode => 0; + public int ProcessId { get; init; } = Environment.ProcessId; + public bool StartReturnValue { get; init; } = true; public bool Start() @@ -166,12 +168,14 @@ public static DotNetCliRunner Create( AssertionCallback = assertionCallback, DefaultExitCode = exitCode }; + var resolvedConfiguration = configuration ?? serviceProvider.GetRequiredService(); return new DotNetCliRunner( logger ?? serviceProvider.GetRequiredService>(), serviceProvider, telemetry ?? 
TestTelemetryHelper.CreateInitializedTelemetry(), - configuration ?? serviceProvider.GetRequiredService(), + serviceProvider.GetRequiredService(), + resolvedConfiguration, diskCache ?? new NullDiskCache(), serviceProvider.GetRequiredService(), serviceProvider.GetRequiredService(), @@ -196,12 +200,14 @@ public static (DotNetCliRunner Runner, TestProcessExecutionFactory ExecutionFact { AttemptCallback = attemptCallback }; + var resolvedConfiguration = configuration ?? serviceProvider.GetRequiredService(); var runner = new DotNetCliRunner( logger ?? serviceProvider.GetRequiredService>(), serviceProvider, telemetry ?? TestTelemetryHelper.CreateInitializedTelemetry(), - configuration ?? serviceProvider.GetRequiredService(), + serviceProvider.GetRequiredService(), + resolvedConfiguration, diskCache ?? new NullDiskCache(), serviceProvider.GetRequiredService(), serviceProvider.GetRequiredService(), diff --git a/tests/Aspire.Cli.Tests/Utils/CliTestHelper.cs b/tests/Aspire.Cli.Tests/Utils/CliTestHelper.cs index 99a14cb117f..2c423433006 100644 --- a/tests/Aspire.Cli.Tests/Utils/CliTestHelper.cs +++ b/tests/Aspire.Cli.Tests/Utils/CliTestHelper.cs @@ -100,6 +100,7 @@ public static IServiceCollection CreateServiceCollection(TemporaryWorkspace work services.AddSingleton(sp => sp.GetRequiredService().Out); services.AddSingleton(TimeProvider.System); services.AddSingleton(options.TelemetryFactory); + services.AddSingleton(); services.AddSingleton(options.ProjectLocatorFactory); services.AddSingleton(options.SolutionLocatorFactory); services.AddSingleton(options.ExtensionRpcTargetFactory); @@ -462,6 +463,7 @@ public ISolutionLocator CreateDefaultSolutionLocatorFactory(IServiceProvider ser { var logger = serviceProvider.GetRequiredService>(); var telemetry = serviceProvider.GetRequiredService(); + var profilingTelemetry = serviceProvider.GetRequiredService(); var configuration = serviceProvider.GetRequiredService(); var features = serviceProvider.GetRequiredService(); var diskCache = 
serviceProvider.GetRequiredService(); @@ -469,7 +471,7 @@ public ISolutionLocator CreateDefaultSolutionLocatorFactory(IServiceProvider ser var executionFactory = serviceProvider.GetRequiredService(); var interactionService = serviceProvider.GetRequiredService(); - return new DotNetCliRunner(logger, serviceProvider, telemetry, configuration, diskCache, features, interactionService, executionContext, executionFactory); + return new DotNetCliRunner(logger, serviceProvider, telemetry, profilingTelemetry, configuration, diskCache, features, interactionService, executionContext, executionFactory); }; public Func DotNetSdkInstallerFactory { get; set; } = (IServiceProvider serviceProvider) => @@ -490,7 +492,8 @@ public ISolutionLocator CreateDefaultSolutionLocatorFactory(IServiceProvider ser { var logger = serviceProvider.GetRequiredService>(); var telemetry = serviceProvider.GetRequiredService(); - return new AppHostCliBackchannel(logger, telemetry); + var profilingTelemetry = serviceProvider.GetRequiredService(); + return new AppHostCliBackchannel(logger, telemetry, profilingTelemetry); }; public Func ExtensionRpcTargetFactory { get; set; } = (IServiceProvider serviceProvider) => diff --git a/tools/StartupOtelValidator/ValidateStartupOtelExport.cs b/tools/StartupOtelValidator/ValidateStartupOtelExport.cs new file mode 100644 index 00000000000..e6d9ff2cddd --- /dev/null +++ b/tools/StartupOtelValidator/ValidateStartupOtelExport.cs @@ -0,0 +1,462 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System.Text; +using System.Text.Json; + +const string ProfilingSessionIdAttribute = "aspire.profiling.session_id"; +const string LegacyStartupOperationIdAttribute = "aspire.startup.operation_id"; + +var exportDirectory = GetRequiredEnvironmentVariable("EXPORT_DIR"); +var spanSummaryPath = GetRequiredEnvironmentVariable("SPAN_SUMMARY_PATH"); +var runRoot = GetRequiredEnvironmentVariable("RUN_ROOT"); +var requireDcpSpans = string.Equals(Environment.GetEnvironmentVariable("REQUIRE_DCP_SPANS"), "true", StringComparison.OrdinalIgnoreCase); + +var spans = ReadExportedSpans(exportDirectory); +WriteSpanSummary(spanSummaryPath, spans); + +var profilingGroups = spans + .Where(span => !string.IsNullOrEmpty(span.ProfilingSessionId)) + .GroupBy(span => span.ProfilingSessionId!); + +if (!profilingGroups.Any()) +{ + Fail($"No exported spans contained {ProfilingSessionIdAttribute} or legacy {LegacyStartupOperationIdAttribute}. See {spanSummaryPath}"); +} + +string? validProfilingSessionId = null; +string? validTraceId = null; +List? 
validTraceSpans = null; + +foreach (var profilingGroup in profilingGroups) +{ + var traceGroups = profilingGroup + .Where(span => !string.IsNullOrEmpty(span.TraceId)) + .GroupBy(span => span.TraceId!); + + foreach (var traceGroup in traceGroups) + { + var traceSpans = traceGroup.ToList(); + var hasStartCommandSpan = traceSpans.Any(span => + span.Scope == "Aspire.Cli.Profiling" && + span.Name == "aspire/cli/start_apphost.spawn_child"); + var hasChildDiagnosticSpan = traceSpans.Any(span => + span.Scope == "Aspire.Cli.Profiling" && + Contains(span.Name, + "aspire/cli/apphost.ensure_dev_certificates", + "aspire/cli/backchannel.connect", + "aspire/cli/backchannel.get_dashboard_urls", + "aspire/cli/dotnet.build", + "aspire/cli/run")); + var hasHostingDcpSpan = traceSpans.Any(span => + span.Scope == "Aspire.Hosting.Profiling" && + Contains(span.Name, + "aspire.hosting.dcp.run_application", + "aspire.hosting.dcp.create_rendered_resources", + "aspire.hosting.dcp.allocate_service_addresses")); + var hasResourceCreateSpan = traceSpans.Any(span => + span.Scope == "Aspire.Hosting.Profiling" && + span.Name == "aspire.hosting.resource.create"); + var hasResourceWaitSpan = traceSpans.Any(span => + span.Scope == "Aspire.Hosting.Profiling" && + Contains(span.Name, + "aspire.hosting.resource.before_start_wait", + "aspire.hosting.resource.wait_for_dependencies", + "aspire.hosting.resource.wait_for_dependency")); + var hasDcpProcessSpan = traceSpans.Any(span => + span.Scope == "dcp.startup" && + Contains(span.Name, + "dcp.command", + "dcp.start_apiserver", + "dcp.start_apiserver.fork", + "dcp.run", + "dcp.apiserver.start", + "dcp.hosted_services.start", + "dcp.controllers.run", + "dcp.controllers.create_manager")); + var hasDcpResourceSpan = traceSpans.Any(span => + span.Scope == "dcp.startup" && + Contains(span.Name, + "dcp.controller.reconcile", + "dcp.executable.manage", + "dcp.service.ensure_effective_address", + "dcp.container.manage")); + var hasAnyDcpStartupSpan = 
traceSpans.Any(span => span.Scope == "dcp.startup"); + var hasDcpResourceObservedSpan = traceSpans.Any(span => + span.Scope == "Aspire.Hosting.Profiling" && + span.Name == "aspire.hosting.dcp.resource_observed"); + var hasDcpCreateObjectLink = traceSpans.Any(span => + span.Scope == "dcp.startup" && + !string.IsNullOrEmpty(span.DcpCreateObjectId) && + !string.IsNullOrEmpty(span.DcpCreateObjectSpanId) && + span.LinkSpanIds.Contains(span.DcpCreateObjectSpanId, StringComparer.Ordinal)); + var hasResourceWaitEvents = traceSpans.Any(span => + span.Scope == "Aspire.Hosting.Profiling" && + span.EventNames.Contains("aspire.resource.wait.observed", StringComparer.Ordinal) && + span.EventNames.Contains("aspire.resource.wait.completed", StringComparer.Ordinal)); + var hasRequiredDcpSpans = !requireDcpSpans || (hasDcpProcessSpan && hasDcpResourceSpan); + var hasRequiredDcpCreateObjectLink = !hasAnyDcpStartupSpan || hasDcpCreateObjectLink; + + if (hasStartCommandSpan && + hasChildDiagnosticSpan && + hasHostingDcpSpan && + hasResourceCreateSpan && + hasResourceWaitSpan && + hasDcpResourceObservedSpan && + hasRequiredDcpCreateObjectLink && + hasResourceWaitEvents && + hasRequiredDcpSpans) + { + validProfilingSessionId = profilingGroup.Key; + validTraceId = traceGroup.Key; + validTraceSpans = traceSpans; + break; + } + } + + if (validProfilingSessionId is not null) + { + break; + } +} + +if (validProfilingSessionId is null || validTraceId is null || validTraceSpans is null) +{ + var dcpRequirement = requireDcpSpans ? ", DCP process, and DCP resource/controller" : string.Empty; + Fail($"No profiling session contained correlated CLI, Hosting DCP resource creation, DCP resource observation, resource wait events, Hosting-to-DCP links{dcpRequirement} spans in one trace. 
See {spanSummaryPath}"); +} + +var startJsonPath = GetRequiredEnvironmentVariable("START_JSON_PATH"); +var startedDashboardUrl = ReadStartedDashboardUrl(startJsonPath); +var summary = new ValidationSummary( + RunRoot: runRoot, + TargetAspirePath: GetRequiredEnvironmentVariable("TARGET_ASPIRE_PATH"), + ProfilerAspirePath: GetRequiredEnvironmentVariable("PROFILER_ASPIRE_PATH"), + LayoutPath: GetOptionalEnvironmentVariable("LAYOUT_PATH"), + DcpPath: GetOptionalEnvironmentVariable("DCP_PATH"), + PostStartDelaySeconds: int.TryParse(Environment.GetEnvironmentVariable("POST_START_DELAY_SECONDS"), out var postStartDelaySeconds) ? postStartDelaySeconds : 0, + RequireDcpSpans: requireDcpSpans, + DashboardUrl: GetRequiredEnvironmentVariable("DASHBOARD_URL"), + OtlpGrpcUrl: GetRequiredEnvironmentVariable("OTLP_GRPC_URL"), + OtlpHttpUrl: GetRequiredEnvironmentVariable("OTLP_HTTP_URL"), + AppHostPath: GetRequiredEnvironmentVariable("APPHOST_PATH"), + StartedDashboardUrl: startedDashboardUrl, + ExportZip: GetRequiredEnvironmentVariable("EXPORT_ZIP"), + DotnetTraceDirectory: GetOptionalEnvironmentVariable("DOTNET_TRACE_DIR"), + DotnetTraceFiles: GetOptionalPathList(Environment.GetEnvironmentVariable("DOTNET_TRACE_DIR"), ".nettrace"), + DotnetBinlogDirectory: GetOptionalEnvironmentVariable("DOTNET_BINLOG_DIR"), + DotnetBinlogFiles: GetOptionalPathList(Environment.GetEnvironmentVariable("DOTNET_BINLOG_DIR"), ".binlog"), + SpanSummary: spanSummaryPath, + ProfilingSessionId: validProfilingSessionId!, + CorrelatedSpanCount: validTraceSpans!.Count, + TraceId: validTraceId!); + +var summaryPath = Path.Combine(runRoot, "summary.json"); +WriteSummary(summaryPath, summary); +Console.WriteLine(FormatSummary(summary)); + +static List ReadExportedSpans(string exportDirectory) +{ + var tracesDirectory = Path.Combine(exportDirectory, "traces"); + if (!Directory.Exists(tracesDirectory)) + { + return []; + } + + var spans = new List(); + foreach (var tracePath in 
Directory.EnumerateFiles(tracesDirectory, "*.json").Order(StringComparer.Ordinal)) + { + using var traceStream = File.OpenRead(tracePath); + using var traceDocument = JsonDocument.Parse(traceStream); + + // Dashboard trace export files use the OTLP JSON shape: + // { resourceSpans: [{ scopeSpans: [{ scope: { name }, spans: [...] }] }] } + foreach (var resourceSpan in EnumerateArrayProperty(traceDocument.RootElement, "resourceSpans")) + { + foreach (var scopeSpan in EnumerateArrayProperty(resourceSpan, "scopeSpans")) + { + var scopeName = TryGetProperty(scopeSpan, "scope", out var scope) + ? GetStringProperty(scope, "name") + : null; + + foreach (var span in EnumerateArrayProperty(scopeSpan, "spans")) + { + spans.Add(new ExportedSpan( + File: Path.GetFileName(tracePath), + Scope: scopeName, + Name: GetStringProperty(span, "name"), + TraceId: GetStringProperty(span, "traceId"), + SpanId: GetStringProperty(span, "spanId"), + ParentSpanId: GetStringProperty(span, "parentSpanId"), + ProfilingSessionId: GetSpanAttributeValue(span, ProfilingSessionIdAttribute) ?? GetSpanAttributeValue(span, LegacyStartupOperationIdAttribute), + CommandName: GetSpanAttributeValue(span, "aspire.cli.command.name"), + ProcessId: GetSpanAttributeValue(span, "process.pid"), + DcpCreateObjectId: GetSpanAttributeValue(span, "aspire.hosting.dcp.create_object.id"), + DcpCreateObjectKind: GetSpanAttributeValue(span, "aspire.hosting.dcp.create_object.kind"), + DcpCreateObjectName: GetSpanAttributeValue(span, "aspire.hosting.dcp.create_object.name"), + DcpCreateObjectSpanId: GetSpanAttributeValue(span, "aspire.hosting.dcp.create_object.span_id"), + LinkSpanIds: ReadLinkSpanIds(span), + EventNames: ReadEventNames(span))); + } + } + } + } + + return spans; +} + +static List ReadLinkSpanIds(JsonElement span) +{ + return EnumerateArrayProperty(span, "links") + .Select(link => GetStringProperty(link, "spanId")) + .Where(spanId => !string.IsNullOrEmpty(spanId)) + .Select(spanId => spanId!) 
+ .ToList(); +} + +static List ReadEventNames(JsonElement span) +{ + return EnumerateArrayProperty(span, "events") + .Select(@event => GetStringProperty(@event, "name")) + .Where(name => !string.IsNullOrEmpty(name)) + .Select(name => name!) + .ToList(); +} + +static string? GetSpanAttributeValue(JsonElement span, string key) +{ + foreach (var attribute in EnumerateArrayProperty(span, "attributes")) + { + if (GetStringProperty(attribute, "key") != key || !TryGetProperty(attribute, "value", out var value)) + { + continue; + } + + foreach (var propertyName in new[] { "stringValue", "intValue", "doubleValue", "boolValue" }) + { + if (!TryGetProperty(value, propertyName, out var propertyValue)) + { + continue; + } + + return propertyValue.ValueKind switch + { + JsonValueKind.String => propertyValue.GetString(), + JsonValueKind.Number or JsonValueKind.True or JsonValueKind.False => propertyValue.GetRawText(), + _ => null + }; + } + } + + return null; +} + +static string? ReadStartedDashboardUrl(string startJsonPath) +{ + using var startJsonStream = File.OpenRead(startJsonPath); + using var startJsonDocument = JsonDocument.Parse(startJsonStream); + + return GetStringProperty(startJsonDocument.RootElement, "dashboardUrl"); +} + +static IEnumerable EnumerateArrayProperty(JsonElement element, string propertyName) +{ + if (!TryGetProperty(element, propertyName, out var property) || property.ValueKind != JsonValueKind.Array) + { + yield break; + } + + foreach (var item in property.EnumerateArray()) + { + yield return item; + } +} + +static bool TryGetProperty(JsonElement element, string propertyName, out JsonElement property) +{ + if (element.ValueKind == JsonValueKind.Object && element.TryGetProperty(propertyName, out property)) + { + return true; + } + + property = default; + return false; +} + +static string? 
GetStringProperty(JsonElement element, string propertyName) +{ + return TryGetProperty(element, propertyName, out var property) && property.ValueKind == JsonValueKind.String + ? property.GetString() + : null; +} + +static string GetRequiredEnvironmentVariable(string name) +{ + return Environment.GetEnvironmentVariable(name) is { Length: > 0 } value + ? value + : throw new InvalidOperationException($"Required environment variable '{name}' is not set."); +} + +static string? GetOptionalEnvironmentVariable(string name) +{ + return Environment.GetEnvironmentVariable(name) is { Length: > 0 } value ? value : null; +} + +static List GetOptionalPathList(string? directory, string extension) +{ + return !string.IsNullOrEmpty(directory) && Directory.Exists(directory) + ? Directory.EnumerateFiles(directory, $"*{extension}").Order(StringComparer.Ordinal).ToList() + : []; +} + +static bool Contains(string? value, params string[] candidates) +{ + return value is not null && candidates.Contains(value, StringComparer.Ordinal); +} + +static void Fail(string message) +{ + throw new InvalidOperationException(message); +} + +static void WriteSpanSummary(string path, IReadOnlyList spans) +{ + using var stream = File.Create(path); + using var writer = new Utf8JsonWriter(stream, new JsonWriterOptions { Indented = true }); + + writer.WriteStartArray(); + foreach (var span in spans) + { + WriteExportedSpan(writer, span); + } + writer.WriteEndArray(); +} + +static void WriteSummary(string path, ValidationSummary summary) +{ + using var stream = File.Create(path); + using var writer = new Utf8JsonWriter(stream, new JsonWriterOptions { Indented = true }); + + WriteValidationSummary(writer, summary); +} + +static string FormatSummary(ValidationSummary summary) +{ + using var stream = new MemoryStream(); + using (var writer = new Utf8JsonWriter(stream, new JsonWriterOptions { Indented = true })) + { + WriteValidationSummary(writer, summary); + } + + return 
Encoding.UTF8.GetString(stream.ToArray()); +} + +static void WriteExportedSpan(Utf8JsonWriter writer, ExportedSpan span) +{ + writer.WriteStartObject(); + WriteString(writer, nameof(ExportedSpan.File), span.File); + WriteString(writer, nameof(ExportedSpan.Scope), span.Scope); + WriteString(writer, nameof(ExportedSpan.Name), span.Name); + WriteString(writer, nameof(ExportedSpan.TraceId), span.TraceId); + WriteString(writer, nameof(ExportedSpan.SpanId), span.SpanId); + WriteString(writer, nameof(ExportedSpan.ParentSpanId), span.ParentSpanId); + WriteString(writer, nameof(ExportedSpan.ProfilingSessionId), span.ProfilingSessionId); + WriteString(writer, nameof(ExportedSpan.CommandName), span.CommandName); + WriteString(writer, nameof(ExportedSpan.ProcessId), span.ProcessId); + WriteString(writer, nameof(ExportedSpan.DcpCreateObjectId), span.DcpCreateObjectId); + WriteString(writer, nameof(ExportedSpan.DcpCreateObjectKind), span.DcpCreateObjectKind); + WriteString(writer, nameof(ExportedSpan.DcpCreateObjectName), span.DcpCreateObjectName); + WriteString(writer, nameof(ExportedSpan.DcpCreateObjectSpanId), span.DcpCreateObjectSpanId); + WriteStringArray(writer, nameof(ExportedSpan.LinkSpanIds), span.LinkSpanIds); + WriteStringArray(writer, nameof(ExportedSpan.EventNames), span.EventNames); + writer.WriteEndObject(); +} + +static void WriteValidationSummary(Utf8JsonWriter writer, ValidationSummary summary) +{ + writer.WriteStartObject(); + writer.WriteString(nameof(ValidationSummary.RunRoot), summary.RunRoot); + writer.WriteString(nameof(ValidationSummary.TargetAspirePath), summary.TargetAspirePath); + writer.WriteString(nameof(ValidationSummary.ProfilerAspirePath), summary.ProfilerAspirePath); + WriteString(writer, nameof(ValidationSummary.LayoutPath), summary.LayoutPath); + WriteString(writer, nameof(ValidationSummary.DcpPath), summary.DcpPath); + writer.WriteNumber(nameof(ValidationSummary.PostStartDelaySeconds), summary.PostStartDelaySeconds); + 
writer.WriteBoolean(nameof(ValidationSummary.RequireDcpSpans), summary.RequireDcpSpans); + writer.WriteString(nameof(ValidationSummary.DashboardUrl), summary.DashboardUrl); + writer.WriteString(nameof(ValidationSummary.OtlpGrpcUrl), summary.OtlpGrpcUrl); + writer.WriteString(nameof(ValidationSummary.OtlpHttpUrl), summary.OtlpHttpUrl); + writer.WriteString(nameof(ValidationSummary.AppHostPath), summary.AppHostPath); + WriteString(writer, nameof(ValidationSummary.StartedDashboardUrl), summary.StartedDashboardUrl); + writer.WriteString(nameof(ValidationSummary.ExportZip), summary.ExportZip); + WriteString(writer, nameof(ValidationSummary.DotnetTraceDirectory), summary.DotnetTraceDirectory); + WriteStringArray(writer, nameof(ValidationSummary.DotnetTraceFiles), summary.DotnetTraceFiles); + WriteString(writer, nameof(ValidationSummary.DotnetBinlogDirectory), summary.DotnetBinlogDirectory); + WriteStringArray(writer, nameof(ValidationSummary.DotnetBinlogFiles), summary.DotnetBinlogFiles); + writer.WriteString(nameof(ValidationSummary.SpanSummary), summary.SpanSummary); + writer.WriteString(nameof(ValidationSummary.ProfilingSessionId), summary.ProfilingSessionId); + writer.WriteNumber(nameof(ValidationSummary.CorrelatedSpanCount), summary.CorrelatedSpanCount); + writer.WriteString(nameof(ValidationSummary.TraceId), summary.TraceId); + writer.WriteEndObject(); +} + +static void WriteString(Utf8JsonWriter writer, string propertyName, string? value) +{ + if (value is null) + { + writer.WriteNull(propertyName); + } + else + { + writer.WriteString(propertyName, value); + } +} + +static void WriteStringArray(Utf8JsonWriter writer, string propertyName, IReadOnlyList values) +{ + writer.WriteStartArray(propertyName); + foreach (var value in values) + { + writer.WriteStringValue(value); + } + writer.WriteEndArray(); +} + +internal sealed record ExportedSpan( + string File, + string? Scope, + string? Name, + string? TraceId, + string? SpanId, + string? ParentSpanId, + string? 
ProfilingSessionId, + string? CommandName, + string? ProcessId, + string? DcpCreateObjectId, + string? DcpCreateObjectKind, + string? DcpCreateObjectName, + string? DcpCreateObjectSpanId, + List LinkSpanIds, + List EventNames); + +internal sealed record ValidationSummary( + string RunRoot, + string TargetAspirePath, + string ProfilerAspirePath, + string? LayoutPath, + string? DcpPath, + int PostStartDelaySeconds, + bool RequireDcpSpans, + string DashboardUrl, + string OtlpGrpcUrl, + string OtlpHttpUrl, + string AppHostPath, + string? StartedDashboardUrl, + string ExportZip, + string? DotnetTraceDirectory, + List DotnetTraceFiles, + string? DotnetBinlogDirectory, + List DotnetBinlogFiles, + string SpanSummary, + string ProfilingSessionId, + int CorrelatedSpanCount, + string TraceId);