From a07d7456def8a1d6e8e6ef74b67fc7e0cb26844e Mon Sep 17 00:00:00 2001 From: Microck Date: Thu, 9 Apr 2026 20:29:46 +0000 Subject: [PATCH 01/12] fix(e2e): stabilize live windows and linux lanes Use a short Windows launcher script for scheduled tasks so the real hosted E2E run stays under schtasks /TR limits, and delete the launcher during agent self-removal. Fix the Linux live wrapper generation by preventing heredoc expansion while writing the isolated tailscale wrapper, then assert the Windows launcher exists and is removed in the live test. --- internal/app/workflow.go | 62 ++++++++++++++++++++++++++------- internal/app/workflow_test.go | 27 ++++++++++++++ tests/live/linux-live-e2e.sh | 7 ++-- tests/live/windows-live-e2e.ps1 | 7 ++++ 4 files changed, 88 insertions(+), 15 deletions(-) diff --git a/internal/app/workflow.go b/internal/app/workflow.go index 493cd38..3422d76 100644 --- a/internal/app/workflow.go +++ b/internal/app/workflow.go @@ -443,7 +443,23 @@ WantedBy=timers.target } func (m *Manager) installWindowsAgent(ctx context.Context, agentPath string) error { - taskCmd := fmt.Sprintf(`"%s" agent run --config "%s" --state "%s" --audit "%s" --log "%s"`, agentPath, m.Runtime.ConfigPath, m.Runtime.StatePath, m.Runtime.AuditPath, m.Runtime.LogPath) + launcherPath := windowsAgentLauncherPath(agentPath) + launcherBody := windowsAgentLauncherContent(agentPath, m.Runtime) + if m.Runtime.DryRun { + m.Logger.Info("[dry-run] would install windows agent launcher at %s", launcherPath) + } else { + if err := platform.EnsureParent(launcherPath); err != nil { + return err + } + if err := os.WriteFile(launcherPath, []byte(launcherBody), 0o644); err != nil { + return fmt.Errorf("write windows agent launcher: %w", err) + } + } + + taskCmd := windowsScheduledTaskCommand(launcherPath) + if len(taskCmd) > 261 { + return fmt.Errorf("windows scheduled task target exceeds schtasks /TR limit: %d", len(taskCmd)) + } commands := [][]string{ {"schtasks", "/Create", "/TN", "TailStickAgent-Startup", "/SC", "ONSTART", "/TR", taskCmd, "/RL", "HIGHEST", "/F"}, {"schtasks", "/Create", "/TN", "TailStickAgent-Periodic", "/SC", "MINUTE", "/MO", "1", "/TR", taskCmd, "/RL", "HIGHEST", "/F"}, @@ -464,7 +480,7 @@ func (m *Manager) uninstallAgent(ctx context.Context) error { } else { err = m.uninstallLinuxAgent(ctx) } - removeErr := m.removeLocalAgentBinary(ctx) + removeErr := m.removeLocalAgentArtifacts(ctx) if err != nil { return err } @@ -543,28 +559,50 @@ func (m *Manager) ensureLocalAgentBinary() (string, error) { return target, nil } -func (m *Manager) removeLocalAgentBinary(ctx context.Context) error { - target := platform.AgentBinaryPath() - if m.Runtime.DryRun { - m.Logger.Info("[dry-run] would remove local agent binary %s", target) - return nil +func (m *Manager) removeLocalAgentArtifacts(ctx context.Context) error { + targets := []string{platform.AgentBinaryPath()} + if runtime.GOOS == "windows" { + targets = append(targets, windowsAgentLauncherPath(platform.AgentBinaryPath())) } - err := os.Remove(target) - if err == nil || os.IsNotExist(err) { + if m.Runtime.DryRun { + m.Logger.Info("[dry-run] would remove local agent artifacts %s", strings.Join(targets, ", ")) return nil } if runtime.GOOS != "windows" { + err := os.Remove(targets[0]) + if err == nil || os.IsNotExist(err) { + return nil + } return fmt.Errorf("remove local agent binary: %w", err) } - escaped := strings.ReplaceAll(target, `"`, `\"`) - delCmd := fmt.Sprintf(`start "" /B cmd /C "ping 127.0.0.1 -n 3 >NUL & del /f /q \"%s\""`, escaped) + quotedTargets := make([]string, 0, len(targets)) + for _, target := range targets { + quotedTargets = append(quotedTargets, fmt.Sprintf(`\"%s\"`, strings.ReplaceAll(target, `"`, `\"`))) + } + delCmd := fmt.Sprintf(`start "" /B cmd /C "ping 127.0.0.1 -n 3 >NUL & del /f /q %s"`, strings.Join(quotedTargets, " ")) if _, delayedErr := m.Runner.Run(ctx, []string{"cmd", "/C", delCmd}); delayedErr != nil { - return fmt.Errorf("schedule delayed local agent binary delete: %w", delayedErr) + return fmt.Errorf("schedule delayed local agent artifact delete: %w", delayedErr) } return nil } +func windowsAgentLauncherPath(agentPath string) string { + return filepath.Join(filepath.Dir(agentPath), "agent.cmd") +} + +func windowsScheduledTaskCommand(launcherPath string) string { + return fmt.Sprintf(`"%s"`, launcherPath) +} + +func windowsAgentLauncherContent(agentPath string, rt Runtime) string { + return strings.Join([]string{ + "@echo off", + fmt.Sprintf(`"%s" agent run --config "%s" --state "%s" --audit "%s" --log "%s"`, agentPath, rt.ConfigPath, rt.StatePath, rt.AuditPath, rt.LogPath), + "", + }, "\r\n") +} + func validateExitNode(preset model.Preset, value string) error { if strings.TrimSpace(value) == "" { return nil diff --git a/internal/app/workflow_test.go b/internal/app/workflow_test.go index 3bd7d60..718fd7a 100644 --- a/internal/app/workflow_test.go +++ b/internal/app/workflow_test.go @@ -267,6 +267,33 @@ func TestAgentOnceMarksActiveLeaseAsNoAction(t *testing.T) { } } +func TestWindowsScheduledTaskCommandUsesShortLauncher(t *testing.T) { + root := t.TempDir() + agentPath := filepath.Join(root, "TailStick", "tailstick-agent.exe") + rt := Runtime{ + ConfigPath: filepath.Join(root, strings.Repeat("config-segment-", 8), "tailstick.config.json"), + StatePath: filepath.Join(root, strings.Repeat("state-segment-", 8), "state.json"), + AuditPath: filepath.Join(root, strings.Repeat("audit-segment-", 8), "audit.ndjson"), + LogPath: filepath.Join(root, strings.Repeat("log-segment-", 8), "tailstick.log"), + } + + launcherPath := windowsAgentLauncherPath(agentPath) + taskCmd := windowsScheduledTaskCommand(launcherPath) + if len(taskCmd) > 261 { + t.Fatalf("task command length = %d, want <= 261", len(taskCmd)) + } + if !strings.HasSuffix(launcherPath, filepath.Join("TailStick", "agent.cmd")) { + t.Fatalf("launcher path %q should live beside the agent binary", launcherPath) + } + + launcherBody := windowsAgentLauncherContent(agentPath, rt) + for _, want := range []string{agentPath, rt.ConfigPath, rt.StatePath, rt.AuditPath, rt.LogPath} { + if !strings.Contains(launcherBody, want) { + t.Fatalf("launcher body missing %q", want) + } + } +} + func newWorkflowTestManager(t *testing.T, dryRun bool) (*Manager, string, string, model.Cleanup) { t.Helper() diff --git a/tests/live/linux-live-e2e.sh b/tests/live/linux-live-e2e.sh index fa07057..ed259be 100755 --- a/tests/live/linux-live-e2e.sh +++ b/tests/live/linux-live-e2e.sh @@ -71,12 +71,12 @@ curl -fsSL https://tailscale.com/install.sh | sh REAL_TAILSCALE="$(command -v tailscale)" REAL_TAILSCALED="$(command -v tailscaled)" -cat > "$WRAPPER_DIR/tailscale" < "$WRAPPER_DIR/tailscale" <<'EOF' #!/usr/bin/env bash set -euo pipefail -real="${REAL_TAILSCALE}" -socket="${TS_SOCKET}" +real="__REAL_TAILSCALE__" +socket="__TS_SOCKET__" if [ "\${1:-}" = "version" ]; then exec "\$real" "\$@" @@ -84,6 +84,7 @@ fi exec "\$real" --socket "\$socket" "\$@" EOF +sed -i "s|__REAL_TAILSCALE__|$REAL_TAILSCALE|g; s|__TS_SOCKET__|$TS_SOCKET|g" "$WRAPPER_DIR/tailscale" chmod +x "$WRAPPER_DIR/tailscale" "$REAL_TAILSCALED" --state="$TS_STATE" --socket="$TS_SOCKET" --tun=userspace-networking >"$TS_LOG" 2>&1 & diff --git a/tests/live/windows-live-e2e.ps1 b/tests/live/windows-live-e2e.ps1 index 88bda57..61134cf 100644 --- a/tests/live/windows-live-e2e.ps1 +++ b/tests/live/windows-live-e2e.ps1 @@ -44,6 +44,7 @@ $logPath = Join-Path $workDir "tailstick.log" $auditPath = Join-Path $workDir "audit.ndjson" $programDataRoot = if ($env:ProgramData) { $env:ProgramData } else { "C:\ProgramData" } $agentBinaryPath = Join-Path $programDataRoot "TailStick\tailstick-agent.exe" +$agentLauncherPath = Join-Path $programDataRoot "TailStick\agent.cmd" $headers = Get-BasicAuthHeader $env:TAILSTICK_API_KEY $deviceId = $null @@ -111,6 +112,9 @@ try { if (-not (Test-Path $agentBinaryPath)) { throw "expected agent binary at $agentBinaryPath" } + if (-not (Test-Path $agentLauncherPath)) { + throw "expected agent launcher at $agentLauncherPath" + } $cleanupOutput = ((& $bin cleanup ` --config $configPath ` @@ -157,6 +161,9 @@ try { if (Test-Path $agentBinaryPath) { throw "agent binary still exists after self-removal" } + if (Test-Path $agentLauncherPath) { + throw "agent launcher still exists after self-removal" + } Write-Host "windows-live-e2e: PASS" } finally { From 169a9e272a409435b338410c0da2b9d0244026c9 Mon Sep 17 00:00:00 2001 From: Microck Date: Thu, 9 Apr 2026 20:35:46 +0000 Subject: [PATCH 02/12] fix(ci): mint fresh tailscale keys per live run Generate short-lived Linux and Windows auth keys from the Tailscale API during the live workflow so reruns do not depend on stale or one-shot stored auth keys. Also move the Linux live workdir to /var/tmp and recreate it after package installation, then update the docs to match the new secret requirements. --- .github/workflows/live-e2e.yml | 58 +++++++++++++++++++++++++++++++--- docs/testing.md | 4 +-- tests/live/linux-live-e2e.sh | 4 ++- 3 files changed, 58 insertions(+), 8 deletions(-) diff --git a/.github/workflows/live-e2e.yml b/.github/workflows/live-e2e.yml index aef06cd..ef3108b 100644 --- a/.github/workflows/live-e2e.yml +++ b/.github/workflows/live-e2e.yml @@ -63,9 +63,26 @@ jobs: docker exec tailstick-live-e2e bash -lc 'ls -la /run || true' exit 1 + - name: Create Linux live auth key + id: linux-key + env: + TAILSTICK_API_KEY: ${{ secrets.TAILSTICK_LIVE_E2E_API_KEY }} + run: | + response="$( + curl -fsS -u "${TAILSTICK_API_KEY}:" \ + -H 'Content-Type: application/json' \ + -d '{"capabilities":{"devices":{"create":{"reusable":false,"ephemeral":false,"preauthorized":true}}},"expirySeconds":3600,"description":"linux live e2e"}' \ + https://api.tailscale.com/api/v2/tailnet/-/keys + )" + auth_key="$(printf '%s' "$response" | jq -r '.key')" + test -n "$auth_key" + test "$auth_key" != "null" + echo "::add-mask::$auth_key" + echo "auth_key=$auth_key" >> "$GITHUB_OUTPUT" + - name: Run Linux live E2E env: - TAILSTICK_AUTH_KEY: ${{ secrets.TAILSTICK_LIVE_E2E_AUTH_KEY }} + TAILSTICK_AUTH_KEY: ${{ steps.linux-key.outputs.auth_key }} TAILSTICK_API_KEY: ${{ secrets.TAILSTICK_LIVE_E2E_API_KEY }} TAILSTICK_OPERATOR_PASSWORD: ${{ secrets.TAILSTICK_LIVE_E2E_OPERATOR_PASSWORD }} TAILSTICK_BIN: /src/dist/tailstick-linux-cli @@ -82,9 +99,9 @@ jobs: if: failure() run: | docker exec tailstick-live-e2e bash -lc 'journalctl --no-pager || true' - docker exec tailstick-live-e2e bash -lc 'cat /tmp/tailstick-live-e2e/tailscaled.log || true' - docker exec tailstick-live-e2e bash -lc 'cat /tmp/tailstick-live-e2e/tailstick.log || true' - docker exec tailstick-live-e2e bash -lc 'cat /tmp/tailstick-live-e2e/state.json || true' + docker exec tailstick-live-e2e bash -lc 'cat /var/tmp/tailstick-live-e2e/tailscaled.log || true' + docker exec tailstick-live-e2e bash -lc 'cat /var/tmp/tailstick-live-e2e/tailstick.log || true' + docker exec tailstick-live-e2e bash -lc 'cat /var/tmp/tailstick-live-e2e/state.json || true' - name: Stop isolated Linux container if: always() @@ -118,10 +135,41 @@ jobs: } Add-Content -Path $env:GITHUB_PATH -Value $tailscalePath + - name: Create Windows live auth key + id: windows-key + shell: pwsh + env: + TAILSTICK_API_KEY: ${{ secrets.TAILSTICK_LIVE_E2E_API_KEY }} + run: | + $tokenBytes = [System.Text.Encoding]::ASCII.GetBytes("${env:TAILSTICK_API_KEY}:") + $headers = @{ + Authorization = "Basic $([Convert]::ToBase64String($tokenBytes))" + "Content-Type" = "application/json" + } + $body = @{ + capabilities = @{ + devices = @{ + create = @{ + reusable = $false + ephemeral = $true + preauthorized = $true + } + } + } + expirySeconds = 3600 + description = "windows live e2e" + } | ConvertTo-Json -Depth 5 -Compress + $response = Invoke-RestMethod -Method Post -Uri "https://api.tailscale.com/api/v2/tailnet/-/keys" -Headers $headers -Body $body + if ([string]::IsNullOrWhiteSpace($response.key)) { + throw "failed to create ephemeral auth key for windows live E2E" + } + Write-Host "::add-mask::$($response.key)" + "ephemeral_auth_key=$($response.key)" | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 + - name: Run Windows live E2E shell: pwsh env: - TAILSTICK_EPHEMERAL_AUTH_KEY: ${{ secrets.TAILSTICK_LIVE_E2E_EPHEMERAL_AUTH_KEY }} + TAILSTICK_EPHEMERAL_AUTH_KEY: ${{ steps.windows-key.outputs.ephemeral_auth_key }} TAILSTICK_API_KEY: ${{ secrets.TAILSTICK_LIVE_E2E_API_KEY }} TAILSTICK_OPERATOR_PASSWORD: ${{ secrets.TAILSTICK_LIVE_E2E_OPERATOR_PASSWORD }} run: ./tests/live/windows-live-e2e.ps1 diff --git a/docs/testing.md b/docs/testing.md index 5025e96..1e82057 100644 --- a/docs/testing.md +++ b/docs/testing.md @@ -14,13 +14,13 @@ File: `.github/workflows/live-e2e.yml` This workflow is manual by design (`workflow_dispatch`) and uses real Tailscale credentials stored as GitHub Actions secrets: -- `TAILSTICK_LIVE_E2E_AUTH_KEY` -- `TAILSTICK_LIVE_E2E_EPHEMERAL_AUTH_KEY` - `TAILSTICK_LIVE_E2E_API_KEY` - `TAILSTICK_LIVE_E2E_OPERATOR_PASSWORD` It is intentionally separate from the default CI matrix because it creates real tailnet devices and performs real cleanup/delete operations. +Each live lane mints a short-lived auth key from the Tailscale API at runtime so the workflow does not depend on long-lived or previously-consumed auth keys. + ### Linux Live E2E - Runs inside a privileged Ubuntu 24.04 systemd container built from `tests/live/linux-live-e2e.dockerfile` diff --git a/tests/live/linux-live-e2e.sh b/tests/live/linux-live-e2e.sh index ed259be..7d6049d 100755 --- a/tests/live/linux-live-e2e.sh +++ b/tests/live/linux-live-e2e.sh @@ -37,7 +37,7 @@ if [ "$(id -u)" -ne 0 ]; then fi TAILSTICK_BIN="${TAILSTICK_BIN:-/src/dist/tailstick-linux-cli}" -WORKDIR="/tmp/tailstick-live-e2e" +WORKDIR="/var/tmp/tailstick-live-e2e" CONFIG_PATH="$WORKDIR/tailstick.config.json" STATE_PATH="$WORKDIR/state.json" LOG_PATH="$WORKDIR/tailstick.log" @@ -71,6 +71,8 @@ curl -fsSL https://tailscale.com/install.sh | sh REAL_TAILSCALE="$(command -v tailscale)" REAL_TAILSCALED="$(command -v tailscaled)" +mkdir -p "$WORKDIR" "$WRAPPER_DIR" + cat > "$WRAPPER_DIR/tailscale" <<'EOF' #!/usr/bin/env bash set -euo pipefail From a313bb1d7f6975194a37ce2e8e2423f99932bb5d Mon Sep 17 00:00:00 2001 From: Microck Date: Thu, 9 Apr 2026 20:39:43 +0000 Subject: [PATCH 03/12] fix(e2e): correct live task assertions Fix the Linux wrapper script so its internal variables expand at runtime, and switch the Windows live test to verify scheduled tasks through schtasks instead of Get-ScheduledTask. --- tests/live/linux-live-e2e.sh | 6 +++--- tests/live/windows-live-e2e.ps1 | 26 ++++++++++++++++++-------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/tests/live/linux-live-e2e.sh b/tests/live/linux-live-e2e.sh index 7d6049d..4855ef6 100755 --- a/tests/live/linux-live-e2e.sh +++ b/tests/live/linux-live-e2e.sh @@ -80,11 +80,11 @@ set -euo pipefail real="__REAL_TAILSCALE__" socket="__TS_SOCKET__" -if [ "\${1:-}" = "version" ]; then - exec "\$real" "\$@" +if [ "${1:-}" = "version" ]; then + exec "$real" "$@" fi -exec "\$real" --socket "\$socket" "\$@" +exec "$real" --socket "$socket" "$@" EOF sed -i "s|__REAL_TAILSCALE__|$REAL_TAILSCALE|g; s|__TS_SOCKET__|$TS_SOCKET|g" "$WRAPPER_DIR/tailscale" chmod +x "$WRAPPER_DIR/tailscale" diff --git a/tests/live/windows-live-e2e.ps1 b/tests/live/windows-live-e2e.ps1 index 61134cf..52ff068 100644 --- a/tests/live/windows-live-e2e.ps1 +++ b/tests/live/windows-live-e2e.ps1 @@ -31,6 +31,20 @@ function Wait-ForDeviceGone([string]$DeviceId, [hashtable]$Headers) { throw "device $DeviceId still exists after cleanup" } +function Assert-TaskExists([string]$TaskName) { + $queryOutput = (& schtasks /Query /TN $TaskName 2>&1 | Out-String).Trim() + if ($LASTEXITCODE -ne 0) { + throw "expected scheduled task $TaskName to exist: $queryOutput" + } +} + +function Assert-TaskMissing([string]$TaskName) { + $queryOutput = (& schtasks /Query /TN $TaskName 2>&1 | Out-String).Trim() + if ($LASTEXITCODE -eq 0) { + throw "scheduled task $TaskName still exists: $queryOutput" + } +} + Require-Env "TAILSTICK_EPHEMERAL_AUTH_KEY" Require-Env "TAILSTICK_API_KEY" Require-Env "TAILSTICK_OPERATOR_PASSWORD" @@ -106,8 +120,8 @@ try { Invoke-RestMethod -Method Get -Uri "https://api.tailscale.com/api/v2/device/$deviceId" -Headers $headers | Out-Null - Get-ScheduledTask -TaskName "TailStickAgent-Startup" | Out-Null - Get-ScheduledTask -TaskName "TailStickAgent-Periodic" | Out-Null + Assert-TaskExists "TailStickAgent-Startup" + Assert-TaskExists "TailStickAgent-Periodic" if (-not (Test-Path $agentBinaryPath)) { throw "expected agent binary at $agentBinaryPath" @@ -150,12 +164,8 @@ try { throw "agent --once command failed: $agentOutput" } - if ($null -ne (Get-ScheduledTask -TaskName "TailStickAgent-Startup" -ErrorAction SilentlyContinue)) { - throw "startup task still exists after self-removal" - } - if ($null -ne (Get-ScheduledTask -TaskName "TailStickAgent-Periodic" -ErrorAction SilentlyContinue)) { - throw "periodic task still exists after self-removal" - } + Assert-TaskMissing "TailStickAgent-Startup" + Assert-TaskMissing "TailStickAgent-Periodic" Start-Sleep -Seconds 5 if (Test-Path $agentBinaryPath) { From f41de173c07a88c29ce4da3ece601406b8752410 Mon Sep 17 00:00:00 2001 From: Microck Date: Thu, 9 Apr 2026 20:46:01 +0000 Subject: [PATCH 04/12] fix(e2e): handle live cleanup races Avoid starting the Linux oneshot agent service during enrollment so the timer does not race the initial lease state, and treat a 404 device delete as already cleaned for ephemeral Windows nodes. --- internal/app/workflow.go | 14 ++++--- internal/app/workflow_test.go | 22 ++++++++++ internal/tailscale/client.go | 10 ++++- internal/tailscale/client_test.go | 69 +++++++++++++++++++++++++++++++ 4 files changed, 108 insertions(+), 7 deletions(-) create mode 100644 internal/tailscale/client_test.go diff --git a/internal/app/workflow.go b/internal/app/workflow.go index 3422d76..4b8fc23 100644 --- a/internal/app/workflow.go +++ b/internal/app/workflow.go @@ -430,11 +430,7 @@ WantedBy=timers.target if err := os.WriteFile(timerPath, []byte(timer), 0o644); err != nil { return fmt.Errorf("write systemd timer: %w", err) } - for _, cmd := range [][]string{ - {"systemctl", "daemon-reload"}, - {"systemctl", "enable", "--now", "tailstick-agent.timer"}, - {"systemctl", "start", "tailstick-agent.service"}, - } { + for _, cmd := range linuxAgentInstallCommands() { if _, err := m.Runner.Run(ctx, cmd); err != nil { return err } @@ -603,6 +599,14 @@ func windowsAgentLauncherContent(agentPath string, rt Runtime) string { }, "\r\n") } +func linuxAgentInstallCommands() [][]string { + return [][]string{ + {"systemctl", "daemon-reload"}, + {"systemctl", "enable", "tailstick-agent.timer"}, + {"systemctl", "start", "tailstick-agent.timer"}, + } +} + func validateExitNode(preset model.Preset, value string) error { if strings.TrimSpace(value) == "" { return nil diff --git a/internal/app/workflow_test.go b/internal/app/workflow_test.go index 718fd7a..9e9768f 100644 --- a/internal/app/workflow_test.go +++ b/internal/app/workflow_test.go @@ -294,6 +294,28 @@ func TestWindowsScheduledTaskCommandUsesShortLauncher(t *testing.T) { } } +func TestLinuxAgentInstallCommandsStartOnlyTheTimer(t *testing.T) { + got := linuxAgentInstallCommands() + want := [][]string{ + {"systemctl", "daemon-reload"}, + {"systemctl", "enable", "tailstick-agent.timer"}, + {"systemctl", "start", "tailstick-agent.timer"}, + } + if len(got) != len(want) { + t.Fatalf("got %d commands want %d", len(got), len(want)) + } + for i := range want { + if !equalStringSlices(got[i], want[i]) { + t.Fatalf("command %d = %v want %v", i, got[i], want[i]) + } + } + for _, cmd := range got { + if len(cmd) >= 3 && cmd[0] == "systemctl" && cmd[1] == "start" && cmd[2] == "tailstick-agent.service" { + t.Fatalf("install sequence must not start the oneshot service directly: %v", cmd) + } + } +} + func newWorkflowTestManager(t *testing.T, dryRun bool) (*Manager, string, string, model.Cleanup) { t.Helper() diff --git a/internal/tailscale/client.go b/internal/tailscale/client.go index 173a86f..1ea245d 100644 --- a/internal/tailscale/client.go +++ b/internal/tailscale/client.go @@ -19,6 +19,8 @@ type Client struct { Runner platform.Runner } +var deleteDeviceHTTPClient = http.DefaultClient + func (c Client) IsInstalled(ctx context.Context) bool { _, err := c.Runner.Run(ctx, []string{"tailscale", "version"}) return err == nil @@ -138,7 +140,7 @@ func DeleteDevice(ctx context.Context, apiKey, deviceID string) error { return err } req.SetBasicAuth(apiKey, "") - resp, err := http.DefaultClient.Do(req) + resp, err := deleteDeviceHTTPClient.Do(req) if err != nil { return err } @@ -147,7 +149,11 @@ func DeleteDevice(ctx context.Context, apiKey, deviceID string) error { return nil } body, _ := io.ReadAll(io.LimitReader(resp.Body, 1024)) - return fmt.Errorf("delete device failed: status=%d body=%s", resp.StatusCode, strings.TrimSpace(string(body))) + bodyText := strings.TrimSpace(string(body)) + if resp.StatusCode == http.StatusNotFound { + return nil + } + return fmt.Errorf("delete device failed: status=%d body=%s", resp.StatusCode, bodyText) } func installCommand(preset model.Preset, channel model.Channel) []string { diff --git a/internal/tailscale/client_test.go b/internal/tailscale/client_test.go new file mode 100644 index 0000000..2c4b790 --- /dev/null +++ b/internal/tailscale/client_test.go @@ -0,0 +1,69 @@ +package tailscale + +import ( + "context" + "io" + "net/http" + "strings" + "testing" +) + +func TestDeleteDeviceTreatsNotFoundAsAlreadyDeleted(t *testing.T) { + originalClient := deleteDeviceHTTPClient + deleteDeviceHTTPClient = &http.Client{ + Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) { + if req.Method != http.MethodDelete { + t.Fatalf("got method %s want DELETE", req.Method) + } + if req.URL.String() != "https://api.tailscale.com/api/v2/device/device-123" { + t.Fatalf("got URL %s", req.URL.String()) + } + user, pass, ok := req.BasicAuth() + if !ok || user != "tskey-api-example" || pass != "" { + t.Fatalf("unexpected basic auth user=%q pass=%q ok=%v", user, pass, ok) + } + return &http.Response{ + StatusCode: http.StatusNotFound, + Body: io.NopCloser(strings.NewReader(`{"message":"no manageable device matching this ID found"}`)), + Header: make(http.Header), + }, nil + }), + } + t.Cleanup(func() { + deleteDeviceHTTPClient = originalClient + }) + + if err := DeleteDevice(context.Background(), "tskey-api-example", "device-123"); err != nil { + t.Fatalf("expected 404 delete to be treated as success, got %v", err) + } +} + +func TestDeleteDeviceReturnsErrorForOtherFailures(t *testing.T) { + originalClient := deleteDeviceHTTPClient + deleteDeviceHTTPClient = &http.Client{ + Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) { + return &http.Response{ + StatusCode: http.StatusForbidden, + Body: io.NopCloser(strings.NewReader(`{"message":"forbidden"}`)), + Header: make(http.Header), + }, nil + }), + } + t.Cleanup(func() { + deleteDeviceHTTPClient = originalClient + }) + + err := DeleteDevice(context.Background(), "tskey-api-example", "device-123") + if err == nil { + t.Fatal("expected delete error") + } + if !strings.Contains(err.Error(), "status=403") { + t.Fatalf("got error %q want 403 context", err) + } +} + +type roundTripFunc func(*http.Request) (*http.Response, error) + +func (fn roundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) { + return fn(req) +} From 8f595e7e57ce9088c6330c8b0a82833cb72f187e Mon Sep 17 00:00:00 2001 From: Microck Date: Thu, 9 Apr 2026 21:02:03 +0000 Subject: [PATCH 05/12] test(e2e): bound windows live api waits Add per-request timeouts and explicit progress markers to the Windows live E2E script so stalled Tailscale API calls do not hang the workflow without actionable output. --- tests/live/windows-live-e2e.ps1 | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/tests/live/windows-live-e2e.ps1 b/tests/live/windows-live-e2e.ps1 index 52ff068..8481cfa 100644 --- a/tests/live/windows-live-e2e.ps1 +++ b/tests/live/windows-live-e2e.ps1 @@ -14,16 +14,30 @@ function Get-BasicAuthHeader([string]$ApiKey) { } } +function Get-HttpStatusCode($ErrorRecord) { + if ($null -eq $ErrorRecord -or $null -eq $ErrorRecord.Exception -or $null -eq $ErrorRecord.Exception.Response) { + return $null + } + + return $ErrorRecord.Exception.Response.StatusCode.value__ +} + +function Invoke-TailscaleApi([string]$Method, [string]$Uri, [hashtable]$Headers, [int]$TimeoutSec = 15) { + Invoke-RestMethod -Method $Method -Uri $Uri -Headers $Headers -TimeoutSec $TimeoutSec +} + function Wait-ForDeviceGone([string]$DeviceId, [hashtable]$Headers) { for ($i = 0; $i -lt 30; $i++) { try { - Invoke-RestMethod -Method Get -Uri "https://api.tailscale.com/api/v2/device/$DeviceId" -Headers $Headers | Out-Null + Invoke-TailscaleApi -Method Get -Uri "https://api.tailscale.com/api/v2/device/$DeviceId" -Headers $Headers | Out-Null + Write-Host "windows-live-e2e: device $DeviceId still visible, retry $($i + 1)/30" Start-Sleep -Seconds 2 } catch { - $statusCode = $_.Exception.Response.StatusCode.value__ + $statusCode = Get-HttpStatusCode $_ if ($statusCode -eq 404) { return } + Write-Host "windows-live-e2e: device check retry $($i + 1)/30 failed with status=$statusCode" Start-Sleep -Seconds 2 } } @@ -85,6 +99,7 @@ $config = @' $config | Set-Content -Path $configPath -Encoding UTF8 try { + Write-Host "windows-live-e2e: enrolling session lease" $runOutput = ((& $bin run ` --config $configPath ` --state $statePath ` @@ -118,7 +133,8 @@ try { throw "credential ref missing or unreadable" } - Invoke-RestMethod -Method Get -Uri "https://api.tailscale.com/api/v2/device/$deviceId" -Headers $headers | Out-Null + Write-Host "windows-live-e2e: verifying device $deviceId exists" + Invoke-TailscaleApi -Method Get -Uri "https://api.tailscale.com/api/v2/device/$deviceId" -Headers $headers | Out-Null Assert-TaskExists "TailStickAgent-Startup" Assert-TaskExists "TailStickAgent-Periodic" @@ -130,6 +146,7 @@ try { throw "expected agent launcher at $agentLauncherPath" } + Write-Host "windows-live-e2e: forcing cleanup for lease $($record.leaseId)" $cleanupOutput = ((& $bin cleanup ` --config $configPath ` --state $statePath ` @@ -150,9 +167,11 @@ try { throw "credential ref should be removed after cleanup" } + Write-Host "windows-live-e2e: waiting for device $deviceId deletion" Wait-ForDeviceGone -DeviceId $deviceId -Headers $headers $deviceId = $null + Write-Host "windows-live-e2e: running agent self-removal" $agentOutput = ((& $bin agent ` --once ` --config $configPath ` @@ -179,7 +198,7 @@ try { } finally { if (-not [string]::IsNullOrWhiteSpace($deviceId)) { try { - Invoke-RestMethod -Method Delete -Uri "https://api.tailscale.com/api/v2/device/$deviceId" -Headers $headers | Out-Null + Invoke-TailscaleApi -Method Delete -Uri "https://api.tailscale.com/api/v2/device/$deviceId" -Headers $headers | Out-Null } catch { } } From a99f8a9451d2551d26f81390b23aafbd25e7abe1 Mon Sep 17 00:00:00 2001 From: Microck Date: Thu, 9 Apr 2026 21:09:59 +0000 Subject: [PATCH 06/12] test(e2e): timeout windows cli phases Wrap the Windows live E2E CLI invocations with explicit subprocess timeouts and captured output so hangs report the exact failing phase instead of consuming the entire job timeout. --- tests/live/windows-live-e2e.ps1 | 99 ++++++++++++++++++++++----------- 1 file changed, 65 insertions(+), 34 deletions(-) diff --git a/tests/live/windows-live-e2e.ps1 b/tests/live/windows-live-e2e.ps1 index 8481cfa..1d47307 100644 --- a/tests/live/windows-live-e2e.ps1 +++ b/tests/live/windows-live-e2e.ps1 @@ -26,6 +26,43 @@ function Invoke-TailscaleApi([string]$Method, [string]$Uri, [hashtable]$Headers, Invoke-RestMethod -Method $Method -Uri $Uri -Headers $Headers -TimeoutSec $TimeoutSec } +function Invoke-NativeCommand([string]$Label, [string]$FilePath, [string[]]$ArgumentList, [int]$TimeoutSec = 180) { + $stdoutPath = Join-Path ([System.IO.Path]::GetTempPath()) ("tailstick-live-" + [Guid]::NewGuid().ToString("N") + ".stdout.log") + $stderrPath = Join-Path ([System.IO.Path]::GetTempPath()) ("tailstick-live-" + [Guid]::NewGuid().ToString("N") + ".stderr.log") + $process = $null + + try { + $process = Start-Process -FilePath $FilePath ` + -ArgumentList $ArgumentList ` + -RedirectStandardOutput $stdoutPath ` + -RedirectStandardError $stderrPath ` + -NoNewWindow ` + -PassThru + + try { + $process | Wait-Process -Timeout $TimeoutSec -ErrorAction Stop + } catch { + if ($null -ne $process -and -not $process.HasExited) { + Stop-Process -Id $process.Id -Force -ErrorAction SilentlyContinue + } + throw "$Label timed out after $TimeoutSec seconds" + } + + $process.Refresh() + $stdout = if (Test-Path $stdoutPath) { Get-Content -Path $stdoutPath -Raw } else { "" } + $stderr = if (Test-Path $stderrPath) { Get-Content -Path $stderrPath -Raw } else { "" } + $combined = ($stdout + $stderr).Trim() + + if ($process.ExitCode -ne 0) { + throw "$Label failed with exit code $($process.ExitCode): $combined" + } + + return $combined + } finally { + Remove-Item -Path $stdoutPath, $stderrPath -Force -ErrorAction SilentlyContinue + } +} + function Wait-ForDeviceGone([string]$DeviceId, [hashtable]$Headers) { for ($i = 0; $i -lt 30; $i++) { try { @@ -100,20 +137,18 @@ $config | Set-Content -Path $configPath -Encoding UTF8 try { Write-Host "windows-live-e2e: enrolling session lease" - $runOutput = ((& $bin run ` - --config $configPath ` - --state $statePath ` - --log $logPath ` - --audit $auditPath ` - --preset live-e2e-windows ` - --mode session ` - --channel latest ` - --allow-existing ` - --password $env:TAILSTICK_OPERATOR_PASSWORD 2>&1) | Out-String).Trim() - - if ($LASTEXITCODE -ne 0) { - throw "run command failed: $runOutput" - } + $runOutput = Invoke-NativeCommand -Label "run command" -FilePath $bin -ArgumentList @( + "run", + "--config", $configPath, + "--state", $statePath, + "--log", $logPath, + "--audit", $auditPath, + "--preset", "live-e2e-windows", + "--mode", "session", + "--channel", "latest", + "--allow-existing", + "--password", $env:TAILSTICK_OPERATOR_PASSWORD + ) -TimeoutSec 180 $state = Get-Content -Path $statePath -Raw | ConvertFrom-Json $record = $state.records | Select-Object -First 1 @@ -147,16 +182,14 @@ try { } Write-Host "windows-live-e2e: forcing cleanup for lease $($record.leaseId)" - $cleanupOutput = ((& $bin cleanup ` - --config $configPath ` - --state $statePath ` - --log $logPath ` - --audit $auditPath ` - --lease-id $record.leaseId 2>&1) | Out-String).Trim() - - if ($LASTEXITCODE -ne 0) { - throw "cleanup command failed: $cleanupOutput" - } + $cleanupOutput = Invoke-NativeCommand -Label "cleanup command" -FilePath $bin -ArgumentList @( + "cleanup", + "--config", $configPath, + "--state", $statePath, + "--log", $logPath, + "--audit", $auditPath, + "--lease-id", $record.leaseId + ) -TimeoutSec 180 $stateAfterCleanup = Get-Content -Path $statePath -Raw | ConvertFrom-Json $recordAfterCleanup = $stateAfterCleanup.records | Select-Object -First 1 @@ -172,16 +205,14 @@ try { $deviceId = $null Write-Host "windows-live-e2e: running agent self-removal" - $agentOutput = ((& $bin agent ` - --once ` - --config $configPath ` - --state $statePath ` - --log $logPath ` - --audit $auditPath 2>&1) | Out-String).Trim() - - if ($LASTEXITCODE -ne 0) { - throw "agent --once command failed: $agentOutput" - } + $agentOutput = Invoke-NativeCommand -Label "agent command" -FilePath $bin -ArgumentList @( + "agent", + "--once", + "--config", $configPath, + "--state", $statePath, + "--log", $logPath, + "--audit", $auditPath + ) -TimeoutSec 120 Assert-TaskMissing "TailStickAgent-Startup" Assert-TaskMissing "TailStickAgent-Periodic" From 169089ba81fa3c0ab0cef2e8094c8b9f69146c19 Mon Sep 17 00:00:00 2001 From: Microck Date: Thu, 9 Apr 2026 21:15:27 +0000 Subject: [PATCH 07/12] fix(e2e): detach windows agent artifact cleanup Use a detached PowerShell Start-Process launcher for delayed Windows self-deletion so agent --once can return after cleanup instead of blocking on the cmd start chain. --- internal/app/workflow.go | 17 +++++++++++------ internal/app/workflow_test.go | 26 ++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/internal/app/workflow.go b/internal/app/workflow.go index 4b8fc23..6b65160 100644 --- a/internal/app/workflow.go +++ b/internal/app/workflow.go @@ -572,12 +572,7 @@ func (m *Manager) removeLocalAgentArtifacts(ctx context.Context) error { return fmt.Errorf("remove local agent binary: %w", err) } - quotedTargets := make([]string, 0, len(targets)) - for _, target := range targets { - quotedTargets = append(quotedTargets, fmt.Sprintf(`\"%s\"`, strings.ReplaceAll(target, `"`, `\"`))) - } - delCmd := fmt.Sprintf(`start "" /B cmd /C "ping 127.0.0.1 -n 3 >NUL & del /f /q %s"`, strings.Join(quotedTargets, " ")) - if _, delayedErr := m.Runner.Run(ctx, []string{"cmd", "/C", delCmd}); delayedErr != nil { + if _, delayedErr := m.Runner.Run(ctx, windowsDelayedDeleteCommand(targets)); delayedErr != nil { return fmt.Errorf("schedule delayed local agent artifact delete: %w", delayedErr) } return nil @@ -607,6 +602,16 @@ func linuxAgentInstallCommands() [][]string { } } +func windowsDelayedDeleteCommand(targets []string) []string { + quotedTargets := make([]string, 0, len(targets)) + for _, target := range targets { + quotedTargets = append(quotedTargets, fmt.Sprintf(`"%s"`, strings.ReplaceAll(target, `"`, `""`))) + } + cmdLine := "/c ping 127.0.0.1 -n 3 >NUL & del /f /q " + strings.Join(quotedTargets, " ") + ps := fmt.Sprintf(`Start-Process -FilePath cmd.exe -ArgumentList '%s' -WindowStyle Hidden`, strings.ReplaceAll(cmdLine, `'`, `''`)) + return []string{"powershell", "-NoProfile", "-Command", ps} +} + func validateExitNode(preset model.Preset, value string) error { if strings.TrimSpace(value) == "" { return nil diff --git a/internal/app/workflow_test.go b/internal/app/workflow_test.go index 9e9768f..dfdc858 100644 --- a/internal/app/workflow_test.go +++ b/internal/app/workflow_test.go @@ -316,6 +316,32 @@ func TestLinuxAgentInstallCommandsStartOnlyTheTimer(t *testing.T) { } } +func TestWindowsDelayedDeleteCommandUsesDetachedProcess(t *testing.T) { + targets := []string{ + `C:\ProgramData\TailStick\tailstick-agent.exe`, + `C:\ProgramData\TailStick\agent.cmd`, + } + + got := windowsDelayedDeleteCommand(targets) + if len(got) != 4 { + t.Fatalf("got %d command parts want 4", len(got)) + } + if got[0] != "powershell" || got[1] != "-NoProfile" || got[2] != "-Command" { + t.Fatalf("unexpected command prefix: %v", got[:3]) + } + if !strings.Contains(got[3], "Start-Process -FilePath cmd.exe") { + t.Fatalf("expected detached Start-Process launcher, got %q", got[3]) + } + for _, target := range targets { + if !strings.Contains(got[3], target) { + t.Fatalf("cleanup command missing target %q", target) + } + } + if strings.Contains(got[3], "/B") { + t.Fatalf("cleanup command should not use cmd start /B: %q", got[3]) + } +} + func newWorkflowTestManager(t *testing.T, dryRun bool) (*Manager, string, string, model.Cleanup) { t.Helper() From 1ea4d9a9737675b8b0fbda0aebe933ae54702f77 Mon Sep 17 00:00:00 2001 From: Microck Date: Thu, 9 Apr 2026 21:21:24 +0000 Subject: [PATCH 08/12] test(e2e): wait for windows scheduled task state --- tests/live/windows-live-e2e.ps1 | 34 +++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/tests/live/windows-live-e2e.ps1 b/tests/live/windows-live-e2e.ps1 index 1d47307..d6f0f61 100644 --- a/tests/live/windows-live-e2e.ps1 +++ b/tests/live/windows-live-e2e.ps1 @@ -96,6 +96,32 @@ function Assert-TaskMissing([string]$TaskName) { } } +function Wait-ForTaskExists([string]$TaskName, [int]$Retries = 10, [int]$DelaySec = 2) { + for ($i = 0; $i -lt $Retries; $i++) { + $queryOutput = (& schtasks /Query /TN $TaskName 2>&1 | Out-String).Trim() + if ($LASTEXITCODE -eq 0) { + return + } + Write-Host "windows-live-e2e: task $TaskName not visible yet, retry $($i + 1)/$Retries" + Start-Sleep -Seconds $DelaySec + } + + Assert-TaskExists $TaskName +} + +function Wait-ForTaskMissing([string]$TaskName, [int]$Retries = 10, [int]$DelaySec = 2) { + for ($i = 0; $i -lt $Retries; $i++) { + $queryOutput = (& schtasks /Query /TN $TaskName 2>&1 | Out-String).Trim() + if ($LASTEXITCODE -ne 0) { + return + } + Write-Host "windows-live-e2e: task $TaskName still present, retry $($i + 1)/$Retries" + Start-Sleep -Seconds $DelaySec + } + + Assert-TaskMissing $TaskName +} + Require-Env "TAILSTICK_EPHEMERAL_AUTH_KEY" Require-Env "TAILSTICK_API_KEY" Require-Env "TAILSTICK_OPERATOR_PASSWORD" @@ -171,8 +197,8 @@ try { Write-Host "windows-live-e2e: verifying device $deviceId exists" Invoke-TailscaleApi -Method Get -Uri "https://api.tailscale.com/api/v2/device/$deviceId" -Headers $headers | Out-Null - Assert-TaskExists "TailStickAgent-Startup" - Assert-TaskExists "TailStickAgent-Periodic" + Wait-ForTaskExists "TailStickAgent-Startup" + Wait-ForTaskExists "TailStickAgent-Periodic" if (-not (Test-Path $agentBinaryPath)) { throw "expected agent binary at $agentBinaryPath" @@ -214,8 +240,8 @@ try { "--audit", $auditPath ) -TimeoutSec 120 - Assert-TaskMissing "TailStickAgent-Startup" - Assert-TaskMissing "TailStickAgent-Periodic" + Wait-ForTaskMissing "TailStickAgent-Startup" + Wait-ForTaskMissing "TailStickAgent-Periodic" Start-Sleep -Seconds 5 if (Test-Path $agentBinaryPath) { From 6234bb4fc6b964c411ee1e8cacd7f21a638513a3 Mon Sep 17 00:00:00 2001 From: Microck Date: Thu, 9 Apr 2026 21:24:17 +0000 Subject: [PATCH 09/12] test(e2e): poll for windows agent artifact cleanup --- tests/live/windows-live-e2e.ps1 | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/tests/live/windows-live-e2e.ps1 b/tests/live/windows-live-e2e.ps1 index d6f0f61..2229ae5 100644 --- a/tests/live/windows-live-e2e.ps1 +++ b/tests/live/windows-live-e2e.ps1 @@ -122,6 +122,20 @@ function Wait-ForTaskMissing([string]$TaskName, [int]$Retries = 10, [int]$DelayS Assert-TaskMissing $TaskName } +function Wait-ForPathMissing([string]$Path, [string]$Label, [int]$Retries = 15, [int]$DelaySec = 2) { + for ($i = 0; $i -lt $Retries; $i++) { + if (-not (Test-Path $Path)) { + return + } + Write-Host "windows-live-e2e: $Label still present at $Path, retry $($i + 1)/$Retries" + Start-Sleep -Seconds $DelaySec + } + + if (Test-Path $Path) { + throw "$Label still exists after self-removal" + } +} + Require-Env "TAILSTICK_EPHEMERAL_AUTH_KEY" Require-Env "TAILSTICK_API_KEY" Require-Env "TAILSTICK_OPERATOR_PASSWORD" @@ -243,13 +257,8 @@ try { Wait-ForTaskMissing "TailStickAgent-Startup" Wait-ForTaskMissing "TailStickAgent-Periodic" - Start-Sleep -Seconds 5 - if (Test-Path $agentBinaryPath) { - throw "agent binary still exists after self-removal" - } - if (Test-Path $agentLauncherPath) { - throw "agent launcher still exists after self-removal" - } + Wait-ForPathMissing -Path $agentBinaryPath -Label "agent binary" + Wait-ForPathMissing -Path $agentLauncherPath -Label "agent launcher" Write-Host "windows-live-e2e: PASS" } finally { From 2b1b349404ff71e083529a49bc81d6aa015a5775 Mon Sep 17 00:00:00 2001 From: Microck Date: Thu, 9 Apr 2026 21:28:51 +0000 Subject: [PATCH 10/12] fix(e2e): stop windows agent after self-removal --- internal/app/workflow.go | 8 ++++++++ internal/app/workflow_test.go | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/internal/app/workflow.go b/internal/app/workflow.go index 6b65160..429f185 100644 --- a/internal/app/workflow.go +++ b/internal/app/workflow.go @@ -261,6 +261,14 @@ func (m *Manager) AgentRun(ctx context.Context, interval time.Duration) error { if err := m.AgentOnce(ctx); err != nil { m.Logger.Error("agent iteration failed: %v", err) } + st, err := state.Load(m.Runtime.StatePath) + if err != nil { + return err + } + if !hasActiveManagedLeases(st) { + m.Logger.Info("tailstick agent stopping: no active managed leases remain") + return nil + } select { case <-ctx.Done(): return ctx.Err() diff --git a/internal/app/workflow_test.go b/internal/app/workflow_test.go index dfdc858..cbb4040 100644 --- a/internal/app/workflow_test.go +++ b/internal/app/workflow_test.go @@ -234,6 +234,40 @@ func TestAgentOnceCleansExpiredLeaseAndPersistsState(t *testing.T) { } } +func TestAgentRunStopsAfterSelfRemovalWhenNoActiveLeasesRemain(t *testing.T) { + mgr, _, statePath, _ := newWorkflowTestManager(t, true) + rec := model.LeaseRecord{ + LeaseID: "lease-cleaned", + Mode: model.LeaseModeTimed, + Status: model.LeaseStatusCleaned, + } + if err := state.Save(statePath, model.LocalState{Records: []model.LeaseRecord{rec}}); err != nil { + t.Fatalf("save state: %v", err) + } + + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + + errCh := make(chan error, 1) + go func() { + errCh <- mgr.AgentRun(ctx, 10*time.Millisecond) + }() + + select { + case err := <-errCh: + if err != nil { + t.Fatalf("agent run returned err: %v", err) + } + case <-time.After(200 * time.Millisecond): + t.Fatal("agent run did not stop after self-removal") + } + + logBody := readFile(t, mgr.Runtime.LogPath) + if !strings.Contains(logBody, "tailstick agent stopping: no active managed leases remain") { + t.Fatalf("expected stop log, got %q", logBody) + } +} + func TestAgentOnceMarksActiveLeaseAsNoAction(t *testing.T) { mgr, _, statePath, _ := newWorkflowTestManager(t, true) expiresAt := time.Now().UTC().Add(2 * time.Hour) From 400e9f46dedae7de19ac22c16bba99eee6e14285 Mon Sep 17 00:00:00 2001 From: Microck Date: Thu, 9 Apr 2026 21:31:33 +0000 Subject: [PATCH 11/12] fix(e2e): clear windows live script exit state --- tests/live/windows-live-e2e.ps1 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/live/windows-live-e2e.ps1 b/tests/live/windows-live-e2e.ps1 index 2229ae5..e56b355 100644 --- a/tests/live/windows-live-e2e.ps1 +++ b/tests/live/windows-live-e2e.ps1 @@ -269,3 +269,5 @@ try { } } } + +exit 0 From cfbc6a1b9aee4b92b79e511239f6d3393199c84f Mon Sep 17 00:00:00 2001 From: Microck Date: Thu, 9 Apr 2026 21:39:43 +0000 Subject: [PATCH 12/12] fix(e2e): use supported agent install commands --- internal/app/workflow.go | 30 +++++++++++++++++------------- internal/app/workflow_test.go | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 13 deletions(-) diff --git a/internal/app/workflow.go b/internal/app/workflow.go index 429f185..d8a4d5e 100644 --- a/internal/app/workflow.go +++ b/internal/app/workflow.go @@ -404,18 +404,7 @@ func (m *Manager) installLinuxAgent(ctx context.Context, agentPath string) error servicePath := "/etc/systemd/system/tailstick-agent.service" timerPath := "/etc/systemd/system/tailstick-agent.timer" - service := fmt.Sprintf(`[Unit] -Description=TailStick lease agent -After=network-online.target -Wants=network-online.target - -[Service] -Type=oneshot -ExecStart=%q agent run --once --config %q --state %q --audit %q --log %q - -[Install] -WantedBy=multi-user.target -`, agentPath, m.Runtime.ConfigPath, m.Runtime.StatePath, m.Runtime.AuditPath, m.Runtime.LogPath) + service := linuxAgentServiceContent(agentPath, m.Runtime) timer := `[Unit] Description=TailStick lease agent timer @@ -597,11 +586,26 @@ func windowsScheduledTaskCommand(launcherPath string) string { func windowsAgentLauncherContent(agentPath string, rt Runtime) string { return strings.Join([]string{ "@echo off", - fmt.Sprintf(`"%s" agent run --config "%s" --state "%s" --audit "%s" --log "%s"`, agentPath, rt.ConfigPath, rt.StatePath, rt.AuditPath, rt.LogPath), + fmt.Sprintf(`"%s" agent --once --config "%s" --state "%s" --audit "%s" --log "%s"`, agentPath, rt.ConfigPath, rt.StatePath, rt.AuditPath, rt.LogPath), "", }, "\r\n") } +func linuxAgentServiceContent(agentPath string, rt Runtime) string { + return fmt.Sprintf(`[Unit] +Description=TailStick lease agent +After=network-online.target +Wants=network-online.target + +[Service] +Type=oneshot +ExecStart=%q agent --once --config %q --state %q --audit %q --log %q + +[Install] +WantedBy=multi-user.target +`, agentPath, rt.ConfigPath, rt.StatePath, rt.AuditPath, rt.LogPath) +} + func linuxAgentInstallCommands() [][]string { return [][]string{ {"systemctl", "daemon-reload"}, diff --git a/internal/app/workflow_test.go b/internal/app/workflow_test.go index cbb4040..a27ba6d 100644 --- a/internal/app/workflow_test.go +++ b/internal/app/workflow_test.go @@ -326,6 +326,12 @@ func TestWindowsScheduledTaskCommandUsesShortLauncher(t *testing.T) { t.Fatalf("launcher body missing %q", want) } } + if !strings.Contains(launcherBody, "agent --once") { + t.Fatalf("launcher body should invoke one-shot agent mode, got %q", launcherBody) + } + if strings.Contains(launcherBody, "agent run") { + t.Fatalf("launcher body should not use unsupported nested agent run form, got %q", launcherBody) + } } func TestLinuxAgentInstallCommandsStartOnlyTheTimer(t *testing.T) { @@ -350,6 +356,34 @@ func TestLinuxAgentInstallCommandsStartOnlyTheTimer(t *testing.T) { } } +func TestLinuxAgentServiceContentUsesAgentOnce(t *testing.T) { + rt := Runtime{ + ConfigPath: "/tmp/tailstick.config.json", + StatePath: "/tmp/state.json", + AuditPath: "/tmp/audit.ndjson", + LogPath: "/tmp/tailstick.log", + } + + body := linuxAgentServiceContent("/usr/local/bin/tailstick-agent", rt) + for _, want := range []string{ + "/usr/local/bin/tailstick-agent", + rt.ConfigPath, + rt.StatePath, + rt.AuditPath, + rt.LogPath, + } { + if !strings.Contains(body, want) { + t.Fatalf("service body missing %q", want) + } + } + if !strings.Contains(body, "agent --once") { + t.Fatalf("service body should invoke one-shot agent mode, got %q", body) + } + if strings.Contains(body, "agent run") { + t.Fatalf("service body should not use unsupported nested agent run form, got %q", body) + } +} + func TestWindowsDelayedDeleteCommandUsesDetachedProcess(t *testing.T) { targets := []string{ `C:\ProgramData\TailStick\tailstick-agent.exe`,