-
Notifications
You must be signed in to change notification settings - Fork 2
feat: add task lifecycle diagnostics #120
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,13 +6,28 @@ import ( | |
| "io" | ||
| "os" | ||
| "os/signal" | ||
| "strings" | ||
| "syscall" | ||
| "time" | ||
|
|
||
| "github.com/gsd-build/daemon/internal/service" | ||
| "github.com/spf13/cobra" | ||
| ) | ||
|
|
||
| type logsOptionsState struct { | ||
| sessionID string | ||
| taskID string | ||
| lastTask bool | ||
| since time.Duration | ||
| level string | ||
| pretty bool | ||
| json bool | ||
| color string | ||
| noColor bool | ||
| } | ||
|
|
||
| var logsOptions logsOptionsState | ||
|
|
||
| func streamLogFile(ctx context.Context, logPath string, out io.Writer, pollInterval time.Duration) error { | ||
| fh, err := os.Open(logPath) | ||
| if err != nil { | ||
|
|
@@ -65,6 +80,10 @@ var logsCmd = &cobra.Command{ | |
| Use: "logs", | ||
| Short: "Tail the daemon log file", | ||
| RunE: func(cmd *cobra.Command, args []string) error { | ||
| if err := validateLogsOptions(); err != nil { | ||
| return err | ||
| } | ||
|
|
||
| logPath := service.LogPath() | ||
| if _, err := os.Stat(logPath); os.IsNotExist(err) { | ||
| return fmt.Errorf("no log file at %s — is the daemon installed?", logPath) | ||
|
|
@@ -77,10 +96,105 @@ var logsCmd = &cobra.Command{ | |
| ctx, stop := signal.NotifyContext(parent, os.Interrupt, syscall.SIGTERM) | ||
| defer stop() | ||
|
|
||
| return streamLogFile(ctx, logPath, os.Stdout, 250*time.Millisecond) | ||
| if !logsOptions.hasStructuredMode() { | ||
| return streamLogFile(ctx, logPath, cmd.OutOrStdout(), 250*time.Millisecond) | ||
| } | ||
|
|
||
| return renderLogFile(logPath, cmd.OutOrStdout()) | ||
| }, | ||
| } | ||
|
|
||
| func (opts logsOptionsState) hasStructuredMode() bool { | ||
| return opts.sessionID != "" || | ||
| opts.taskID != "" || | ||
| opts.lastTask || | ||
| opts.since > 0 || | ||
| opts.level != "" || | ||
| opts.pretty || | ||
| opts.json || | ||
| opts.color != "auto" || | ||
| opts.noColor | ||
| } | ||
|
|
||
| func validateLogsOptions() error { | ||
| count := 0 | ||
| if logsOptions.sessionID != "" { | ||
| count++ | ||
| } | ||
| if logsOptions.taskID != "" { | ||
| count++ | ||
| } | ||
| if logsOptions.lastTask { | ||
| count++ | ||
| } | ||
| if count > 1 { | ||
| return fmt.Errorf("--session, --task, and --last-task are mutually exclusive") | ||
| } | ||
| if logsOptions.pretty && logsOptions.json { | ||
| return fmt.Errorf("--pretty and --json are mutually exclusive") | ||
| } | ||
| switch logsOptions.color { | ||
| case string(colorAuto), string(colorAlways), string(colorNever): | ||
| default: | ||
| return fmt.Errorf("--color must be auto, always, or never") | ||
| } | ||
| return nil | ||
| } | ||
|
|
||
| func renderLogFile(logPath string, out io.Writer) error { | ||
| data, err := os.ReadFile(logPath) | ||
| if err != nil { | ||
| return err | ||
| } | ||
| lines := strings.Split(string(data), "\n") | ||
| filter := logFilter{ | ||
| TaskID: logsOptions.taskID, | ||
| SessionID: logsOptions.sessionID, | ||
| Since: logsOptions.since, | ||
| Level: logsOptions.level, | ||
| } | ||
| if logsOptions.lastTask { | ||
| filter.TaskID = latestTaskID(lines) | ||
| } | ||
|
Comment on lines
+156
to
+158
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Don't fall back to the entire log when If 🤖 Prompt for AI Agents |
||
| events := filterLogLines(lines, filter) | ||
| if logsOptions.json { | ||
| for _, event := range events { | ||
| if event.raw == "" { | ||
| continue | ||
| } | ||
| if _, err := fmt.Fprintln(out, event.raw); err != nil { | ||
| return err | ||
| } | ||
| } | ||
| return nil | ||
| } | ||
| mode := colorMode(logsOptions.color) | ||
| if logsOptions.noColor { | ||
| mode = colorNever | ||
| } | ||
| _, err = io.WriteString(out, renderPrettyTimeline(events, mode)) | ||
| return err | ||
| } | ||
|
|
||
| func latestTaskID(lines []string) string { | ||
| events := filterLogLines(lines, logFilter{}) | ||
| for i := len(events) - 1; i >= 0; i-- { | ||
| if events[i].TaskID != "" { | ||
| return events[i].TaskID | ||
| } | ||
| } | ||
| return "" | ||
| } | ||
|
|
||
| func init() { | ||
| logsCmd.Flags().StringVar(&logsOptions.sessionID, "session", "", "show log events for one session") | ||
| logsCmd.Flags().StringVar(&logsOptions.taskID, "task", "", "show log events for one task") | ||
| logsCmd.Flags().BoolVar(&logsOptions.lastTask, "last-task", false, "show the latest known task in local logs") | ||
| logsCmd.Flags().DurationVar(&logsOptions.since, "since", 0, "limit logs by duration, for example 10m or 2h") | ||
| logsCmd.Flags().StringVar(&logsOptions.level, "level", "", "minimum level: debug, info, warn, error") | ||
| logsCmd.Flags().BoolVar(&logsOptions.pretty, "pretty", false, "render a human timeline") | ||
| logsCmd.Flags().BoolVar(&logsOptions.json, "json", false, "emit filtered structured JSON lines") | ||
| logsCmd.Flags().StringVar(&logsOptions.color, "color", "auto", "color mode: auto, always, never") | ||
| logsCmd.Flags().BoolVar(&logsOptions.noColor, "no-color", false, "disable color output") | ||
| rootCmd.AddCommand(logsCmd) | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,84 @@ | ||
| package cmd | ||
|
|
||
| import ( | ||
| "encoding/json" | ||
| "strings" | ||
| "time" | ||
| ) | ||
|
|
||
| type logFilter struct { | ||
| TaskID string | ||
| SessionID string | ||
| Since time.Duration | ||
| Level string | ||
| } | ||
|
|
||
| type logEvent struct { | ||
| Time string `json:"time"` | ||
| Level string `json:"level"` | ||
| Msg string `json:"msg"` | ||
| Event string `json:"event"` | ||
| Phase string `json:"phase"` | ||
| TaskID string `json:"taskId"` | ||
| SessionID string `json:"sessionId"` | ||
| RequestID string `json:"requestId"` | ||
| TraceID string `json:"traceId"` | ||
| AttemptID string `json:"attemptId"` | ||
| AttemptNumber int `json:"attemptNumber"` | ||
| TurnKind string `json:"turnKind"` | ||
| ElapsedMs int64 `json:"elapsedMs"` | ||
| Model string `json:"model"` | ||
| Provider string `json:"provider"` | ||
| PID int `json:"pid"` | ||
| FailureCode string `json:"failureCode"` | ||
| Retryable bool `json:"retryable"` | ||
| PromptPreview string `json:"promptPreview"` | ||
| Cleanup string `json:"cleanup"` | ||
| raw string | ||
| } | ||
|
|
||
| func filterLogLines(lines []string, filter logFilter) []logEvent { | ||
| out := make([]logEvent, 0, len(lines)) | ||
| cutoff := time.Time{} | ||
| if filter.Since > 0 { | ||
| cutoff = time.Now().Add(-filter.Since) | ||
| } | ||
| for _, line := range lines { | ||
| line = strings.TrimSpace(line) | ||
| if line == "" { | ||
| continue | ||
| } | ||
| var event logEvent | ||
| if err := json.Unmarshal([]byte(line), &event); err != nil { | ||
| continue | ||
| } | ||
| event.raw = line | ||
| if filter.TaskID != "" && event.TaskID != filter.TaskID { | ||
| continue | ||
| } | ||
| if filter.SessionID != "" && event.SessionID != filter.SessionID { | ||
| continue | ||
| } | ||
| if !cutoff.IsZero() { | ||
| parsed, err := time.Parse(time.RFC3339Nano, event.Time) | ||
| if err == nil && parsed.Before(cutoff) { | ||
| continue | ||
| } | ||
| } | ||
| if filter.Level != "" && !levelAtLeast(event.Level, filter.Level) { | ||
| continue | ||
| } | ||
| out = append(out, event) | ||
| } | ||
| return out | ||
| } | ||
|
|
||
| func levelAtLeast(got string, want string) bool { | ||
| order := map[string]int{"DEBUG": 0, "INFO": 1, "WARN": 2, "ERROR": 3} | ||
| gotLevel, gotOK := order[strings.ToUpper(got)] | ||
| wantLevel, wantOK := order[strings.ToUpper(want)] | ||
| if !gotOK || !wantOK { | ||
| return false | ||
| } | ||
| return gotLevel >= wantLevel | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,111 @@ | ||
| package cmd | ||
|
|
||
| import ( | ||
| "fmt" | ||
| "strings" | ||
| "time" | ||
| ) | ||
|
|
||
| type colorMode string | ||
|
|
||
| const ( | ||
| colorAuto colorMode = "auto" | ||
| colorAlways colorMode = "always" | ||
| colorNever colorMode = "never" | ||
| ) | ||
|
|
||
| func renderPrettyTimeline(events []logEvent, mode colorMode) string { | ||
| var b strings.Builder | ||
| for _, event := range events { | ||
| label := prettyPhase(event.Phase) | ||
| ts := prettyTime(event.Time) | ||
| line := fmt.Sprintf("%s %-18s", ts, label) | ||
| if event.TaskID != "" && event.Phase == "task_received" { | ||
| line += " task=" + shortID(event.TaskID) | ||
| } | ||
| if event.PID != 0 { | ||
| line += fmt.Sprintf(" pid=%d", event.PID) | ||
| } | ||
| if event.Model != "" { | ||
| line += " model=" + event.Model | ||
| } | ||
| if event.FailureCode != "" { | ||
| line += " " + event.FailureCode | ||
| } | ||
| if event.Retryable { | ||
| line += " retryable" | ||
| } | ||
| if event.ElapsedMs > 0 { | ||
| line += fmt.Sprintf(" elapsed=%.1fs", float64(event.ElapsedMs)/1000) | ||
| } | ||
| if event.PromptPreview != "" { | ||
| line += ` "` + event.PromptPreview + `"` | ||
| } | ||
| if event.Cleanup != "" { | ||
| line += " " + event.Cleanup | ||
| } | ||
| if mode != colorNever { | ||
| line = colorizePrettyLine(line, event.Phase) | ||
| } | ||
|
Comment on lines
+47
to
+49
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Every mode except 🤖 Prompt for AI Agents |
||
| b.WriteString(line) | ||
| b.WriteByte('\n') | ||
| } | ||
| return b.String() | ||
| } | ||
|
|
||
| func prettyTime(raw string) string { | ||
| parsed, err := time.Parse(time.RFC3339Nano, raw) | ||
| if err != nil { | ||
| return "--:--:--" | ||
| } | ||
| return parsed.Format("15:04:05") | ||
| } | ||
|
|
||
| func prettyPhase(phase string) string { | ||
| switch phase { | ||
| case "task_received": | ||
| return "task received" | ||
| case "pi_process_started": | ||
| return "pi started" | ||
| case "prompt_written": | ||
| return "prompt written" | ||
| case "first_event_seen": | ||
| return "first event seen" | ||
| case "first_visible_event_seen": | ||
| return "first visible event" | ||
| case "cleanup_started": | ||
| return "cleanup started" | ||
| case "cleanup_finished": | ||
| return "cleanup finished" | ||
| case "task_completed": | ||
| return "completed" | ||
| case "task_failed": | ||
| return "failed" | ||
| case "timed_out", "task_timed_out": | ||
| return "timed out" | ||
| default: | ||
| return strings.ReplaceAll(phase, "_", " ") | ||
| } | ||
| } | ||
|
|
||
| func shortID(id string) string { | ||
| if len(id) <= 8 { | ||
| return id | ||
| } | ||
| return id[:8] | ||
| } | ||
|
|
||
| func colorizePrettyLine(line string, phase string) string { | ||
| switch phase { | ||
| case "task_completed", "cleanup_finished": | ||
| return "\x1b[32m" + line + "\x1b[0m" | ||
| case "task_failed", "task_timed_out", "timed_out", "task_lost": | ||
| return "\x1b[31m" + line + "\x1b[0m" | ||
| case "waiting_input", "retry_scheduled": | ||
| return "\x1b[33m" + line + "\x1b[0m" | ||
| case "pi_process_started", "prompt_written", "first_event_seen", "first_visible_event_seen": | ||
| return "\x1b[36m" + line + "\x1b[0m" | ||
| default: | ||
| return line | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Keep color flags from changing the command mode.
hasStructuredMode()treats--colorand--no-coloras selectors for structured rendering, sogsd-cloud logs --no-colorstops tailing the raw file and switches to a one-shot pretty snapshot. These flags should only modify pretty output once structured mode is already selected, or validation should require--pretty.🤖 Prompt for AI Agents