asynkron
diff --git a/internal/core/runtime/command_executor.go b/internal/core/runtime/command_executor.go
Lines changed: 44 additions & 17 deletions
diff --git a/internal/core/runtime/execution.go b/internal/core/runtime/execution.go
Lines changed: 10 additions & 0 deletions
diff --git a/internal/core/runtime/history.go b/internal/core/runtime/history.go
Lines changed: 23 additions & 1 deletion
diff --git a/internal/core/runtime/loop.go b/internal/core/runtime/loop.go
Lines changed: 6 additions & 6 deletions
diff --git a/internal/core/runtime/loop_test.go b/internal/core/runtime/loop_test.go
Lines changed: 3 additions & 3 deletions
diff --git a/internal/core/runtime/openai_client.go b/internal/core/runtime/openai_client.go
Lines changed: 18 additions & 11 deletions
diff --git a/internal/core/runtime/openai_client_test.go b/internal/core/runtime/openai_client_test.go
Lines changed: 2 additions & 1 deletion
@@ -181,27 +181,39 @@ func (e *CommandExecutor) Execute(ctx context.Context, step PlanStep) (PlanObser
 		observation.Details = runErr.Error()
 	}
 
-	// If the command failed, persist a detailed failure report for inspection.
-	if runErr != nil {
-		_ = writeFailureLog(step, stdout, stderr, runErr)
-	}
-
 	duration := time.Since(start)
-	e.metrics.RecordCommandExecution(step.ID, duration, runErr == nil)
+
+	// If the command failed, persist a detailed failure report for inspection.
 	if runErr != nil {
+		if err := writeFailureLog(step, stdout, stderr, runErr); err != nil {
+			// Log warning but don't fail execution - failure logging is best-effort
+			e.logger.Warn(ctx, "Failed to write failure log",
+				Field("step_id", step.ID),
+				Field("error", err.Error()),
+			)
+		}
+		e.metrics.RecordCommandExecution(step.ID, duration, false)
 		e.logger.Error(ctx, "Command execution failed", runErr,
 			Field("step_id", step.ID),
 			Field("shell", step.Command.Shell),
 			Field("duration_ms", duration.Milliseconds()),
 		)
-	} else {
-		e.logger.Debug(ctx, "Command execution completed",
-			Field("step_id", step.ID),
-			Field("duration_ms", duration.Milliseconds()),
-		)
+		// Return error with step context
+		if exitErr == nil {
+			return observation, fmt.Errorf("command[%s]: execution failed: %w", step.ID, runErr)
+		}
+		// Exit errors include exit code in the wrapped error
+		return observation, fmt.Errorf("command[%s]: exited with code %d: %w", step.ID, *observation.ExitCode, runErr)
 	}
 
-	return observation, runErr
+	e.metrics.RecordCommandExecution(step.ID, duration, true)
+	e.logger.Debug(ctx, "Command execution completed",
+		Field("step_id", step.ID),
+		Field("duration_ms", duration.Milliseconds()),
+	)
+
+	// Success - no error to return
+	return observation, nil
 }
 
 // writeFailureLog persists a diagnostic file under .goagent/ whenever a command
@@ -266,26 +278,41 @@ func writeFailureLog(step PlanStep, fullStdout, fullStderr []byte, runErr error)
 		_, _ = b.Write([]byte("\n"))
 	}
 
-	return os.WriteFile(path, b.Bytes(), 0o644)
+	if err := os.WriteFile(path, b.Bytes(), 0o644); err != nil {
+		return fmt.Errorf("writeFailureLog: failed to write file %q: %w", path, err)
+	}
+	return nil
 }
 
 func (e *CommandExecutor) executeInternal(ctx context.Context, step PlanStep) (PlanObservationPayload, error) {
 	invocation, err := parseInternalInvocation(step)
 	if err != nil {
-		return PlanObservationPayload{}, fmt.Errorf("command: %w", err)
+		e.logger.Error(ctx, "Failed to parse internal command invocation", err,
+			Field("step_id", step.ID),
+			Field("command_run", step.Command.Run),
+		)
+		return PlanObservationPayload{}, fmt.Errorf("command[%s]: parse internal invocation: %w", step.ID, err)
 	}
 
 	handler, ok := e.internal[invocation.Name]
 	if !ok {
-		return PlanObservationPayload{}, fmt.Errorf("command: unknown internal command %q", invocation.Name)
+		e.logger.Error(ctx, "Unknown internal command", nil,
+			Field("step_id", step.ID),
+			Field("command_name", invocation.Name),
+		)
+		return PlanObservationPayload{}, fmt.Errorf("command[%s]: unknown internal command %q", step.ID, invocation.Name)
 	}
 
 	payload, execErr := handler(ctx, invocation)
 	if execErr != nil {
+		e.logger.Error(ctx, "Internal command execution failed", execErr,
+			Field("step_id", step.ID),
+			Field("command_name", invocation.Name),
+		)
 		if payload.Details == "" {
 			payload.Details = execErr.Error()
 		}
-		return payload, execErr
+		return payload, fmt.Errorf("command[%s]: internal command %q failed: %w", step.ID, invocation.Name, execErr)
 	}
 	if payload.ExitCode == nil {
 		zero := 0
@@ -298,7 +325,7 @@ func parseInternalInvocation(step PlanStep) (InternalCommandRequest, error) {
 	run := strings.TrimSpace(step.Command.Run)
 	tokens, err := tokenizeInternalCommand(run)
 	if err != nil {
-		return InternalCommandRequest{}, err
+		return InternalCommandRequest{}, fmt.Errorf("parse internal command %q: %w", run, err)
 	}
 	if len(tokens) == 0 {
 		return InternalCommandRequest{}, errors.New("internal command: missing command name")
 
@@ -250,10 +250,20 @@ func (r *Runtime) executePendingCommands(ctx context.Context, toolCall ToolCall)
 			PlanObservation: []StepObservation{stepResult},
 		}}
 		if updateErr := r.plan.UpdateStatus(step.ID, status, planObservation); updateErr != nil {
+			updateErr = fmt.Errorf("execution: failed to update plan status for step %q: %w", step.ID, updateErr)
+			r.options.Logger.Error(ctx, "Failed to update plan status", updateErr,
+				Field("step_id", step.ID),
+				Field("status", string(status)),
+			)
 			r.emit(RuntimeEvent{
 				Type:    EventTypeError,
 				Message: fmt.Sprintf("Failed to update plan status for step %s: %v", step.ID, updateErr),
 				Level:   StatusLevelError,
+				Metadata: map[string]any{
+					"step_id": step.ID,
+					"status":  string(status),
+					"error":   updateErr.Error(),
+				},
 			})
 			if finalErr == nil {
 				finalErr = updateErr
 
@@ -1,6 +1,7 @@
 package runtime
 
 import (
+	"context"
 	"encoding/json"
 	"fmt"
 	"os"
@@ -37,11 +38,32 @@ func (r *Runtime) planningHistorySnapshot() []ChatMessage {
 		total, per := estimateHistoryTokenUsage(r.history)
 		if total > limit {
 			beforeLen := len(r.history)
-			compactHistory(r.history, per, total, limit)
+			// Add safeguard: limit iterations to prevent infinite loops
+			// If summarization doesn't reduce tokens enough, we'll stop after max iterations
+			const maxCompactionIterations = 10
+			iterations := 0
+			for total > limit && iterations < maxCompactionIterations {
+				var changed bool
+				total, per, changed = compactHistory(r.history, per, total, limit)
+				iterations++
+				if !changed {
+					// No progress made - all eligible messages already summarized
+					// or we can't make progress. Break to avoid infinite loop.
+					break
+				}
+			}
 			afterLen := len(r.history)
 			removed := beforeLen - afterLen
 			// Note: removed might be 0 if we just summarized without removing entries
 			r.options.Metrics.RecordContextCompaction(removed, afterLen)
+
+			if iterations >= maxCompactionIterations && total > limit {
+				r.options.Logger.Warn(context.Background(), "History compaction reached max iterations without meeting budget",
+					Field("total_tokens", total),
+					Field("limit", limit),
+					Field("iterations", iterations),
+				)
+			}
 		}
 	}
 
 
@@ -154,10 +154,6 @@ func (r *Runtime) handlePrompt(ctx context.Context, evt InputEvent) error {
 		Field("prompt_length", len(prompt)),
 	)
 
-	r.options.Logger.Info(ctx, "Processing user prompt",
-		Field("prompt_length", len(prompt)),
-	)
-
 	r.emit(RuntimeEvent{
 		Type:    EventTypeStatus,
 		Message: fmt.Sprintf("Processing prompt with model %s…", r.options.Model),
@@ -217,12 +213,16 @@ func (r *Runtime) requestPlan(ctx context.Context) (*PlanResponse, ToolCall, err
 			toolCall, err = r.client.RequestPlan(ctx, history)
 		}
 		if err != nil {
-			return nil, ToolCall{}, err
+			r.options.Logger.Error(ctx, "Failed to request plan from OpenAI", err)
+			return nil, ToolCall{}, fmt.Errorf("requestPlan: API request failed: %w", err)
 		}
 
 		plan, retry, validationErr := r.validatePlanToolCall(toolCall)
 		if validationErr != nil {
-			return nil, ToolCall{}, validationErr
+			r.options.Logger.Error(ctx, "Plan validation failed", validationErr,
+				Field("tool_call_id", toolCall.ID),
+			)
+			return nil, ToolCall{}, fmt.Errorf("requestPlan: validation failed: %w", validationErr)
 		}
 		if retry {
 			retryCount++
 
@@ -140,7 +140,7 @@ func TestPlanExecutionLoopPausesForHumanInput(t *testing.T) {
 		"data: [DONE]\n\n"
 	transport := &stubTransport{body: []byte(sse), statusCode: http.StatusOK}
 
-	client, err := NewOpenAIClient("test-key", "gpt-4o", "", "", nil, nil)
+	client, err := NewOpenAIClient("test-key", "gpt-4o", "", "", nil, nil, nil, 120*time.Second)
 	if err != nil {
 		t.Fatalf("failed to create client: %v", err)
 	}
@@ -226,7 +226,7 @@ func TestPlanExecutionLoopHandsFreeCompletes(t *testing.T) {
 		"data: [DONE]\n\n"
 	transport := &stubTransport{body: []byte(sse), statusCode: http.StatusOK}
 
-	client, err := NewOpenAIClient("test-key", "gpt-4o", "", "", nil, nil)
+	client, err := NewOpenAIClient("test-key", "gpt-4o", "", "", nil, nil, nil, 120*time.Second)
 	if err != nil {
 		t.Fatalf("failed to create client: %v", err)
 	}
@@ -323,7 +323,7 @@ func TestPlanExecutionLoopHandsFreeStopsAtPassLimit(t *testing.T) {
 		"data: [DONE]\n\n"
 	transport := &stubTransport{body: []byte(sse), statusCode: http.StatusOK}
 
-	client, err := NewOpenAIClient("test-key", "gpt-4o", "", "", nil, nil)
+	client, err := NewOpenAIClient("test-key", "gpt-4o", "", "", nil, nil, nil, 120*time.Second)
 	if err != nil {
 		t.Fatalf("failed to create client: %v", err)
 	}
 
@@ -24,12 +24,13 @@ type OpenAIClient struct {
 	baseURL         string
 	logger          Logger
 	metrics         Metrics
+	retryConfig     *RetryConfig
 }
 
 const defaultOpenAIBaseURL = "https://api.openai.com/v1"
 
 // NewOpenAIClient configures the client with the provided API key and model identifier.
-func NewOpenAIClient(apiKey, model, reasoningEffort, baseURL string, logger Logger, metrics Metrics) (*OpenAIClient, error) {
+func NewOpenAIClient(apiKey, model, reasoningEffort, baseURL string, logger Logger, metrics Metrics, retryConfig *RetryConfig, httpTimeout time.Duration) (*OpenAIClient, error) {
 	if apiKey == "" {
 		return nil, errors.New("openai: API key is required")
 	}
@@ -55,12 +56,13 @@ func NewOpenAIClient(apiKey, model, reasoningEffort, baseURL string, logger Logg
 		model:           model,
 		reasoningEffort: reasoningEffort,
 		httpClient: &http.Client{
-			Timeout: 120 * time.Second,
+			Timeout: httpTimeout,
 		},
-		tool:    tool,
-		baseURL: baseURL,
-		logger:  logger,
-		metrics: metrics,
+		tool:        tool,
+		baseURL:     baseURL,
+		logger:      logger,
+		metrics:     metrics,
+		retryConfig: retryConfig,
 	}, nil
 }
 
@@ -94,13 +96,17 @@ func (c *OpenAIClient) RequestPlanStreamingResponses(ctx context.Context, histor
 	inputMsgs := buildMessagesFromHistory(history)
 	payload, err := c.buildRequestBody(inputMsgs)
 	if err != nil {
-		return ToolCall{}, fmt.Errorf("openai(responses): encode request: %w", err)
+		c.logger.Error(ctx, "Failed to build OpenAI request body", err,
+			Field("model", c.model),
+			Field("history_length", len(history)),
+		)
+		return ToolCall{}, fmt.Errorf("openai: build request body: %w", err)
 	}
 
-	// Execute request
-	resp, err := c.executeRequest(ctx, payload, start)
+	// Execute request with retry logic
+	resp, err := c.executeRequest(ctx, payload, start, c.retryConfig)
 	if err != nil {
-		return ToolCall{}, err
+		return ToolCall{}, fmt.Errorf("openai: request failed after retries: %w", err)
 	}
 	defer func() { _ = resp.Body.Close() }()
 
@@ -115,8 +121,9 @@ func (c *OpenAIClient) RequestPlanStreamingResponses(ctx context.Context, histor
 		c.metrics.RecordAPICall(duration, false)
 		c.logger.Error(ctx, "OpenAI API stream parsing failed", err,
 			Field("duration_ms", duration.Milliseconds()),
+			Field("model", c.model),
 		)
-		return ToolCall{}, err
+		return ToolCall{}, fmt.Errorf("openai: stream parsing failed: %w", err)
 	}
 
 	if toolCall.Name != "" {
 
@@ -7,6 +7,7 @@ import (
 	"net/http/httptest"
 	"net/url"
 	"testing"
+	"time"
 
 	"github.com/asynkron/goagent/internal/core/schema"
 )
@@ -51,7 +52,7 @@ func TestRequestPlanUsesFunctionToolShape(t *testing.T) {
 	}))
 	defer server.Close()
 
-	client, err := NewOpenAIClient("test-key", "test-model", "", server.URL, nil, nil)
+	client, err := NewOpenAIClient("test-key", "test-model", "", server.URL, nil, nil, nil, 120*time.Second)
 	if err != nil {
 		t.Fatalf("unexpected client error: %v", err)
 	}
| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -7,6 +7,7 @@ import (` |
| `7` | `7` | `"net/http/httptest"` |
| `8` | `8` | `"net/url"` |
| `9` | `9` | `"testing"` |
| | `10` | `+ "time"` |
| `10` | `11` | |
| `11` | `12` | `"github.com/asynkron/goagent/internal/core/schema"` |
| `12` | `13` | `)` |
| | | `@@ -51,7 +52,7 @@ func TestRequestPlanUsesFunctionToolShape(t *testing.T) {` |
| `51` | `52` | `}))` |
| `52` | `53` | `defer server.Close()` |
| `53` | `54` | |
| `54` | | `- client, err := NewOpenAIClient("test-key", "test-model", "", server.URL, nil, nil)` |
| | `55` | `+ client, err := NewOpenAIClient("test-key", "test-model", "", server.URL, nil, nil, nil, 120*time.Second)` |
| `55` | `56` | `if err != nil {` |
| `56` | `57` | `t.Fatalf("unexpected client error: %v", err)` |
| `57` | `58` | `}` |