diff --git a/anton/chat.py b/anton/chat.py
index 5ce72fd7..9cd2ef67 100644
--- a/anton/chat.py
+++ b/anton/chat.py
@@ -1271,6 +1271,7 @@ async def _chat_loop(
         history_store=history_store,
         session_id=current_session_id,
         proactive_dashboards=settings.proactive_dashboards,
+        act_first=settings.act_first,
         output_dir=settings.artifacts_dir,
         tools=[CONNECT_DATASOURCE_TOOL, PUBLISH_TOOL],
         web_search_enabled=settings.web_search_enabled,
diff --git a/anton/chat_session.py b/anton/chat_session.py
index 0f7ac11a..200bd641 100644
--- a/anton/chat_session.py
+++ b/anton/chat_session.py
@@ -116,6 +116,7 @@ def rebuild_session(
         history_store=history_store,
         session_id=session_id,
         proactive_dashboards=settings.proactive_dashboards,
+        act_first=settings.act_first,
         output_dir=settings.artifacts_dir,
         web_search_enabled=settings.web_search_enabled,
         web_fetch_enabled=settings.web_fetch_enabled,
diff --git a/anton/config/settings.py b/anton/config/settings.py
index 31257809..fc930403 100644
--- a/anton/config/settings.py
+++ b/anton/config/settings.py
@@ -85,6 +85,10 @@ class AntonSettings(CoreSettings):
 
     proactive_dashboards: bool = False  # when True, build HTML dashboards; when False, CLI output only
 
+    # "Do first, ask later": act on reasonable defaults and surface assumptions
+    # inline instead of stopping to ask. False = cautious ask-first discipline.
+    act_first: bool = True
+
     theme: str = "auto"
 
     disable_autoupdates: bool = False
diff --git a/anton/core/backends/utils.py b/anton/core/backends/utils.py
index 07cd1796..0d87b8ff 100644
--- a/anton/core/backends/utils.py
+++ b/anton/core/backends/utils.py
@@ -8,7 +8,23 @@ def compute_timeouts(estimated_seconds: int) -> tuple[float, float]:
     """
     s = CoreSettings()
     if estimated_seconds <= 0:
-        return float(s.cell_timeout_default), float(s.cell_inactivity_timeout)
-    total = max(estimated_seconds * 2, estimated_seconds + 30)
-    inactivity = max(estimated_seconds * 0.5, 30)
-    return float(total), float(inactivity)
\ No newline at end of file
+        total = float(s.cell_timeout_default)
+        inactivity = float(s.cell_inactivity_timeout)
+    else:
+        total = float(max(estimated_seconds * 2, estimated_seconds + 30))
+        inactivity = float(max(estimated_seconds * 0.5, 30))
+    # Clamp the silence window: a large estimate must not buy minutes of
+    # undetected silence (an est=600 cell would otherwise allow 300s of no
+    # output before being killed). A cell quiet for cell_inactivity_max
+    # seconds is killed regardless of its estimate. stdout/progress() reset
+    # this window, so legitimate long-but-active cells — e.g. a batch loop
+    # pinging progress() — are unaffected; only genuinely stuck cells die.
+    inactivity = min(inactivity, float(s.cell_inactivity_max))
+    # The total is deliberately left scaling so long-but-active cells run to
+    # completion. cell_total_max (default 0 = off) is an optional absolute
+    # backstop for a runaway that keeps producing output forever (which the
+    # inactivity cap can't catch); set it only when that risk outweighs
+    # clipping a genuinely long batch job.
+    if s.cell_total_max > 0:
+        total = min(total, float(s.cell_total_max))
+    return total, inactivity
\ No newline at end of file
diff --git a/anton/core/dispatch/local_runtime.py b/anton/core/dispatch/local_runtime.py
index 5430f056..63e8d63a 100644
--- a/anton/core/dispatch/local_runtime.py
+++ b/anton/core/dispatch/local_runtime.py
@@ -442,6 +442,12 @@ def _safe_error_message(exc: Exception) -> str:
         """Render an exception as a user-facing error with API keys redacted."""
         try:
             from anton.core.runtime import safe_redact_error
+            from anton.core.llm.provider import TokenLimitExceeded
+            # A spent token allowance isn't a crash — surface anton's
+            # already-friendly quota message as-is, without the
+            # `[agent error]` prefix that reads like something broke.
+            if isinstance(exc, TokenLimitExceeded):
+                return safe_redact_error(exc)
             return f"[agent error] {safe_redact_error(exc)}"
         except Exception:
             return f"[agent error] {exc!r}"
diff --git a/anton/core/llm/anthropic.py b/anton/core/llm/anthropic.py
index cc284d2d..7ca41555 100644
--- a/anton/core/llm/anthropic.py
+++ b/anton/core/llm/anthropic.py
@@ -127,7 +127,7 @@ async def complete(
                 and exc.body.get("detail")
             ):
                 msg = f"Server returned 429 — {exc.body['detail']}"
-                msg += " Visit https://mdb.ai to upgrade or to top up your tokens."
+                msg += " Visit https://console.mindshub.ai to upgrade or to top up your tokens."
                 from .provider import TokenLimitExceeded
 
                 raise TokenLimitExceeded(msg) from exc
@@ -274,7 +274,7 @@ async def stream(
                 and exc.body.get("detail")
             ):
                 msg = f"Server returned 429 — {exc.body['detail']}"
-                msg += " Visit https://mdb.ai to upgrade or to top up your tokens."
+                msg += " Visit https://console.mindshub.ai to upgrade or to top up your tokens."
                 from .provider import TokenLimitExceeded
 
                 raise TokenLimitExceeded(msg) from exc
diff --git a/anton/core/llm/openai.py b/anton/core/llm/openai.py
index 89064e07..c2f0549c 100644
--- a/anton/core/llm/openai.py
+++ b/anton/core/llm/openai.py
@@ -683,7 +683,7 @@ async def complete(
                 and exc.body.get("detail")
             ):
                 msg = f"Server returned 429 — {exc.body['detail']}"
-                msg += " Visit https://mdb.ai to upgrade or to top up your tokens."
+                msg += " Visit https://console.mindshub.ai to upgrade or to top up your tokens."
                 from .provider import TokenLimitExceeded
 
                 raise TokenLimitExceeded(msg) from exc
@@ -852,7 +852,7 @@ async def stream(
                 and exc.body.get("detail")
             ):
                 msg = f"Server returned 429 — {exc.body['detail']}"
-                msg += " Visit https://mdb.ai to upgrade or top up your tokens."
+                msg += " Visit https://console.mindshub.ai to upgrade or top up your tokens."
                 from .provider import TokenLimitExceeded
 
                 raise TokenLimitExceeded(msg) from exc
@@ -970,7 +970,7 @@ async def _complete_via_responses(
                 and exc.body.get("detail")
             ):
                 msg = f"Server returned 429 — {exc.body['detail']}"
-                msg += " Visit https://mdb.ai to upgrade or to top up your tokens."
+                msg += " Visit https://console.mindshub.ai to upgrade or to top up your tokens."
                 from .provider import TokenLimitExceeded
 
                 raise TokenLimitExceeded(msg) from exc
@@ -1099,7 +1099,7 @@ async def _stream_via_responses(
                 and exc.body.get("detail")
             ):
                 msg = f"Server returned 429 — {exc.body['detail']}"
-                msg += " Visit https://mdb.ai to upgrade or top up your tokens."
+                msg += " Visit https://console.mindshub.ai to upgrade or top up your tokens."
                 from .provider import TokenLimitExceeded
 
                 raise TokenLimitExceeded(msg) from exc
diff --git a/anton/core/llm/prompt_builder.py b/anton/core/llm/prompt_builder.py
index 9d50a80c..9a4f0c0e 100644
--- a/anton/core/llm/prompt_builder.py
+++ b/anton/core/llm/prompt_builder.py
@@ -8,6 +8,8 @@
     BASE_VISUALIZATIONS_PROMPT,
     BACKEND_GENERATION_PROMPT,
     CHAT_SYSTEM_PROMPT,
+    CONVERSATION_DISCIPLINE_ACT_FIRST,
+    CONVERSATION_DISCIPLINE_ASK_FIRST,
     VISUALIZATIONS_MARKDOWN_OUTPUT_FORMAT_PROMPT,
     VISUALIZATIONS_HTML_OUTPUT_FORMAT_PROMPT,
 )
@@ -124,10 +126,12 @@ def _build_visualizations_section(
     def build(
         self,
         *,
+        conversation_started: str,
         current_datetime: str,
         system_prompt_context: SystemPromptContext,
         proactive_dashboards: bool,
         output_dir: str,
+        act_first: bool = True,
         tool_defs: list["ToolDef"] | None = None,
         memory_context: str = "",
         project_context: str = "",
@@ -146,11 +150,17 @@ def build(
         if prefix:
             prompt += f"{prefix}\n\n"
 
+        conversation_discipline = (
+            CONVERSATION_DISCIPLINE_ACT_FIRST if act_first
+            else CONVERSATION_DISCIPLINE_ASK_FIRST
+        )
+
         prompt += CHAT_SYSTEM_PROMPT.format(
             runtime_context=system_prompt_context.runtime_context,
             artifacts_section=ARTIFACTS_PROMPT,
             visualizations_section=visualizations_section,
-            current_datetime=current_datetime,
+            conversation_discipline=conversation_discipline,
+            conversation_started=conversation_started,
         )
 
         prompt += "\n\n" + BACKEND_GENERATION_PROMPT.format(output_dir=output_dir)
@@ -159,8 +169,8 @@ def build(
         if tool_prompts:
             prompt += tool_prompts
 
-        if memory_context:
-            prompt += memory_context
+        # Stable, per-session content goes before the volatile tail so the
+        # prefix stays cache-stable across turns.
         if project_context:
             prompt += project_context
         if self_awareness_context:
@@ -176,6 +186,18 @@ def build(
         if suffix:
             prompt += f"\n\n{suffix}"
 
+        # Volatile tail — LAST so everything above can be cached. The live
+        # clock and the relevance-filtered memory snapshot both change every
+        # turn, so they sit after the cache-stable prefix and never invalidate
+        # it. (The prefix carries only the fixed "conversation started" stamp.)
+        prompt += (
+            f"\n\nCurrent date and time: {current_datetime}\n"
+            "(Earlier messages are prefixed with the time they were sent; that "
+            "bracketed timestamp is metadata, not part of the message text.)"
+        )
+        if memory_context:
+            prompt += memory_context
+
         return prompt
 
 
diff --git a/anton/core/llm/prompts.py b/anton/core/llm/prompts.py
index 245a48c1..527642f2 100644
--- a/anton/core/llm/prompts.py
+++ b/anton/core/llm/prompts.py
@@ -7,7 +7,7 @@
 solve problems. You are NOT a code assistant or chatbot. You are a coworker with a \
 computer, and you use that computer to get things done.
 
-Current date and time: {current_datetime}
+Conversation started: {conversation_started}
 
 WHO YOU ARE:
 - You solve problems — not just write code. If someone needs emails classified, data \
@@ -160,15 +160,7 @@
 
 {visualizations_section}
 
-CONVERSATION DISCIPLINE (critical):
-- If you ask the user a question, STOP and WAIT for their reply. Never ask a question \
-and then act in the same turn — that skips the user's answer.
-- Only act when you have ALL the information you need. If you're unsure \
-about anything, ask first, then act in a LATER turn after receiving the answer.
-- When the user gives a vague answer (like "yeah", "the current one", "sure"), interpret \
-it in context of what you just asked. Do not ask them to repeat themselves.
-- Gather requirements incrementally through conversation. Do not front-load every \
-possible question at once — ask 1-3 at a time, then follow up.
+{conversation_discipline}
 
 RUNTIME IDENTITY:
 {runtime_context}
@@ -185,6 +177,8 @@
 different data sources for the same information, caching/retrying with backoff, etc.
 - Exhaust at least 2-3 genuinely different approaches before involving the user. Each \
 attempt should be a meaningfully different strategy — not just retrying the same thing.
+- If a scratchpad cell errors the same way twice, change strategy — don't re-run the \
+same code expecting a different result.
 - Only ask the user for things that truly require them: credentials they haven't shared, \
 ambiguous requirements you can't infer, access to private/internal systems, or a choice \
 between equally valid options.
@@ -192,6 +186,9 @@
 so the user has full context and doesn't suggest things you've already done.
 
 GENERAL RULES:
+- Validate your output before claiming the task is done — actually check the result \
+(inspect the data, run it, confirm the file/artifact exists and looks right) instead of \
+assuming it worked. Report what you verified, not what you intended.
 - Be conversational, concise, and direct. No filler. No bullet-point dumps unless asked.
 - Respond naturally to greetings, small talk, and follow-up questions.
 - When describing yourself, focus on problem-solving and collaboration — not listing \
@@ -210,6 +207,44 @@
 Only encode genuinely reusable knowledge — not transient conversation details.
 """
 
+# ---------------------------------------------------------------------------
+# Conversation discipline — two postures, selected by the `act_first` flag
+# (AntonSettings.act_first → ChatSessionConfig.act_first; default True).
+# Injected into CHAT_SYSTEM_PROMPT via {conversation_discipline}.
+# ---------------------------------------------------------------------------
+CONVERSATION_DISCIPLINE_ACT_FIRST = """CONVERSATION DISCIPLINE (critical):
+- Bias toward ACTION. When a request has a reasonable default interpretation, act on it \
+now — do not stall the task with a clarifying question. A delivered result the user can \
+correct beats a question that makes them wait.
+- STATE YOUR ASSUMPTIONS AS YOU MAKE THEM. Whenever you proceed on an assumption — a \
+default value, an interpretation of a vague request, a chosen approach, or a scope you \
+picked — say so plainly in the SAME response, right as you act, not buried at the end. \
+Phrase it like "Assuming you mean X (the common case), so I'll…" or "Going with monthly \
+granularity since you didn't specify." Surface each assumption as it happens so the user \
+can redirect mid-flight instead of being blocked up front. Acting silently is wrong; \
+acting out loud with your assumptions visible is right.
+- Only STOP and ASK when acting on a guess would be costly to undo or is genuinely \
+unknowable: destructive or irreversible actions (deleting data, spending money, sending \
+messages on the user's behalf), credentials or access you can't obtain, or a fork where \
+the options lead to materially different results and you have no basis to choose. Then ask \
+ONE tight question — and when you ask, STOP and WAIT for the reply; never ask and act in \
+the same turn, that skips their answer.
+- When the user gives a vague answer (like "yeah", "the current one", "sure"), interpret \
+it in context of what you just asked. Do not ask them to repeat themselves.
+- Don't front-load a questionnaire. Prefer acting on sensible defaults (stated out loud) \
+over interrogating the user; if something truly gates the work, ask at most 1-2 things."""
+
+CONVERSATION_DISCIPLINE_ASK_FIRST = """CONVERSATION DISCIPLINE (critical):
+- If you ask the user a question, STOP and WAIT for their reply. Never ask a question \
+and then act in the same turn — that skips the user's answer.
+- Only act when you have ALL the information you need. If you're unsure \
+about anything, ask first, then act in a LATER turn after receiving the answer.
+- When the user gives a vague answer (like "yeah", "the current one", "sure"), interpret \
+it in context of what you just asked. Do not ask them to repeat themselves.
+- Gather requirements incrementally through conversation. Do not front-load every \
+possible question at once — ask 1-3 at a time, then follow up."""
+
+
 # ---------------------------------------------------------------------------
 # Artifact contract — universal entry point for any user-facing output
 # ---------------------------------------------------------------------------
@@ -322,8 +357,8 @@
 Do NOT build a single 20KB+ HTML string in memory and write it at the end.
   3. CAP STRING SIZE PER CELL at ~5KB. Large-string scratchpad calls are the \
 single biggest cause of silent failures (the tool occasionally drops the \
-`code` payload on oversized inputs and returns "No code provided", which still \
-counts against the round cap). If a section is too big, split it.
+`code` payload on oversized inputs and the cell comes back with an empty-code \
+error, which still counts against the round cap). If a section is too big, split it.
   4. NEVER re-emit the full HTML mid-build. Append deltas, don't re-print \
 the world. Assembly is a one-line concat at the end, not a re-render of \
 everything you've written so far.
@@ -810,3 +845,22 @@ async def hello():
     "a public API, archive.org, an alternate library, or a completely different data source. "
     "Only involve the user if the problem truly requires something only they can provide."
 )
+
+# Scratchpad failures need different advice than the generic (scrape/fetch)
+# RESILIENCE_NUDGE above — telling the model to "try a public API / archive.org"
+# when a cell is too big or too slow just sends it renaming-and-retrying. These
+# are chosen by failure type in ChatSession._select_resilience_nudge.
+SCRATCHPAD_SIZE_NUDGE = (
+    "\n\nSYSTEM: This scratchpad cell keeps failing on its size, not its logic. "
+    "Stop retrying the same large cell. Write the output to disk incrementally — "
+    "open(path, 'w') once, then open(path, 'a') to append each chunk, keeping each "
+    "cell's string under ~5KB — or generate the content inside the cell instead of "
+    "passing a large literal. Reuse the SAME scratchpad; do not rename it."
+)
+SCRATCHPAD_TIMEOUT_NUDGE = (
+    "\n\nSYSTEM: This scratchpad cell keeps timing out — the work is too heavy, not "
+    "the write. Make the next cell smaller: fewer rows/items per cell, split a long "
+    "loop across cells (process a batch, return, continue), or narrow the scope. Call "
+    "progress() inside long loops so active work isn't mistaken for a hang. Reuse the "
+    "SAME scratchpad; do not rename it."
+)
diff --git a/anton/core/memory/acc.py b/anton/core/memory/acc.py
index 17c35e72..61b88cb1 100644
--- a/anton/core/memory/acc.py
+++ b/anton/core/memory/acc.py
@@ -446,26 +446,33 @@ def detect_reset_churn(events: Sequence[Event]) -> Lesson | None:
 
 
 def detect_kill_loop(events: Sequence[Event]) -> Lesson | None:
-    """The same scratchpad name had >= N cells killed (timeout/cancel/OOM).
+    """>= N scratchpad cells were killed (timeout/cancel/OOM) in one turn.
+
+    Fires when a single scratchpad is killed >= N times (a per-pad loop) OR
+    when >= N cells are killed across the turn regardless of name. The
+    name-agnostic count is deliberate: renaming the scratchpad between failed
+    attempts (`build_pres` → `write_html` → …) used to split the kill count
+    across buckets and hide the loop. A kill is a kill, and the right lesson
+    (make the next cell smaller) is the same either way.
 
     Reads `kind == "scratchpad_killed"`; looks at `detail.name`.
     """
+    killed = [e for e in events if e.kind == "scratchpad_killed"]
     by_name: defaultdict[str, int] = defaultdict(int)
-    for e in events:
-        if e.kind != "scratchpad_killed":
-            continue
+    for e in killed:
         n = e.detail.get("name") or ""
         if n:
             by_name[n] += 1
-    if not by_name or max(by_name.values()) < _KILL_LOOP_THRESHOLD:
+    per_name_max = max(by_name.values()) if by_name else 0
+    if per_name_max < _KILL_LOOP_THRESHOLD and len(killed) < _KILL_LOOP_THRESHOLD:
         return None
     return Lesson(
         rule=(
-            "When a scratchpad cell is killed (timeout, cancel, OOM), "
-            "the next cell on the same scratchpad needs to be smaller — "
-            "fewer rows, smaller batch, explicit timeout, narrower scope. "
-            "Two kills on the same scratchpad means the approach itself is "
-            "too heavy, not that the same cell needs another try."
+            "When a scratchpad cell is killed (timeout, cancel, OOM), the next "
+            "cell needs to be smaller — fewer rows, smaller batch, explicit "
+            "timeout, narrower scope — and stay on the SAME scratchpad. Two "
+            "kills in a turn (even across renamed scratchpads) mean the approach "
+            "is too heavy, not that the same cell needs another try."
         ),
         kind="when",
         triggers=("scratchpad_killed",),
diff --git a/anton/core/runtime.py b/anton/core/runtime.py
index 5f10f510..82783506 100644
--- a/anton/core/runtime.py
+++ b/anton/core/runtime.py
@@ -185,6 +185,7 @@ async def build_chat_session(
         history_store=history_store,
         session_id=session_id,
         proactive_dashboards=settings.proactive_dashboards,
+        act_first=settings.act_first,
         tools=list(extra_tools) if extra_tools else [],
     )
     return ChatSession(config)
diff --git a/anton/core/session.py b/anton/core/session.py
index 58abf956..742ee377 100644
--- a/anton/core/session.py
+++ b/anton/core/session.py
@@ -3,6 +3,7 @@
 import asyncio
 from collections.abc import AsyncIterator, Callable
 from dataclasses import asdict, dataclass, field
+from datetime import datetime
 import json
 import re
 from typing import TYPE_CHECKING, List
@@ -17,7 +18,11 @@
 from anton.core.memory.cerebellum import Cerebellum
 from anton.core.memory.skills import SkillStore
 from anton.core.tools.recall_skill import RECALL_SKILL_TOOL
-from anton.core.llm.prompts import RESILIENCE_NUDGE
+from anton.core.llm.prompts import (
+    RESILIENCE_NUDGE,
+    SCRATCHPAD_SIZE_NUDGE,
+    SCRATCHPAD_TIMEOUT_NUDGE,
+)
 from anton.core.llm.provider import (
     ContextOverflowError,
     StreamComplete,
@@ -48,7 +53,11 @@
     UPDATE_ARTIFACT_METADATA_TOOL,
     ToolDef,
 )
-from anton.core.utils.scratchpad import prepare_scratchpad_exec, format_cell_result
+from anton.core.utils.scratchpad import (
+    prepare_scratchpad_exec,
+    format_cell_result,
+    observe_scratchpad_cell,
+)
 
 from anton.explainability import ExplainabilityCollector, ExplainabilityStore
 
@@ -59,6 +68,11 @@
 from anton.core.settings import CoreSettings
 
 
+# Sentinel prefixing a compacted-history summary so later compactions can
+# recognize and update it in place rather than summarize a summary.
+_COMPACTED_MARKER = "[COMPACTED CONTEXT — REFERENCE ONLY]"
+
+
 if TYPE_CHECKING:
     from rich.console import Console
     from anton.context.self_awareness import SelfAwarenessContext
@@ -112,6 +126,10 @@ class ChatSessionConfig:
     # host didn't identify itself.
     harness: str | None = None
     proactive_dashboards: bool = False
+    # When True (default), Anton acts on reasonable defaults and surfaces its
+    # assumptions inline instead of stopping to ask ("do first, ask later").
+    # When False, it falls back to the cautious ask-first discipline.
+    act_first: bool = True
     tools: list[ToolDef] = field(default_factory=list)
     output_dir: str = ".anton/output"
     # Web tools — on by default. Each is independently resolved at session
@@ -120,6 +138,13 @@ class ChatSessionConfig:
     # (registered on the tool registry). See ChatSession.__init__.
     web_search_enabled: bool = True
     web_fetch_enabled: bool = True
+    # When the task (conversation) was created. Rendered as a fixed
+    # "Conversation started: …" line in the cache-stable prompt prefix — it
+    # never changes across turns, so it doesn't bust the prefix cache. The
+    # LIVE current time goes in the volatile tail instead (see _build_system_prompt),
+    # so resuming a conversation days later still reports the real "now".
+    # None → fall back to the current date each time the prompt is built.
+    started_at: datetime | None = None
 
 
 class ChatSession:
@@ -145,6 +170,8 @@ def __init__(self, config: ChatSessionConfig) -> None:
         self._system_prompt_context = config.system_prompt_context
         self._output_dir = config.output_dir
         self._proactive_dashboards = config.proactive_dashboards
+        self._act_first = config.act_first
+        self._started_at = config.started_at
         self._extra_tools = config.tools
         self._workspace = config.workspace
         self._data_vault = config.data_vault
@@ -225,16 +252,17 @@ def _acc_has_similar(rule: str) -> bool:
         # turn. Mirrors ANTON_MEMORY_MODE for shape consistency:
         #   "off"     — ACC observes nothing (skipped at every emit site).
         #   "passive" — Layer 1: lessons drain to memory at end-of-turn,
-        #               next turn's system prompt picks them up. SAFE
-        #               DEFAULT — adds no surface-area to the turn loop.
-        #   "active"  — Layer 2: ALSO inject lessons inline as text
-        #               blocks in tool_results so the LLM sees them on
-        #               the very next round. Stronger learning signal,
-        #               but more invasive — the LLM has to handle the
-        #               nudge gracefully without confusing it for a
-        #               user instruction.
-        _mode_raw = os.environ.get("ANTON_ACC_MODE", "passive").strip().lower()
-        self._acc_mode = _mode_raw if _mode_raw in ("off", "passive", "active") else "passive"
+        #               next turn's system prompt picks them up. No
+        #               surface-area on the turn loop.
+        #   "active"  — Layer 2 (DEFAULT): ALSO inject lessons inline as
+        #               text blocks in tool_results so the LLM sees them on
+        #               the very next round and can self-correct mid-task.
+        #               Stronger signal; the nudge is clearly labelled as an
+        #               automatic self-check (not a user instruction). Set
+        #               ANTON_ACC_MODE=passive to revert to learn-next-turn,
+        #               or =off to disable, if it ever causes trouble.
+        _mode_raw = os.environ.get("ANTON_ACC_MODE", "active").strip().lower()
+        self._acc_mode = _mode_raw if _mode_raw in ("off", "passive", "active") else "active"
         # Scratchpad observers — list of objects with on_pre_execute /
         # on_post_execute. Fired by handle_scratchpad around pad.execute.
         # The runtime never sees this list; observation lives at the
@@ -303,8 +331,10 @@ def _apply_error_tracking(
 
         streak = error_streak.get(tool_name, 0)
         if streak >= self._resilience_nudge_at and tool_name not in resilience_nudged:
-            result_text += RESILIENCE_NUDGE
-            resilience_nudged.add(tool_name)
+            nudge = self._select_resilience_nudge(tool_name, result_text)
+            if nudge:
+                result_text += nudge
+                resilience_nudged.add(tool_name)
 
         if streak >= self._max_consecutive_errors:
             result_text += (
@@ -315,6 +345,34 @@ def _apply_error_tracking(
 
         return result_text
 
+    @staticmethod
+    def _select_resilience_nudge(tool_name: str, result_text: str) -> str:
+        """Pick the soft-nudge text to append after a repeated tool failure.
+
+        RESILIENCE_NUDGE is scrape/fetch advice (try another data source),
+        which misdirects a scratchpad cell that is too big or too slow: that
+        cell needs to be chunked or scoped down instead. Only the scratchpad
+        tool is special-cased, by substring match on the lower-cased result:
+        "timed out" / "inactivity" -> SCRATCHPAD_TIMEOUT_NUDGE (checked first),
+        "argument was empty" -> SCRATCHPAD_SIZE_NUDGE. Everything else,
+        including every non-scratchpad tool, gets RESILIENCE_NUDGE.
+        """
+        if tool_name != "scratchpad":
+            return RESILIENCE_NUDGE
+        low = result_text.lower()
+        if "timed out" in low or "inactivity" in low:
+            return SCRATCHPAD_TIMEOUT_NUDGE
+        # Match the empty-code dispatcher message specifically — generic
+        # phrases like "too large"/"truncated" appear in unrelated errors
+        # (e.g. a MySQL "Data truncated for column" warning) and would
+        # misfire the chunking advice.
+        if "argument was empty" in low:
+            return SCRATCHPAD_SIZE_NUDGE
+        # Other scratchpad failures (syntax/runtime errors): the generic
+        # "you've failed twice, change approach" nudge still applies — only
+        # the size/timeout cases get specialised advice.
+        return RESILIENCE_NUDGE
+
     def repair_history(self) -> None:
         """Fix dangling tool_use blocks left by mid-stream cancellation.
 
@@ -536,8 +594,16 @@ def _record_cell_explainability(
     async def _build_system_prompt(self, user_message: str = "") -> str:
         import datetime as _dt
 
-        _now = _dt.datetime.now()
-        _current_datetime = _now.strftime("%A, %B %d, %Y at %I:%M %p")
+        # Two stamps, deliberately split for cache-stability AND correctness:
+        #  • conversation_started — the task's creation time (self._started_at),
+        #    a FIXED fact rendered in the cache-stable prefix; identical every
+        #    turn so it never busts the prefix cache.
+        #  • current_datetime — the real wall clock, rendered in the VOLATILE
+        #    tail (after the cached prefix) so it's always accurate even when a
+        #    conversation is resumed days/weeks later, without touching the cache.
+        _started = self._started_at or _dt.datetime.now()
+        _conversation_started = _started.strftime("%A, %B %d, %Y")
+        _current_datetime = _dt.datetime.now().strftime("%A, %B %d, %Y at %I:%M %p")
 
         # Inject memory context (replaces old self_awareness)
         memory_section = ""
@@ -562,9 +628,11 @@ async def _build_system_prompt(self, user_message: str = "") -> str:
 
         prompt_builder = ChatSystemPromptBuilder()
         prompt = prompt_builder.build(
+            conversation_started=_conversation_started,
             current_datetime=_current_datetime,
             system_prompt_context=self._system_prompt_context,
             proactive_dashboards=self._proactive_dashboards,
+            act_first=self._act_first,
             output_dir=self._output_dir,
             tool_defs=self.tool_registry.get_tool_defs(),
             memory_context=memory_section,
@@ -763,12 +831,18 @@ async def _summarize_history(self) -> None:
         old_turns = self._history[:split]
         recent_turns = self._history[split:]
 
-        # Serialize old turns into text for summarization
+        # Serialize old turns. Pull out any prior compacted summary so we
+        # UPDATE it in place rather than summarize a summary (which compounds
+        # loss every compaction).
+        prior_summary = ""
         lines: list[str] = []
         for msg in old_turns:
             role = msg.get("role", "unknown")
             content = msg.get("content", "")
             if isinstance(content, str):
+                if content.lstrip().startswith(_COMPACTED_MARKER):
+                    prior_summary = content
+                    continue
                 lines.append(f"[{role}]: {content[:2000]}")
             elif isinstance(content, list):
                 for block in content:
@@ -789,17 +863,43 @@ async def _summarize_history(self) -> None:
         if len(old_text) > 8000:
             old_text = old_text[:8000] + "\n... (truncated)"
 
+        if prior_summary:
+            user_content = (
+                "PREVIOUS SUMMARY (update this in place — merge the new turns into it, "
+                "don't restate it verbatim):\n"
+                f"{prior_summary}\n\n"
+                "NEW TURNS TO FOLD IN:\n"
+                f"{old_text}"
+            )
+        else:
+            user_content = old_text
+
         try:
+            # 3b-full: a structured, in-place-updated STATE RECORD rather than a
+            # freeform blob — so "Remaining" work survives compaction instead of
+            # being flattened into prose.
             summary_response = await self._llm.code(
                 system=(
-                    "Summarize this conversation history concisely. Preserve:\n"
-                    "- Key decisions and conclusions\n"
-                    "- Important data/results discovered\n"
-                    "- Variable names and values that are still relevant\n"
-                    "- Errors encountered and how they were resolved\n"
-                    "Keep it under 2000 tokens. Use bullet points."
+                    "You compact an agent's earlier conversation into a terse, factual "
+                    "STATE RECORD (not prose). Output only these sections, omitting any "
+                    "that are empty:\n"
+                    "## Goal — what the user ultimately wants\n"
+                    "## Constraints — explicit requirements / preferences / do-nots\n"
+                    "## Completed — work already done, each as `action → outcome`\n"
+                    "## Active state — variables, data, files/artifacts in play and their "
+                    "current values or paths\n"
+                    "## Blocked — anything stuck and why\n"
+                    "## Decisions — choices made and the reason\n"
+                    "## Remaining — what is still left to do\n\n"
+                    "Preserve the date/time of key events when it matters (e.g. "
+                    "`Completed (2026-06-05): …`) — the raw per-message timestamps are "
+                    "gone after compaction, so keep the ones that anchor the timeline.\n"
+                    "If a PREVIOUS SUMMARY is provided, update it with the new turns "
+                    "instead of starting over. If the user changed direction, narrowed "
+                    "scope, or cancelled something, reflect that — drop superseded items "
+                    "from Remaining, don't keep them. Keep it under ~2000 tokens."
                 ),
-                messages=[{"role": "user", "content": old_text}],
+                messages=[{"role": "user", "content": user_content}],
                 max_tokens=2048,
             )
             summary = summary_response.content or "(summary unavailable)"
@@ -807,17 +907,26 @@ async def _summarize_history(self) -> None:
             # If summarization fails, just do a simple truncation
             summary = f"(Earlier conversation with {len(old_turns)} turns — summarization failed)"
 
-        summary_msg = {
-            "role": "user",
-            "content": f"[Context summary of earlier conversation]\n{summary}",
-        }
+        # 3b-light: reference-only framing so the model treats this as compacted
+        # history, not a fresh instruction, and never resumes superseded/cancelled
+        # work after a compaction (which Anton's auto-continue verifier would
+        # otherwise be nudged to do).
+        summary_body = (
+            f"{_COMPACTED_MARKER}\n"
+            "Compacted record of earlier conversation, for REFERENCE ONLY — not a new "
+            "request. The most recent user message takes priority; if the user changed "
+            "direction, narrowed scope, or cancelled something, follow that and do NOT "
+            "resume superseded work described below.\n\n"
+            f"{summary}"
+        )
+        summary_msg = {"role": "user", "content": summary_body}
 
         # If the recent portion starts with a user message, insert a minimal
         # assistant separator to avoid consecutive user messages (API error).
         if recent_turns and recent_turns[0].get("role") == "user":
             self._history = [
                 summary_msg,
-                {"role": "assistant", "content": "Understood."},
+                {"role": "assistant", "content": "Understood — using that as reference."},
                 *recent_turns,
             ]
         else:
@@ -1791,6 +1900,15 @@ async def _stream_and_handle_tools(
                                         description=description,
                                         cell=cell,
                                     )
+                                    # Same post-execute ACC event as the CLI
+                                    # path (handle_scratchpad) — this inline
+                                    # streaming exec bypasses that handler, so
+                                    # without this scratchpad_killed/result
+                                    # would never fire here and detect_kill_loop
+                                    # would be blind in the streaming product.
+                                    observe_scratchpad_cell(
+                                        self, tc.input.get("name", ""), cell
+                                    )
                                     yield StreamToolResult(
                                         name=tc.name,
                                         action="exec",
diff --git a/anton/core/settings.py b/anton/core/settings.py
index fb631a1b..46e6b07b 100644
--- a/anton/core/settings.py
+++ b/anton/core/settings.py
@@ -17,6 +17,8 @@ class CoreSettings(BaseSettings):
     cell_timeout_default: int = 120  # Total timeout when no estimate given (s)
     cell_inactivity_timeout: int = 30  # Max silence between output lines (s)
     cell_inactivity_after_progress: int = 60  # Grace window after progress() call (s)
+    cell_inactivity_max: int = 60  # Ceiling on the silence window even when a large estimate scales it up (s)
+    cell_total_max: int = 0  # Optional absolute ceiling on total cell runtime (s); 0 = off (let it scale)
     cell_install_timeout: int = 120  # pip/uv install timeout (s)
     cell_keep_recent: int = 5  # Recent cells preserved during compaction
 
diff --git a/anton/core/tools/tool_defs.py b/anton/core/tools/tool_defs.py
index 2b0b182c..c37a24b5 100644
--- a/anton/core/tools/tool_defs.py
+++ b/anton/core/tools/tool_defs.py
@@ -93,6 +93,10 @@ class ToolDef:
                 "type": "integer",
                 "description": "Estimated execution time in seconds. Drives the total timeout (roughly 2x estimate). Use progress() for long cells.",
             },
+            "confirm_new_scratchpad": {
+                "type": "boolean",
+                "description": "Set true only to deliberately create a SECOND scratchpad while one is already in use this task. Normally reuse one scratchpad name for the whole task — each name is a separate isolated environment, so a new one loses all existing state. Leave unset/false unless you truly need isolation.",
+            },
         },
         "required": ["action", "name"],
     },
diff --git a/anton/core/tools/tool_handlers.py b/anton/core/tools/tool_handlers.py
index c23ca94e..6c8625a2 100644
--- a/anton/core/tools/tool_handlers.py
+++ b/anton/core/tools/tool_handlers.py
@@ -4,7 +4,11 @@
 from typing import TYPE_CHECKING
 
 from anton.core.backends.base import Cell
-from anton.core.utils.scratchpad import prepare_scratchpad_exec, format_cell_result
+from anton.core.utils.scratchpad import (
+    prepare_scratchpad_exec,
+    format_cell_result,
+    observe_scratchpad_cell,
+)
 
 if TYPE_CHECKING:
     from anton.chat_session import ChatSession
@@ -408,25 +412,17 @@ def _acc_observe(kind: str, detail: dict, *, severity: int = 1) -> None:
             fn(kind, detail, severity=severity)
 
     if action == "exec":
+        # The single-scratchpad guard and the pre-execute ACC events
+        # (scratchpad_empty_code / scratchpad_call) live in
+        # prepare_scratchpad_exec — the SHARED entry point that the streaming
+        # path (ChatSession.turn_stream) also calls — so they fire on both
+        # paths. A str return means the exec must not run; it is returned as-is
+        # (empty code, single-scratchpad challenge, or install failure).
         result = await prepare_scratchpad_exec(session, tc_input)
         if isinstance(result, str):
-            # Empty / malformed code parameter — the dispatcher rejected
-            # it before reaching the runtime. This is exactly the
-            # "silent code-clip" failure mode the ACC's
-            # detect_oversized_cell watches for.
-            _acc_observe("scratchpad_empty_code", {"name": name}, severity=7)
             return result
         pad, code, description, estimated_time, estimated_seconds = result
 
-        _acc_observe(
-            "scratchpad_call",
-            {
-                "name": name,
-                "code_len": len(code or ""),
-                "one_line_description": description or "",
-            },
-        )
-
         # Notify pre-execute observers (e.g. cerebellum). The runtime
         # never sees these — observation is an orchestration concern,
         # so it lives at the dispatcher layer where the data is most
@@ -452,31 +448,9 @@ def _acc_observe(kind: str, detail: dict, *, severity: int = 1) -> None:
                 pad_name=name, description=description, cell=cell,
             )
             await _fire_post_execute(session, cell)
-            # ACC: distinguish "killed" (timeout/cancel/OOM) from a
-            # plain runtime error. The local backend sets cell.error
-            # to a string starting with "Cancelled" or matching the
-            # "Cell timed out"/"Cell killed" prefixes from the
-            # asyncio.TimeoutError path. Everything else (NameError,
-            # ImportError, …) is a regular result with success=False.
-            err = (cell.error or "").strip()
-            if err.startswith(("Cancelled", "Cell timed out", "Cell killed")):
-                _acc_observe(
-                    "scratchpad_killed",
-                    {"name": name, "reason": err[:120]},
-                    severity=6,
-                )
-            else:
-                success = not err and not (cell.stderr or "").strip()
-                _acc_observe(
-                    "scratchpad_result",
-                    {
-                        "name": name,
-                        "success": success,
-                        "stdout_len": len(cell.stdout or ""),
-                        "error": err[:300] if err else "",
-                    },
-                    severity=5 if not success else 1,
-                )
+            # Post-execute ACC event (killed vs result) via the shared helper —
+            # the streaming path emits the same.
+            observe_scratchpad_cell(session, name, cell)
         return format_cell_result(cell)
 
     elif action == "view":
diff --git a/anton/core/utils/scratchpad.py b/anton/core/utils/scratchpad.py
index da518ff4..2cf15bc6 100644
--- a/anton/core/utils/scratchpad.py
+++ b/anton/core/utils/scratchpad.py
@@ -5,16 +5,107 @@
     from anton.core.session import ChatSession
 
 
+def _acc_observe(session, kind: str, detail: dict, *, severity: int = 1) -> None:
+    """Safe ACC emit — no-op if the session has no observer wired."""
+    fn = getattr(session, "_acc_observe", None)
+    if fn is not None:
+        fn(kind, detail, severity=severity)
+
+
+def observe_scratchpad_cell(session, name: str, cell) -> None:
+    """Emit the post-execute ACC event for a finished cell.
+
+    Distinguishes a kill (timeout/cancel/OOM) from a plain runtime error so
+    detect_kill_loop sees `scratchpad_killed`. Shared by both exec paths —
+    `handle_scratchpad` (CLI `turn()`) and the inline streaming exec in
+    `ChatSession.turn_stream` — so the ACC instrumentation is identical
+    regardless of which path ran the cell.
+    """
+    if cell is None:
+        return
+    err = (cell.error or "").strip()
+    if err.startswith(("Cancelled", "Cell timed out", "Cell killed")):
+        _acc_observe(session, "scratchpad_killed", {"name": name, "reason": err[:120]}, severity=6)
+    else:
+        success = not err and not (cell.stderr or "").strip()
+        _acc_observe(
+            session,
+            "scratchpad_result",
+            {
+                "name": name,
+                "success": success,
+                "stdout_len": len(cell.stdout or ""),
+                "error": err[:300] if err else "",
+            },
+            severity=5 if not success else 1,
+        )
+
+
 async def prepare_scratchpad_exec(session: ChatSession, tc_input: dict):
     """Validate and prepare a scratchpad exec call.
 
     Returns (pad, code, description, estimated_time, estimated_seconds) or
-    a str error message if validation fails.
+    a str message if the call should not run (empty code, a single-scratchpad
+    challenge, or a failed package install).
+
+    This is the SHARED entry point for both exec paths — `handle_scratchpad`
+    (CLI) and the inline streaming exec in `ChatSession.turn_stream` (cowork)
+    both call it — so the single-scratchpad guard and the pre-execute ACC
+    events live here, not in `handle_scratchpad` (which the streaming path
+    bypasses).
     """
     name = tc_input.get("name", "")
     code = tc_input.get("code", "")
     if not code or not code.strip():
-        return "No code provided."
+        # An empty `code` on an exec call is almost never the model meaning
+        # to run nothing — it's the large-payload drop: an oversized `code`
+        # argument gets truncated to "" in transit. Returning a bare "no
+        # code" here used to read as a no-op, so the model would retry the
+        # same oversized cell. Make the failure self-correcting and ensure
+        # it reads as an error (note the word "failed") so the per-tool
+        # error streak in _apply_error_tracking counts it toward the
+        # circuit breaker instead of silently resetting.
+        _acc_observe(session, "scratchpad_empty_code", {"name": name}, severity=7)
+        return (
+            "Scratchpad exec failed: the `code` argument was empty. This usually "
+            "means the code payload was too large and got truncated in transit. "
+            "Do NOT retry the same large cell — instead write the output to disk in "
+            "small append steps (open(path, 'a'), keep each cell's string under ~5KB), "
+            "or generate the content inside the cell rather than passing a big literal."
+        )
+
+    # Single-scratchpad guard: the agent should reuse ONE scratchpad per task.
+    # A new name spins up a separate, empty process — state from the existing
+    # pad isn't visible there — a common source of wasted rounds (re-import,
+    # re-fetch, shuffling state across pads). Challenge a new name when the
+    # agent already has a working scratchpad this session, unless it confirms
+    # it needs isolation. Tracked names are ones the agent has exec'd here —
+    # NOT session._scratchpads.pads, which also holds system-created pads
+    # (e.g. the artifact backend launcher's slug pad), which must never count
+    # against the agent. Challenge AT MOST ONCE per session: the challenge is
+    # not an error (the circuit breaker never counts it), so re-challenging
+    # every new name could loop to the round cap unchecked; one firm nudge is
+    # the enforcement, then respect the model's choice. `is True` (not
+    # truthiness) so a MagicMock attr in tests doesn't read as "challenged".
+    seen = getattr(session, "_agent_scratchpad_names", None)
+    if not isinstance(seen, set):
+        seen = set()
+        session._agent_scratchpad_names = seen
+    confirm_new = bool(tc_input.get("confirm_new_scratchpad", False))
+    challenged_before = getattr(session, "_scratchpad_challenged", False) is True
+    if name not in seen and seen and not confirm_new and not challenged_before:
+        session._scratchpad_challenged = True
+        existing = "', '".join(sorted(seen))
+        return (
+            f"You already have an active scratchpad ('{existing}') with live state "
+            f"(imports, variables, fetched data). Starting a new one named '{name}' "
+            "creates a SEPARATE, empty environment — nothing from the existing "
+            "scratchpad is available there, so you'd re-import and re-fetch. Reuse the "
+            "existing scratchpad for this task; it is stateful across cells. If you "
+            "genuinely need an isolated environment, call scratchpad exec again with "
+            "confirm_new_scratchpad=true."
+        )
+    seen.add(name)
 
     pad = await session._scratchpads.get_or_create(name)
 
@@ -34,6 +125,15 @@ async def prepare_scratchpad_exec(session: ChatSession, tc_input: dict):
             estimated_seconds = 0
 
     estimated_time = f"{estimated_seconds}s" if estimated_seconds > 0 else ""
+    _acc_observe(
+        session,
+        "scratchpad_call",
+        {
+            "name": name,
+            "code_len": len(code or ""),
+            "one_line_description": description or "",
+        },
+    )
     return pad, code, description, estimated_time, estimated_seconds
 
 
diff --git a/tests/e2e/scenarios/test_loop_safety.py b/tests/e2e/scenarios/test_loop_safety.py
index 61d40deb..25fdf5a6 100644
--- a/tests/e2e/scenarios/test_loop_safety.py
+++ b/tests/e2e/scenarios/test_loop_safety.py
@@ -63,9 +63,12 @@ def test_session_exits_within_timeout(cfg, stub, tmp_path):
 
 @pytest.mark.stub_only
 def test_resilience_nudge_injected_after_two_errors(cfg, stub, tmp_path):
+    # Reuse ONE scratchpad name: a realistic retry loop is the same cell
+    # failing twice. (Distinct names would instead trip the single-scratchpad
+    # guard, which is exercised separately.)
     bad_code = "def oops(:\n    pass"
-    stub.queue_tool_call("scratchpad", {"action": "exec", "name": "bad1", "code": bad_code})
-    stub.queue_tool_call("scratchpad", {"action": "exec", "name": "bad2", "code": bad_code})
+    stub.queue_tool_call("scratchpad", {"action": "exec", "name": "bad", "code": bad_code})
+    stub.queue_tool_call("scratchpad", {"action": "exec", "name": "bad", "code": bad_code})
     stub.queue_text("NUDGE_RECEIVED")
     stub.queue_verification_ok()
     result = run_anton(["--folder", str(tmp_path)], ["do bad stuff", "exit"],
@@ -82,9 +85,12 @@ def test_resilience_nudge_injected_after_two_errors(cfg, stub, tmp_path):
 
 @pytest.mark.stub_only
 def test_circuit_breaker_fires_after_five_consecutive_errors(cfg, stub, tmp_path):
+    # Reuse ONE scratchpad name so this exercises the consecutive-error
+    # circuit breaker, not the single-scratchpad guard (distinct names would
+    # trigger a guard challenge that resets the streak).
     bad_code = "def bad(:\n    pass"
     for i in range(5):
-        stub.queue_tool_call("scratchpad", {"action": "exec", "name": f"err_{i}", "code": bad_code})
+        stub.queue_tool_call("scratchpad", {"action": "exec", "name": "err", "code": bad_code})
     stub.queue_text("ERRORS_EXHAUSTED")
     stub.queue_verification_ok()
     result = run_anton(["--folder", str(tmp_path)], ["break everything", "exit"],
diff --git a/tests/test_acc.py b/tests/test_acc.py
index 2ed7f114..448b2133 100644
--- a/tests/test_acc.py
+++ b/tests/test_acc.py
@@ -244,12 +244,16 @@ def test_fires_on_two_kills_same_name(self):
         assert lesson is not None
         assert lesson.detector == "detect_kill_loop"
 
-    def test_silent_when_kills_are_for_different_names(self):
+    def test_fires_on_kills_across_different_names(self):
+        # Renaming the scratchpad between failed attempts must NOT hide the
+        # loop — two kills in a turn fire regardless of name.
         events = [
             Event("scratchpad_killed", 6, {"name": "a", "reason": "timeout"}, 1),
             Event("scratchpad_killed", 6, {"name": "b", "reason": "timeout"}, 2),
         ]
-        assert detect_kill_loop(events) is None
+        lesson = detect_kill_loop(events)
+        assert lesson is not None
+        assert lesson.detector == "detect_kill_loop"
 
     def test_silent_on_single_kill(self):
         events = [Event("scratchpad_killed", 6, {"name": "compute"}, 3)]
diff --git a/tests/test_dispatch_error_message.py b/tests/test_dispatch_error_message.py
new file mode 100644
index 00000000..04d7ea75
--- /dev/null
+++ b/tests/test_dispatch_error_message.py
@@ -0,0 +1,32 @@
+"""`_safe_error_message` framing policy for the local dispatch loop.
+
+A spent token allowance is a quota condition, not a crash, so it must
+surface anton's already-friendly message verbatim — without the
+`[agent error]` prefix that reads like something broke. Every other
+failure keeps the prefix (and the API-key redaction it already applied).
+"""
+
+from __future__ import annotations
+
+from anton.core.dispatch.local_runtime import LocalScratchpadOrchestrator
+from anton.core.llm.provider import TokenLimitExceeded
+
+
+_TOKEN_LIMIT_MESSAGE = (
+    "Server returned 429 — Monthly limit exceeded for tokens: 5000000/5000000 "
+    "Visit https://console.mindshub.ai to upgrade or to top up your tokens."
+)
+
+
+def test_token_limit_message_has_no_agent_error_prefix():
+    rendered = LocalScratchpadOrchestrator._safe_error_message(
+        TokenLimitExceeded(_TOKEN_LIMIT_MESSAGE)
+    )
+    assert rendered == _TOKEN_LIMIT_MESSAGE
+    assert "[agent error]" not in rendered
+
+
+def test_generic_error_keeps_agent_error_prefix():
+    rendered = LocalScratchpadOrchestrator._safe_error_message(ValueError("boom"))
+    assert rendered.startswith("[agent error]")
+    assert "boom" in rendered
diff --git a/tests/test_resilience_nudge.py b/tests/test_resilience_nudge.py
new file mode 100644
index 00000000..7a62a959
--- /dev/null
+++ b/tests/test_resilience_nudge.py
@@ -0,0 +1,45 @@
+"""Tests for ChatSession._select_resilience_nudge — failure-type-aware nudging.
+
+The generic RESILIENCE_NUDGE is scrape/fetch advice and misdirects scratchpad
+failures (a too-big or too-slow cell doesn't need a different data source). The
+selector routes scratchpad size/timeout failures to specific guidance and keeps
+the generic nudge for everything else.
+"""
+
+from __future__ import annotations
+
+from anton.core.llm.prompts import (
+    RESILIENCE_NUDGE,
+    SCRATCHPAD_SIZE_NUDGE,
+    SCRATCHPAD_TIMEOUT_NUDGE,
+)
+from anton.core.session import ChatSession
+
+_select = ChatSession._select_resilience_nudge
+
+
+class TestSelectResilienceNudge:
+    def test_non_scratchpad_tool_gets_generic_nudge(self):
+        assert _select("web_fetch", "failed to fetch the page") == RESILIENCE_NUDGE
+
+    def test_scratchpad_timeout_gets_timeout_nudge(self):
+        assert _select("scratchpad", "Cell timed out after 180s total") == SCRATCHPAD_TIMEOUT_NUDGE
+
+    def test_scratchpad_inactivity_gets_timeout_nudge(self):
+        msg = "Cell killed after 60s of inactivity (no output or progress() calls)"
+        assert _select("scratchpad", msg) == SCRATCHPAD_TIMEOUT_NUDGE
+
+    def test_scratchpad_empty_code_gets_size_nudge(self):
+        msg = "Scratchpad exec failed: the `code` argument was empty. ..."
+        assert _select("scratchpad", msg) == SCRATCHPAD_SIZE_NUDGE
+
+    def test_scratchpad_generic_error_gets_generic_nudge(self):
+        # A NameError-style failure is neither size nor timeout; it still gets
+        # the generic "failed twice, change approach" nudge (only size/timeout
+        # get specialised scratchpad advice).
+        assert _select("scratchpad", "[error]\nNameError: name 'data' is not defined") == RESILIENCE_NUDGE
+
+    def test_scratchpad_nudges_never_mention_scraping(self):
+        for nudge in (SCRATCHPAD_SIZE_NUDGE, SCRATCHPAD_TIMEOUT_NUDGE):
+            assert "archive.org" not in nudge
+            assert "data source" not in nudge
diff --git a/tests/test_scratchpad.py b/tests/test_scratchpad.py
index cd08d65a..ea9d580b 100644
--- a/tests/test_scratchpad.py
+++ b/tests/test_scratchpad.py
@@ -831,28 +831,53 @@ async def test_compute_timeouts_no_estimate(self):
         assert inactivity == 30.0
 
     async def test_compute_timeouts_with_estimate(self):
-        """Estimate should scale total timeout and inactivity with no hard cap."""
+        """Estimate scales the total with no cap; inactivity is clamped to cell_inactivity_max (default 60)."""
         from anton.core.backends.utils import compute_timeouts as _compute_timeouts
 
         # Small estimate: max(10*2, 10+30) = max(20, 40) = 40
         total, inactivity = _compute_timeouts(10)
         assert total == 40.0
-        assert inactivity == 30.0  # max(5, 30) = 30
+        assert inactivity == 30.0  # max(5, 30) = 30, under the cap
 
         # Medium estimate: max(60*2, 60+30) = max(120, 90) = 120
         total, inactivity = _compute_timeouts(60)
         assert total == 120.0
-        assert inactivity == 30.0  # max(30, 30) = 30
+        assert inactivity == 30.0  # max(30, 30) = 30, under the cap
 
-        # Large estimate: max(300*2, 300+30) = max(600, 330) = 600
+        # Large estimate: total still scales, inactivity is capped at 60
         total, inactivity = _compute_timeouts(300)
         assert total == 600.0
-        assert inactivity == 150.0  # max(150, 30) = 150
+        assert inactivity == 60.0  # min(max(150, 30), 60) = 60
 
-        # Very large estimate: scales with estimate
+        # Very large estimate: total keeps scaling so long-but-active cells
+        # can run; the silence window stays capped.
         total, inactivity = _compute_timeouts(1000)
         assert total == 2000.0
-        assert inactivity == 500.0  # max(500, 30) = 500
+        assert inactivity == 60.0  # min(max(500, 30), 60) = 60
+
+    async def test_compute_timeouts_inactivity_cap_is_configurable(self):
+        """cell_inactivity_max bounds the silence window regardless of estimate."""
+        from anton.core.backends import utils as _utils
+        from anton.core.settings import CoreSettings
+
+        # est=300 would scale inactivity to 150s without the cap; with the
+        # default cap (60) it is clamped, and the cap is tunable via settings.
+        total, inactivity = _utils.compute_timeouts(300)
+        assert inactivity == float(CoreSettings().cell_inactivity_max)
+        assert total == 600.0  # total is intentionally left uncapped
+
+    async def test_compute_timeouts_total_max_off_by_default(self):
+        """cell_total_max defaults to 0 — the total is uncapped out of the box."""
+        from anton.core.settings import CoreSettings
+        assert CoreSettings().cell_total_max == 0
+
+    async def test_compute_timeouts_total_max_backstop(self, monkeypatch):
+        """When set, cell_total_max bounds the total; inactivity stays capped."""
+        from anton.core.backends.utils import compute_timeouts as _compute_timeouts
+        monkeypatch.setenv("ANTON_CELL_TOTAL_MAX", "300")
+        total, inactivity = _compute_timeouts(1000)
+        assert total == 300.0  # min(2000, 300)
+        assert inactivity == 60.0
 
 
 class TestSampleFunction:
diff --git a/tests/test_scratchpad_observer_dispatch.py b/tests/test_scratchpad_observer_dispatch.py
index 1a99c4a5..f80a2a13 100644
--- a/tests/test_scratchpad_observer_dispatch.py
+++ b/tests/test_scratchpad_observer_dispatch.py
@@ -23,6 +23,59 @@
     _fire_pre_execute,
     handle_scratchpad,
 )
+from anton.core.utils.scratchpad import observe_scratchpad_cell
+
+
+class _RecordingAccSession:
+    """Session stub that records ACC observations."""
+
+    def __init__(self):
+        self.events: list[tuple] = []
+
+    def _acc_observe(self, kind, detail, *, severity=1):
+        self.events.append((kind, detail, severity))
+
+
+class TestObserveScratchpadCell:
+    """observe_scratchpad_cell is the shared post-exec ACC emitter used by
+    BOTH the CLI (handle_scratchpad) and streaming (turn_stream) paths."""
+
+    def test_timeout_kill_emits_scratchpad_killed(self):
+        s = _RecordingAccSession()
+        cell = Cell(code="x", stdout="", stderr="", error="Cell timed out after 180s total. Process killed")
+        observe_scratchpad_cell(s, "dash", cell)
+        assert s.events[0][0] == "scratchpad_killed"
+        assert s.events[0][1]["name"] == "dash"
+
+    def test_inactivity_kill_emits_scratchpad_killed(self):
+        s = _RecordingAccSession()
+        cell = Cell(code="x", stdout="", stderr="", error="Cell killed after 60s of inactivity")
+        observe_scratchpad_cell(s, "dash", cell)
+        assert s.events[0][0] == "scratchpad_killed"
+
+    def test_runtime_error_emits_result_failure(self):
+        s = _RecordingAccSession()
+        cell = Cell(code="x", stdout="", stderr="", error="Traceback...\nNameError: x")
+        observe_scratchpad_cell(s, "dash", cell)
+        assert s.events[0][0] == "scratchpad_result"
+        assert s.events[0][1]["success"] is False
+
+    def test_success_emits_result_success(self):
+        s = _RecordingAccSession()
+        cell = Cell(code="x", stdout="42", stderr="", error=None)
+        observe_scratchpad_cell(s, "dash", cell)
+        assert s.events[0][0] == "scratchpad_result"
+        assert s.events[0][1]["success"] is True
+
+    def test_none_cell_emits_nothing(self):
+        s = _RecordingAccSession()
+        observe_scratchpad_cell(s, "dash", None)
+        assert s.events == []
+
+    def test_no_acc_observer_is_noop(self):
+        # A session without _acc_observe (e.g. ACC off) must not raise.
+        observe_scratchpad_cell(SimpleNamespace(), "dash",
+                                Cell(code="x", stdout="", stderr="", error=None))
 
 
 # ─────────────────────────────────────────────────────────────────────────────
@@ -288,3 +341,82 @@ async def test_non_exec_actions_do_not_fire_observers(self):
 
         assert obs.pre_calls == []
         assert obs.post_calls == []
+
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Single-scratchpad guard — challenge a second distinct scratchpad per task
+# ─────────────────────────────────────────────────────────────────────────────
+
+_CHALLENGE_MARK = "confirm_new_scratchpad=true"
+
+
+class TestSingleScratchpadGuard:
+    def _exec(self, name: str, **extra: object) -> dict:
+        tc = {
+            "action": "exec",
+            "name": name,
+            "code": "print(1)",
+            "one_line_description": "do a thing",
+            "estimated_execution_time_seconds": 5,
+        }
+        tc.update(extra)
+        return tc
+
+    @pytest.mark.asyncio
+    async def test_first_scratchpad_not_challenged(self):
+        session, _ = _fake_session()
+        result = await handle_scratchpad(session, self._exec("dash"))
+        assert _CHALLENGE_MARK not in result
+        assert "dash" in session._agent_scratchpad_names
+
+    @pytest.mark.asyncio
+    async def test_reusing_same_name_not_challenged(self):
+        session, _ = _fake_session()
+        session._agent_scratchpad_names = {"dash"}
+        result = await handle_scratchpad(session, self._exec("dash"))
+        assert _CHALLENGE_MARK not in result
+
+    @pytest.mark.asyncio
+    async def test_second_distinct_name_is_challenged(self):
+        session, _ = _fake_session()
+        session._agent_scratchpad_names = {"dash"}
+        result = await handle_scratchpad(session, self._exec("report"))
+        assert _CHALLENGE_MARK in result
+        # The challenged name must NOT be recorded, so a later confirm works.
+        assert "report" not in session._agent_scratchpad_names
+        # A challenge is not a failure — it must not contain an error marker
+        # that would trip the per-tool circuit breaker.
+        assert "failed" not in result and "[error]" not in result
+
+    @pytest.mark.asyncio
+    async def test_confirm_allows_second_scratchpad(self):
+        session, _ = _fake_session()
+        session._agent_scratchpad_names = {"dash"}
+        result = await handle_scratchpad(
+            session, self._exec("report", confirm_new_scratchpad=True)
+        )
+        assert _CHALLENGE_MARK not in result
+        assert "report" in session._agent_scratchpad_names
+
+    @pytest.mark.asyncio
+    async def test_challenge_fires_at_most_once_per_session(self):
+        # The challenge must not be able to induce its own loop: a model that
+        # keeps requesting new names without confirming is nudged once, then
+        # allowed (the challenge isn't an error, so nothing else would stop it).
+        session, _ = _fake_session()
+        session._agent_scratchpad_names = {"dash"}
+        first = await handle_scratchpad(session, self._exec("report"))
+        assert _CHALLENGE_MARK in first
+        second = await handle_scratchpad(session, self._exec("report2"))
+        assert _CHALLENGE_MARK not in second
+        assert "report2" in session._agent_scratchpad_names
+
+    @pytest.mark.asyncio
+    async def test_system_pads_do_not_count_against_agent(self):
+        # A system-created pad (e.g. the artifact backend launcher's slug pad)
+        # lives in _scratchpads.pads but never in _agent_scratchpad_names, so
+        # the agent's first real scratchpad is not challenged by its presence.
+        session, _ = _fake_session()
+        session._scratchpads.pads = {"my-artifact-slug": MagicMock()}
+        result = await handle_scratchpad(session, self._exec("dash"))
+        assert _CHALLENGE_MARK not in result
diff --git a/uv.lock b/uv.lock
index 002d1055..08c2abc4 100644
--- a/uv.lock
+++ b/uv.lock
@@ -164,7 +164,7 @@ wheels = [
 ]
 
 [[package]]
-name = "anton"
+name = "anton-agent"
 source = { editable = "." }
 dependencies = [
     { name = "aiohttp" },