mindsdb · torrmal · Jun 20, 2026 · Jun 18, 2026 · Jun 18, 2026 · Jun 18, 2026
diff --git a/anton/chat.py b/anton/chat.py
@@ -1271,6 +1271,7 @@ async def _chat_loop(
         history_store=history_store,
         session_id=current_session_id,
         proactive_dashboards=settings.proactive_dashboards,
+        act_first=settings.act_first,
         output_dir=settings.artifacts_dir,
         tools=[CONNECT_DATASOURCE_TOOL, PUBLISH_TOOL],
         web_search_enabled=settings.web_search_enabled,

diff --git a/anton/chat_session.py b/anton/chat_session.py
@@ -116,6 +116,7 @@ def rebuild_session(
         history_store=history_store,
         session_id=session_id,
         proactive_dashboards=settings.proactive_dashboards,
+        act_first=settings.act_first,
         output_dir=settings.artifacts_dir,
         web_search_enabled=settings.web_search_enabled,
         web_fetch_enabled=settings.web_fetch_enabled,

diff --git a/anton/config/settings.py b/anton/config/settings.py
@@ -85,6 +85,10 @@ class AntonSettings(CoreSettings):
 
     proactive_dashboards: bool = False  # when True, build HTML dashboards; when False, CLI output only
 
+    # "Do first, ask later": act on reasonable defaults and surface assumptions
+    # inline instead of stopping to ask. False = cautious ask-first discipline.
+    act_first: bool = True
+
     theme: str = "auto"
 
     disable_autoupdates: bool = False

diff --git a/anton/core/backends/utils.py b/anton/core/backends/utils.py
@@ -8,7 +8,23 @@ def compute_timeouts(estimated_seconds: int) -> tuple[float, float]:
     """
     s = CoreSettings()
     if estimated_seconds <= 0:
-        return float(s.cell_timeout_default), float(s.cell_inactivity_timeout)
-    total = max(estimated_seconds * 2, estimated_seconds + 30)
-    inactivity = max(estimated_seconds * 0.5, 30)
-    return float(total), float(inactivity)
+        total = float(s.cell_timeout_default)
+        inactivity = float(s.cell_inactivity_timeout)
+    else:
+        total = float(max(estimated_seconds * 2, estimated_seconds + 30))
+        inactivity = float(max(estimated_seconds * 0.5, 30))
+    # Clamp the silence window: a large estimate must not buy minutes of
+    # undetected silence (an est=600 cell would otherwise allow 300s of no
+    # output before being killed). A cell quiet for cell_inactivity_max
+    # seconds is killed regardless of its estimate. stdout/progress() reset
+    # this window, so legitimate long-but-active cells — e.g. a batch loop
+    # pinging progress() — are unaffected; only genuinely stuck cells die.
+    inactivity = min(inactivity, float(s.cell_inactivity_max))
+    # The total is deliberately left scaling so long-but-active cells run to
+    # completion. cell_total_max (default 0 = off) is an optional absolute
+    # backstop for a runaway that keeps producing output forever (which the
+    # inactivity cap can't catch); set it only when that risk outweighs
+    # clipping a genuinely long batch job.
+    if s.cell_total_max > 0:
+        total = min(total, float(s.cell_total_max))
+    return total, inactivity
diff --git a/anton/core/dispatch/local_runtime.py b/anton/core/dispatch/local_runtime.py
@@ -442,6 +442,12 @@ def _safe_error_message(exc: Exception) -> str:
         """Render an exception as a user-facing error with API keys redacted."""
         try:
             from anton.core.runtime import safe_redact_error
+            from anton.core.llm.provider import TokenLimitExceeded
+            # A spent token allowance isn't a crash — surface anton's
+            # already-friendly quota message as-is, without the
+            # `[agent error]` prefix that reads like something broke.
+            if isinstance(exc, TokenLimitExceeded):
+                return safe_redact_error(exc)
             return f"[agent error] {safe_redact_error(exc)}"
         except Exception:
             return f"[agent error] {exc!r}"
diff --git a/anton/core/llm/anthropic.py b/anton/core/llm/anthropic.py
@@ -127,7 +127,7 @@ async def complete(
                 and exc.body.get("detail")
             ):
                 msg = f"Server returned 429 — {exc.body['detail']}"
-                msg += " Visit https://mdb.ai to upgrade or to top up your tokens."
+                msg += " Visit https://console.mindshub.ai to upgrade or to top up your tokens."
                 from .provider import TokenLimitExceeded
 
                 raise TokenLimitExceeded(msg) from exc
@@ -274,7 +274,7 @@ async def stream(
                 and exc.body.get("detail")
             ):
                 msg = f"Server returned 429 — {exc.body['detail']}"
-                msg += " Visit https://mdb.ai to upgrade or to top up your tokens."
+                msg += " Visit https://console.mindshub.ai to upgrade or to top up your tokens."
                 from .provider import TokenLimitExceeded
 
                 raise TokenLimitExceeded(msg) from exc

diff --git a/anton/core/llm/openai.py b/anton/core/llm/openai.py
@@ -683,7 +683,7 @@ async def complete(
                 and exc.body.get("detail")
             ):
                 msg = f"Server returned 429 — {exc.body['detail']}"
-                msg += " Visit https://mdb.ai to upgrade or to top up your tokens."
+                msg += " Visit https://console.mindshub.ai to upgrade or to top up your tokens."
                 from .provider import TokenLimitExceeded
 
                 raise TokenLimitExceeded(msg) from exc
@@ -852,7 +852,7 @@ async def stream(
                 and exc.body.get("detail")
             ):
                 msg = f"Server returned 429 — {exc.body['detail']}"
-                msg += " Visit https://mdb.ai to upgrade or top up your tokens."
+                msg += " Visit https://console.mindshub.ai to upgrade or top up your tokens."
                 from .provider import TokenLimitExceeded
 
                 raise TokenLimitExceeded(msg) from exc
@@ -970,7 +970,7 @@ async def _complete_via_responses(
                 and exc.body.get("detail")
             ):
                 msg = f"Server returned 429 — {exc.body['detail']}"
-                msg += " Visit https://mdb.ai to upgrade or to top up your tokens."
+                msg += " Visit https://console.mindshub.ai to upgrade or to top up your tokens."
                 from .provider import TokenLimitExceeded
 
                 raise TokenLimitExceeded(msg) from exc
@@ -1099,7 +1099,7 @@ async def _stream_via_responses(
                 and exc.body.get("detail")
             ):
                 msg = f"Server returned 429 — {exc.body['detail']}"
-                msg += " Visit https://mdb.ai to upgrade or top up your tokens."
+                msg += " Visit https://console.mindshub.ai to upgrade or top up your tokens."
                 from .provider import TokenLimitExceeded
 
                 raise TokenLimitExceeded(msg) from exc

diff --git a/anton/core/llm/prompt_builder.py b/anton/core/llm/prompt_builder.py
@@ -8,6 +8,8 @@
     BASE_VISUALIZATIONS_PROMPT,
     BACKEND_GENERATION_PROMPT,
     CHAT_SYSTEM_PROMPT,
+    CONVERSATION_DISCIPLINE_ACT_FIRST,
+    CONVERSATION_DISCIPLINE_ASK_FIRST,
     VISUALIZATIONS_MARKDOWN_OUTPUT_FORMAT_PROMPT,
     VISUALIZATIONS_HTML_OUTPUT_FORMAT_PROMPT,
 )
@@ -124,10 +126,12 @@ def _build_visualizations_section(
     def build(
         self,
         *,
+        conversation_started: str,
         current_datetime: str,
         system_prompt_context: SystemPromptContext,
         proactive_dashboards: bool,
         output_dir: str,
+        act_first: bool = True,
         tool_defs: list["ToolDef"] | None = None,
         memory_context: str = "",
         project_context: str = "",
@@ -146,11 +150,17 @@ def build(
         if prefix:
             prompt += f"{prefix}\n\n"
 
+        conversation_discipline = (
+            CONVERSATION_DISCIPLINE_ACT_FIRST if act_first
+            else CONVERSATION_DISCIPLINE_ASK_FIRST
+        )
+
         prompt += CHAT_SYSTEM_PROMPT.format(
             runtime_context=system_prompt_context.runtime_context,
             artifacts_section=ARTIFACTS_PROMPT,
             visualizations_section=visualizations_section,
-            current_datetime=current_datetime,
+            conversation_discipline=conversation_discipline,
+            conversation_started=conversation_started,
         )
 
         prompt += "\n\n" + BACKEND_GENERATION_PROMPT.format(output_dir=output_dir)
@@ -159,8 +169,8 @@ def build(
         if tool_prompts:
             prompt += tool_prompts
 
-        if memory_context:
-            prompt += memory_context
+        # Stable, per-session content goes before the volatile tail so the
+        # prefix stays cache-stable across turns.
         if project_context:
             prompt += project_context
         if self_awareness_context:
@@ -176,6 +186,18 @@ def build(
         if suffix:
             prompt += f"\n\n{suffix}"
 
+        # Volatile tail — LAST so everything above can be cached. The live
+        # clock and the relevance-filtered memory snapshot both change every
+        # turn, so they sit after the cache-stable prefix and never invalidate
+        # it. (The prefix carries only the fixed "conversation started" stamp.)
+        prompt += (
+            f"\n\nCurrent date and time: {current_datetime}\n"
+            "(Earlier messages are prefixed with the time they were sent; that "
+            "bracketed timestamp is metadata, not part of the message text.)"
+        )
+        if memory_context:
+            prompt += memory_context
+
         return prompt
 
 

diff --git a/anton/core/llm/prompts.py b/anton/core/llm/prompts.py
@@ -7,7 +7,7 @@
 solve problems. You are NOT a code assistant or chatbot. You are a coworker with a \
 computer, and you use that computer to get things done.
 
-Current date and time: {current_datetime}
+Conversation started: {conversation_started}
 
 WHO YOU ARE:
 - You solve problems — not just write code. If someone needs emails classified, data \
@@ -160,15 +160,7 @@
 
 {visualizations_section}
 
-CONVERSATION DISCIPLINE (critical):
-- If you ask the user a question, STOP and WAIT for their reply. Never ask a question \
-and then act in the same turn — that skips the user's answer.
-- Only act when you have ALL the information you need. If you're unsure \
-about anything, ask first, then act in a LATER turn after receiving the answer.
-- When the user gives a vague answer (like "yeah", "the current one", "sure"), interpret \
-it in context of what you just asked. Do not ask them to repeat themselves.
-- Gather requirements incrementally through conversation. Do not front-load every \
-possible question at once — ask 1-3 at a time, then follow up.
+{conversation_discipline}
 
 RUNTIME IDENTITY:
 {runtime_context}
@@ -185,13 +177,18 @@
 different data sources for the same information, caching/retrying with backoff, etc.
 - Exhaust at least 2-3 genuinely different approaches before involving the user. Each \
 attempt should be a meaningfully different strategy — not just retrying the same thing.
+- If a scratchpad cell errors the same way twice, change strategy — don't re-run the \
+same code expecting a different result.
 - Only ask the user for things that truly require them: credentials they haven't shared, \
 ambiguous requirements you can't infer, access to private/internal systems, or a choice \
 between equally valid options.
 - When you do ask for help, briefly explain what you already tried and why it didn't work \
 so the user has full context and doesn't suggest things you've already done.
 
 GENERAL RULES:
+- Validate your output before claiming the task is done — actually check the result \
+(inspect the data, run it, confirm the file/artifact exists and looks right) instead of \
+assuming it worked. Report what you verified, not what you intended.
 - Be conversational, concise, and direct. No filler. No bullet-point dumps unless asked.
 - Respond naturally to greetings, small talk, and follow-up questions.
 - When describing yourself, focus on problem-solving and collaboration — not listing \
@@ -210,6 +207,44 @@
 Only encode genuinely reusable knowledge — not transient conversation details.
 """
 
+# ---------------------------------------------------------------------------
+# Conversation discipline — two postures, selected by the `act_first` flag
+# (set from AntonSettings.act_first via ChatSessionConfig.act_first; default True).
+# Injected into CHAT_SYSTEM_PROMPT via {conversation_discipline}.
+# ---------------------------------------------------------------------------
+CONVERSATION_DISCIPLINE_ACT_FIRST = """CONVERSATION DISCIPLINE (critical):
+- Bias toward ACTION. When a request has a reasonable default interpretation, act on it \
+now — do not stall the task with a clarifying question. A delivered result the user can \
+correct beats a question that makes them wait.
+- STATE YOUR ASSUMPTIONS AS YOU MAKE THEM. Whenever you proceed on an assumption — a \
+default value, an interpretation of a vague request, a chosen approach, or a scope you \
+picked — say so plainly in the SAME response, right as you act, not buried at the end. \
+Phrase it like "Assuming you mean X (the common case), so I'll…" or "Going with monthly \
+granularity since you didn't specify." Surface each assumption as it happens so the user \
+can redirect mid-flight instead of being blocked up front. Acting silently is wrong; \
+acting out loud with your assumptions visible is right.
+- Only STOP and ASK when acting on a guess would be costly to undo or is genuinely \
+unknowable: destructive or irreversible actions (deleting data, spending money, sending \
+messages on the user's behalf), credentials or access you can't obtain, or a fork where \
+the options lead to materially different results and you have no basis to choose. Then ask \
+ONE tight question — and when you ask, STOP and WAIT for the reply; never ask and act in \
+the same turn, that skips their answer.
+- When the user gives a vague answer (like "yeah", "the current one", "sure"), interpret \
+it in context of what you just asked. Do not ask them to repeat themselves.
+- Don't front-load a questionnaire. Prefer acting on sensible defaults (stated out loud) \
+over interrogating the user; if something truly gates the work, ask at most 1-2 things."""
+
+CONVERSATION_DISCIPLINE_ASK_FIRST = """CONVERSATION DISCIPLINE (critical):
+- If you ask the user a question, STOP and WAIT for their reply. Never ask a question \
+and then act in the same turn — that skips the user's answer.
+- Only act when you have ALL the information you need. If you're unsure \
+about anything, ask first, then act in a LATER turn after receiving the answer.
+- When the user gives a vague answer (like "yeah", "the current one", "sure"), interpret \
+it in context of what you just asked. Do not ask them to repeat themselves.
+- Gather requirements incrementally through conversation. Do not front-load every \
+possible question at once — ask 1-3 at a time, then follow up."""
+
+
 # ---------------------------------------------------------------------------
 # Artifact contract — universal entry point for any user-facing output
 # ---------------------------------------------------------------------------
@@ -322,8 +357,8 @@
 Do NOT build a single 20KB+ HTML string in memory and write it at the end.
   3. CAP STRING SIZE PER CELL at ~5KB. Large-string scratchpad calls are the \
 single biggest cause of silent failures (the tool occasionally drops the \
-`code` payload on oversized inputs and returns "No code provided", which still \
-counts against the round cap). If a section is too big, split it.
+`code` payload on oversized inputs and the cell comes back with an empty-code \
+error, which still counts against the round cap). If a section is too big, split it.
   4. NEVER re-emit the full HTML mid-build. Append deltas, don't re-print \
 the world. Assembly is a one-line concat at the end, not a re-render of \
 everything you've written so far.
@@ -810,3 +845,22 @@ async def hello():
     "a public API, archive.org, an alternate library, or a completely different data source. "
     "Only involve the user if the problem truly requires something only they can provide."
 )
+
+# Scratchpad failures need different advice than the generic (scrape/fetch)
+# RESILIENCE_NUDGE above — telling the model to "try a public API / archive.org"
+# when a cell is too big or too slow just sends it renaming-and-retrying. These
+# are chosen by failure type in ChatSession._apply_error_tracking.
+SCRATCHPAD_SIZE_NUDGE = (
+    "\n\nSYSTEM: This scratchpad cell keeps failing on its size, not its logic. "
+    "Stop retrying the same large cell. Write the output to disk incrementally — "
+    "open(path, 'w') once, then open(path, 'a') to append each chunk, keeping each "
+    "cell's string under ~5KB — or generate the content inside the cell instead of "
+    "passing a large literal. Reuse the SAME scratchpad; do not rename it."
+)
+SCRATCHPAD_TIMEOUT_NUDGE = (
+    "\n\nSYSTEM: This scratchpad cell keeps timing out — the work is too heavy, not "
+    "the write. Make the next cell smaller: fewer rows/items per cell, split a long "
+    "loop across cells (process a batch, return, continue), or narrow the scope. Call "
+    "progress() inside long loops so active work isn't mistaken for a hang. Reuse the "
+    "SAME scratchpad; do not rename it."
+)
diff --git a/anton/core/memory/acc.py b/anton/core/memory/acc.py
@@ -446,26 +446,33 @@ def detect_reset_churn(events: Sequence[Event]) -> Lesson | None:
 
 
 def detect_kill_loop(events: Sequence[Event]) -> Lesson | None:
-    """The same scratchpad name had >= N cells killed (timeout/cancel/OOM).
+    """>= N scratchpad cells were killed (timeout/cancel/OOM) in one turn.
+
+    Fires when a single scratchpad is killed >= N times (a per-pad loop) OR
+    when >= N cells are killed across the turn regardless of name. The
+    name-agnostic count is deliberate: renaming the scratchpad between failed
+    attempts (`build_pres` → `write_html` → …) used to split the kill count
+    across buckets and hide the loop. A kill is a kill, and the right lesson
+    (make the next cell smaller) is the same either way.
 
     Reads `kind == "scratchpad_killed"`; looks at `detail.name`.
     """
+    killed = [e for e in events if e.kind == "scratchpad_killed"]
     by_name: defaultdict[str, int] = defaultdict(int)
-    for e in events:
-        if e.kind != "scratchpad_killed":
-            continue
+    for e in killed:
         n = e.detail.get("name") or ""
         if n:
             by_name[n] += 1
-    if not by_name or max(by_name.values()) < _KILL_LOOP_THRESHOLD:
+    per_name_max = max(by_name.values()) if by_name else 0
+    if per_name_max < _KILL_LOOP_THRESHOLD and len(killed) < _KILL_LOOP_THRESHOLD:
         return None
     return Lesson(
         rule=(
-            "When a scratchpad cell is killed (timeout, cancel, OOM), "
-            "the next cell on the same scratchpad needs to be smaller — "
-            "fewer rows, smaller batch, explicit timeout, narrower scope. "
-            "Two kills on the same scratchpad means the approach itself is "
-            "too heavy, not that the same cell needs another try."
+            "When a scratchpad cell is killed (timeout, cancel, OOM), the next "
+            "cell needs to be smaller — fewer rows, smaller batch, explicit "
+            "timeout, narrower scope — and stay on the SAME scratchpad. Two "
+            "kills in a turn (even across renamed scratchpads) mean the approach "
+            "is too heavy, not that the same cell needs another try."
         ),
         kind="when",
         triggers=("scratchpad_killed",),

diff --git a/anton/core/runtime.py b/anton/core/runtime.py
@@ -185,6 +185,7 @@ async def build_chat_session(
         history_store=history_store,
         session_id=session_id,
         proactive_dashboards=settings.proactive_dashboards,
+        act_first=settings.act_first,
         tools=list(extra_tools) if extra_tools else [],
     )
     return ChatSession(config)