Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions anton/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -1271,6 +1271,7 @@ async def _chat_loop(
history_store=history_store,
session_id=current_session_id,
proactive_dashboards=settings.proactive_dashboards,
act_first=settings.act_first,
output_dir=settings.artifacts_dir,
tools=[CONNECT_DATASOURCE_TOOL, PUBLISH_TOOL],
web_search_enabled=settings.web_search_enabled,
Expand Down
1 change: 1 addition & 0 deletions anton/chat_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ def rebuild_session(
history_store=history_store,
session_id=session_id,
proactive_dashboards=settings.proactive_dashboards,
act_first=settings.act_first,
output_dir=settings.artifacts_dir,
web_search_enabled=settings.web_search_enabled,
web_fetch_enabled=settings.web_fetch_enabled,
Expand Down
4 changes: 4 additions & 0 deletions anton/config/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@ class AntonSettings(CoreSettings):

proactive_dashboards: bool = False # when True, build HTML dashboards; when False, CLI output only

# "Do first, ask later": act on reasonable defaults and surface assumptions
# inline instead of stopping to ask. False = cautious ask-first discipline.
act_first: bool = True

theme: str = "auto"

disable_autoupdates: bool = False
Expand Down
24 changes: 20 additions & 4 deletions anton/core/backends/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,23 @@ def compute_timeouts(estimated_seconds: int) -> tuple[float, float]:
"""
s = CoreSettings()
if estimated_seconds <= 0:
return float(s.cell_timeout_default), float(s.cell_inactivity_timeout)
total = max(estimated_seconds * 2, estimated_seconds + 30)
inactivity = max(estimated_seconds * 0.5, 30)
return float(total), float(inactivity)
total = float(s.cell_timeout_default)
inactivity = float(s.cell_inactivity_timeout)
else:
total = float(max(estimated_seconds * 2, estimated_seconds + 30))
inactivity = float(max(estimated_seconds * 0.5, 30))
# Clamp the silence window: a large estimate must not buy minutes of
# undetected silence (an est=600 cell would otherwise allow 300s of no
# output before being killed). A cell quiet for cell_inactivity_max
# seconds is killed regardless of its estimate. stdout/progress() reset
# this window, so legitimate long-but-active cells — e.g. a batch loop
# pinging progress() — are unaffected; only genuinely stuck cells die.
inactivity = min(inactivity, float(s.cell_inactivity_max))
# The total is deliberately left uncapped by default, so it keeps scaling
# with the estimate and long-but-active cells run to completion.
# cell_total_max (default 0 = off) is an optional absolute backstop for a
# runaway that keeps producing output forever (which the inactivity cap
# can't catch); set it only when that risk outweighs clipping a genuinely
# long batch job.
if s.cell_total_max > 0:
total = min(total, float(s.cell_total_max))
return total, inactivity
6 changes: 6 additions & 0 deletions anton/core/dispatch/local_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,12 @@ def _safe_error_message(exc: Exception) -> str:
"""Render an exception as a user-facing error with API keys redacted."""
try:
from anton.core.runtime import safe_redact_error
from anton.core.llm.provider import TokenLimitExceeded
# A spent token allowance isn't a crash — surface anton's
# already-friendly quota message as-is, without the
# `[agent error]` prefix that reads like something broke.
if isinstance(exc, TokenLimitExceeded):
return safe_redact_error(exc)
return f"[agent error] {safe_redact_error(exc)}"
except Exception:
return f"[agent error] {exc!r}"
4 changes: 2 additions & 2 deletions anton/core/llm/anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ async def complete(
and exc.body.get("detail")
):
msg = f"Server returned 429 — {exc.body['detail']}"
msg += " Visit https://mdb.ai to upgrade or to top up your tokens."
msg += " Visit https://console.mindshub.ai to upgrade or to top up your tokens."
from .provider import TokenLimitExceeded

raise TokenLimitExceeded(msg) from exc
Expand Down Expand Up @@ -274,7 +274,7 @@ async def stream(
and exc.body.get("detail")
):
msg = f"Server returned 429 — {exc.body['detail']}"
msg += " Visit https://mdb.ai to upgrade or to top up your tokens."
msg += " Visit https://console.mindshub.ai to upgrade or to top up your tokens."
from .provider import TokenLimitExceeded

raise TokenLimitExceeded(msg) from exc
Expand Down
8 changes: 4 additions & 4 deletions anton/core/llm/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -683,7 +683,7 @@ async def complete(
and exc.body.get("detail")
):
msg = f"Server returned 429 — {exc.body['detail']}"
msg += " Visit https://mdb.ai to upgrade or to top up your tokens."
msg += " Visit https://console.mindshub.ai to upgrade or to top up your tokens."
from .provider import TokenLimitExceeded

raise TokenLimitExceeded(msg) from exc
Expand Down Expand Up @@ -852,7 +852,7 @@ async def stream(
and exc.body.get("detail")
):
msg = f"Server returned 429 — {exc.body['detail']}"
msg += " Visit https://mdb.ai to upgrade or top up your tokens."
msg += " Visit https://console.mindshub.ai to upgrade or top up your tokens."
from .provider import TokenLimitExceeded

raise TokenLimitExceeded(msg) from exc
Expand Down Expand Up @@ -970,7 +970,7 @@ async def _complete_via_responses(
and exc.body.get("detail")
):
msg = f"Server returned 429 — {exc.body['detail']}"
msg += " Visit https://mdb.ai to upgrade or to top up your tokens."
msg += " Visit https://console.mindshub.ai to upgrade or to top up your tokens."
from .provider import TokenLimitExceeded

raise TokenLimitExceeded(msg) from exc
Expand Down Expand Up @@ -1099,7 +1099,7 @@ async def _stream_via_responses(
and exc.body.get("detail")
):
msg = f"Server returned 429 — {exc.body['detail']}"
msg += " Visit https://mdb.ai to upgrade or top up your tokens."
msg += " Visit https://console.mindshub.ai to upgrade or top up your tokens."
from .provider import TokenLimitExceeded

raise TokenLimitExceeded(msg) from exc
Expand Down
28 changes: 25 additions & 3 deletions anton/core/llm/prompt_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
BASE_VISUALIZATIONS_PROMPT,
BACKEND_GENERATION_PROMPT,
CHAT_SYSTEM_PROMPT,
CONVERSATION_DISCIPLINE_ACT_FIRST,
CONVERSATION_DISCIPLINE_ASK_FIRST,
VISUALIZATIONS_MARKDOWN_OUTPUT_FORMAT_PROMPT,
VISUALIZATIONS_HTML_OUTPUT_FORMAT_PROMPT,
)
Expand Down Expand Up @@ -124,10 +126,12 @@ def _build_visualizations_section(
def build(
self,
*,
conversation_started: str,
current_datetime: str,
system_prompt_context: SystemPromptContext,
proactive_dashboards: bool,
output_dir: str,
act_first: bool = True,
tool_defs: list["ToolDef"] | None = None,
memory_context: str = "",
project_context: str = "",
Expand All @@ -146,11 +150,17 @@ def build(
if prefix:
prompt += f"{prefix}\n\n"

conversation_discipline = (
CONVERSATION_DISCIPLINE_ACT_FIRST if act_first
else CONVERSATION_DISCIPLINE_ASK_FIRST
)

prompt += CHAT_SYSTEM_PROMPT.format(
runtime_context=system_prompt_context.runtime_context,
artifacts_section=ARTIFACTS_PROMPT,
visualizations_section=visualizations_section,
current_datetime=current_datetime,
conversation_discipline=conversation_discipline,
conversation_started=conversation_started,
)

prompt += "\n\n" + BACKEND_GENERATION_PROMPT.format(output_dir=output_dir)
Expand All @@ -159,8 +169,8 @@ def build(
if tool_prompts:
prompt += tool_prompts

if memory_context:
prompt += memory_context
# Stable, per-session content goes before the volatile tail so the
# prefix stays cache-stable across turns.
if project_context:
prompt += project_context
if self_awareness_context:
Expand All @@ -176,6 +186,18 @@ def build(
if suffix:
prompt += f"\n\n{suffix}"

# Volatile tail — LAST so everything above can be cached. The live
# clock and the relevance-filtered memory snapshot both change every
# turn, so they sit after the cache-stable prefix and never invalidate
# it. (The prefix carries only the fixed "conversation started" stamp.)
prompt += (
f"\n\nCurrent date and time: {current_datetime}\n"
"(Earlier messages are prefixed with the time they were sent; that "
"bracketed timestamp is metadata, not part of the message text.)"
)
if memory_context:
prompt += memory_context

return prompt


Expand Down
78 changes: 66 additions & 12 deletions anton/core/llm/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
solve problems. You are NOT a code assistant or chatbot. You are a coworker with a \
computer, and you use that computer to get things done.

Current date and time: {current_datetime}
Conversation started: {conversation_started}

WHO YOU ARE:
- You solve problems — not just write code. If someone needs emails classified, data \
Expand Down Expand Up @@ -160,15 +160,7 @@

{visualizations_section}

CONVERSATION DISCIPLINE (critical):
- If you ask the user a question, STOP and WAIT for their reply. Never ask a question \
and then act in the same turn — that skips the user's answer.
- Only act when you have ALL the information you need. If you're unsure \
about anything, ask first, then act in a LATER turn after receiving the answer.
- When the user gives a vague answer (like "yeah", "the current one", "sure"), interpret \
it in context of what you just asked. Do not ask them to repeat themselves.
- Gather requirements incrementally through conversation. Do not front-load every \
possible question at once — ask 1-3 at a time, then follow up.
{conversation_discipline}

RUNTIME IDENTITY:
{runtime_context}
Expand All @@ -185,13 +177,18 @@
different data sources for the same information, caching/retrying with backoff, etc.
- Exhaust at least 2-3 genuinely different approaches before involving the user. Each \
attempt should be a meaningfully different strategy — not just retrying the same thing.
- If a scratchpad cell errors the same way twice, change strategy — don't re-run the \
same code expecting a different result.
- Only ask the user for things that truly require them: credentials they haven't shared, \
ambiguous requirements you can't infer, access to private/internal systems, or a choice \
between equally valid options.
- When you do ask for help, briefly explain what you already tried and why it didn't work \
so the user has full context and doesn't suggest things you've already done.

GENERAL RULES:
- Validate your output before claiming the task is done — actually check the result \
(inspect the data, run it, confirm the file/artifact exists and looks right) instead of \
assuming it worked. Report what you verified, not what you intended.
- Be conversational, concise, and direct. No filler. No bullet-point dumps unless asked.
- Respond naturally to greetings, small talk, and follow-up questions.
- When describing yourself, focus on problem-solving and collaboration — not listing \
Expand All @@ -210,6 +207,44 @@
Only encode genuinely reusable knowledge — not transient conversation details.
"""

# ---------------------------------------------------------------------------
# Conversation discipline — two postures, selected by the `act_first` flag
# (ChatSessionConfig.act_first → AntonSettings.act_first; default True).
# Injected into CHAT_SYSTEM_PROMPT via {conversation_discipline}.
# ---------------------------------------------------------------------------
CONVERSATION_DISCIPLINE_ACT_FIRST = """CONVERSATION DISCIPLINE (critical):
- Bias toward ACTION. When a request has a reasonable default interpretation, act on it \
now — do not stall the task with a clarifying question. A delivered result the user can \
correct beats a question that makes them wait.
- STATE YOUR ASSUMPTIONS AS YOU MAKE THEM. Whenever you proceed on an assumption — a \
default value, an interpretation of a vague request, a chosen approach, or a scope you \
picked — say so plainly in the SAME response, right as you act, not buried at the end. \
Phrase it like "Assuming you mean X (the common case), so I'll…" or "Going with monthly \
granularity since you didn't specify." Surface each assumption as it happens so the user \
can redirect mid-flight instead of being blocked up front. Acting silently is wrong; \
acting out loud with your assumptions visible is right.
- Only STOP and ASK when acting on a guess would be costly to undo or is genuinely \
unknowable: destructive or irreversible actions (deleting data, spending money, sending \
messages on the user's behalf), credentials or access you can't obtain, or a fork where \
the options lead to materially different results and you have no basis to choose. Then ask \
ONE tight question — and when you ask, STOP and WAIT for the reply; never ask and act in \
the same turn, that skips their answer.
- When the user gives a vague answer (like "yeah", "the current one", "sure"), interpret \
it in context of what you just asked. Do not ask them to repeat themselves.
- Don't front-load a questionnaire. Prefer acting on sensible defaults (stated out loud) \
over interrogating the user; if something truly gates the work, ask at most 1-2 things."""

CONVERSATION_DISCIPLINE_ASK_FIRST = """CONVERSATION DISCIPLINE (critical):
- If you ask the user a question, STOP and WAIT for their reply. Never ask a question \
and then act in the same turn — that skips the user's answer.
- Only act when you have ALL the information you need. If you're unsure \
about anything, ask first, then act in a LATER turn after receiving the answer.
- When the user gives a vague answer (like "yeah", "the current one", "sure"), interpret \
it in context of what you just asked. Do not ask them to repeat themselves.
- Gather requirements incrementally through conversation. Do not front-load every \
possible question at once — ask 1-3 at a time, then follow up."""


# ---------------------------------------------------------------------------
# Artifact contract — universal entry point for any user-facing output
# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -322,8 +357,8 @@
Do NOT build a single 20KB+ HTML string in memory and write it at the end.
3. CAP STRING SIZE PER CELL at ~5KB. Large-string scratchpad calls are the \
single biggest cause of silent failures (the tool occasionally drops the \
`code` payload on oversized inputs and returns "No code provided", which still \
counts against the round cap). If a section is too big, split it.
`code` payload on oversized inputs and the cell comes back with an empty-code \
error, which still counts against the round cap). If a section is too big, split it.
4. NEVER re-emit the full HTML mid-build. Append deltas, don't re-print \
the world. Assembly is a one-line concat at the end, not a re-render of \
everything you've written so far.
Expand Down Expand Up @@ -810,3 +845,22 @@ async def hello():
"a public API, archive.org, an alternate library, or a completely different data source. "
"Only involve the user if the problem truly requires something only they can provide."
)

# Scratchpad failures need different advice than the generic (scrape/fetch)
# RESILIENCE_NUDGE above — telling the model to "try a public API / archive.org"
# when a cell is too big or too slow only leads it to rename the scratchpad and
# retry the same code. These nudges are chosen by failure type in
# ChatSession._apply_error_tracking.
SCRATCHPAD_SIZE_NUDGE = (
"\n\nSYSTEM: This scratchpad cell keeps failing on its size, not its logic. "
"Stop retrying the same large cell. Write the output to disk incrementally — "
"open(path, 'w') once, then open(path, 'a') to append each chunk, keeping each "
"cell's string under ~5KB — or generate the content inside the cell instead of "
"passing a large literal. Reuse the SAME scratchpad; do not rename it."
)
SCRATCHPAD_TIMEOUT_NUDGE = (
"\n\nSYSTEM: This scratchpad cell keeps timing out — the work is too heavy, not "
"the write. Make the next cell smaller: fewer rows/items per cell, split a long "
"loop across cells (process a batch, return, continue), or narrow the scope. Call "
"progress() inside long loops so active work isn't mistaken for a hang. Reuse the "
"SAME scratchpad; do not rename it."
)
27 changes: 17 additions & 10 deletions anton/core/memory/acc.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,26 +446,33 @@ def detect_reset_churn(events: Sequence[Event]) -> Lesson | None:


def detect_kill_loop(events: Sequence[Event]) -> Lesson | None:
"""The same scratchpad name had >= N cells killed (timeout/cancel/OOM).
""">= N scratchpad cells were killed (timeout/cancel/OOM) in one turn.

Fires when a single scratchpad is killed >= N times (a per-pad loop) OR
when >= N cells are killed across the turn regardless of name. The
name-agnostic count is deliberate: renaming the scratchpad between failed
attempts (`build_pres` → `write_html` → …) used to split the kill count
across buckets and hide the loop. A kill is a kill, and the right lesson
(make the next cell smaller) is the same either way.

Reads `kind == "scratchpad_killed"`; looks at `detail.name`.
"""
killed = [e for e in events if e.kind == "scratchpad_killed"]
by_name: defaultdict[str, int] = defaultdict(int)
for e in events:
if e.kind != "scratchpad_killed":
continue
for e in killed:
n = e.detail.get("name") or ""
if n:
by_name[n] += 1
if not by_name or max(by_name.values()) < _KILL_LOOP_THRESHOLD:
per_name_max = max(by_name.values()) if by_name else 0
if per_name_max < _KILL_LOOP_THRESHOLD and len(killed) < _KILL_LOOP_THRESHOLD:
return None
return Lesson(
rule=(
"When a scratchpad cell is killed (timeout, cancel, OOM), "
"the next cell on the same scratchpad needs to be smaller — "
"fewer rows, smaller batch, explicit timeout, narrower scope. "
"Two kills on the same scratchpad means the approach itself is "
"too heavy, not that the same cell needs another try."
"When a scratchpad cell is killed (timeout, cancel, OOM), the next "
"cell needs to be smaller — fewer rows, smaller batch, explicit "
"timeout, narrower scope — and stay on the SAME scratchpad. Two "
"kills in a turn (even across renamed scratchpads) mean the approach "
"is too heavy, not that the same cell needs another try."
),
kind="when",
triggers=("scratchpad_killed",),
Expand Down
1 change: 1 addition & 0 deletions anton/core/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ async def build_chat_session(
history_store=history_store,
session_id=session_id,
proactive_dashboards=settings.proactive_dashboards,
act_first=settings.act_first,
tools=list(extra_tools) if extra_tools else [],
)
return ChatSession(config)
Expand Down
Loading
Loading