diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..a0b3ad74 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,16 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" + + - package-ecosystem: "pip" + directory: "/mcp" + schedule: + interval: "weekly" + # Only alert on our direct dependencies, not transitive ones + # we can't control (pulled in by the mcp SDK). + allow: + - dependency-name: "mcp" + - dependency-name: "pytest" diff --git a/.github/workflows/mcp.yml b/.github/workflows/mcp.yml new file mode 100644 index 00000000..5bf7f73f --- /dev/null +++ b/.github/workflows/mcp.yml @@ -0,0 +1,44 @@ +name: MCP Tests + +on: + workflow_dispatch: + pull_request: + branches: [master] + paths: ['mcp/**'] + push: + branches: [master] + paths: ['mcp/**'] + +jobs: + mcp-tests: + runs-on: ubuntu-22.04 + timeout-minutes: 30 + name: MCP unit & smoke tests + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Install opam & build HOL Light + run: | + sudo apt update && sudo apt install -y opam xdot + opam init --disable-sandboxing + make switch + eval $(opam env) + make + + - name: Install uv + uses: astral-sh/setup-uv@v4 + + - name: Install MCP dependencies + run: cd mcp && uv sync + + - name: Run unit tests + run: | + eval $(opam env) + cd mcp && uv run pytest test_server.py -v + + - name: Run smoke tests + run: | + eval $(opam env) + cd mcp && uv run python smoke_test.py diff --git a/.gitignore b/.gitignore index 0eb6e3ea..2dcfa717 100644 --- a/.gitignore +++ b/.gitignore @@ -27,3 +27,4 @@ unit_tests_inlined.ml unit_tests.byte unit_tests.native hol-*.ckpt +__pycache__/ diff --git a/mcp/README.md b/mcp/README.md index cafe3514..86c85116 100644 --- a/mcp/README.md +++ b/mcp/README.md @@ -47,6 +47,8 @@ See [TUTORIAL.md](TUTORIAL.md) for more examples (including s2n-bignum ARM proof | `hol_restart` | Kill and restart the HOL Light subprocess | Status message | | `hol_status` | Check process health, uptime, config, checkpoint | Structured JSON | | `hol_help` | Return tactic reference and proof guide (SKILL.md) | Markdown text | +| `start_recording` | Start recording proof tactics to a JSONL file | Status message | +| `stop_recording` | Stop recording and return the file path | Status message | `eval` returns `{"success", "output", "output_truncated", "full_output_chars", "time_seconds"}`. Large outputs are truncated to `max_output_chars` (default 4000, configurable). Override per-call with `max_output_chars=N`. @@ -76,7 +78,7 @@ Create named checkpoints: ```bash export LD_LIBRARY_PATH="$HOME/.local/lib:$LD_LIBRARY_PATH" -# Bare HOL Light (~75s) +# Base HOL Light (~75s) python3 mcp/make_checkpoint.py --name base # With s2n-bignum ARM infrastructure (~5-10min) @@ -136,8 +138,8 @@ The server includes a built-in `hol_help` tool that returns the full tactic refe ```bash cd mcp -uv run pytest test_server.py -v # 38 unit tests -uv run python smoke_test.py # 34 MCP integration checks +uv run pytest test_server.py -v # 48 unit tests +uv run python smoke_test.py # 37 MCP integration checks ``` First run includes HOL Light startup (~75s cold, ~2s with checkpoint). diff --git a/mcp/TUTORIAL.md b/mcp/TUTORIAL.md index 7459f68e..5b2e64af 100644 --- a/mcp/TUTORIAL.md +++ b/mcp/TUTORIAL.md @@ -140,7 +140,7 @@ apply_tactic "DISJ2_TAC THEN REWRITE_TAC[EVEN_ADD] THEN ASM_REWRITE_TAC[NOT_EVE 8. **hol_status** — check if HOL Light is alive (useful for debugging) 9. **hol_restart** — restart HOL Light if it has died or is in a bad state -**Utility tools:** **hol_type** (get term types), **hol_load** (load files), **hol_interrupt** (cancel hung tactics), **hol_help** (tactic reference) +**Utility tools:** **hol_type** (get term types), **hol_load** (load files), **hol_interrupt** (cancel hung tactics), **hol_help** (tactic reference), **start_recording** / **stop_recording** (record tactics to JSONL for replay) --- diff --git a/mcp/hol-mcp.toml b/mcp/hol-mcp.toml index 59996c6e..280db1ce 100644 --- a/mcp/hol-mcp.toml +++ b/mcp/hol-mcp.toml @@ -4,7 +4,7 @@ # Which checkpoint to use (looks for hol-.ckpt/ in HOL Light root). # Create with: python3 mcp/make_checkpoint.py --name [options] -checkpoint = "noledit" +checkpoint = "base" # Timeout in seconds for HOL Light commands. timeout = 600 diff --git a/mcp/make_checkpoint.py b/mcp/make_checkpoint.py index e4682a7b..9faf8a94 100644 --- a/mcp/make_checkpoint.py +++ b/mcp/make_checkpoint.py @@ -1,130 +1,176 @@ #!/usr/bin/env python3 -"""Create a DMTCP checkpoint of HOL Light with piped stdin. +"""Create a DMTCP checkpoint of HOL Light. -Usage: - python3 make_checkpoint.py # bare HOL Light - python3 make_checkpoint.py --name base # named checkpoint - python3 make_checkpoint.py --name s2n -I /path/to/s2n-bignum 'needs "arm/proofs/base.ml"' +Examples: + python3 mcp/make_checkpoint.py # base HOL Light + python3 mcp/make_checkpoint.py --name s2n \\ + -I /path/to/s2n-bignum 'needs "arm/proofs/base.ml"' """ -import subprocess, time, os, socket, sys, glob +import argparse +import glob +import os +import shutil +import socket +import subprocess +import sys +import time HOL_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) SENTINEL = "HOL_MCP_CKPT_READY" -# Parse args -include_dirs = [] -extra_loads = [] -ckpt_name = "noledit" -i = 1 -while i < len(sys.argv): - if sys.argv[i] == "-I" and i + 1 < len(sys.argv): - include_dirs.append(sys.argv[i + 1]) - i += 2 - elif sys.argv[i] == "--name" and i + 1 < len(sys.argv): - ckpt_name = sys.argv[i + 1] - i += 2 - else: - extra_loads.append(sys.argv[i]) - i += 1 - -CKPT_DIR = os.path.join(HOL_DIR, f"hol-{ckpt_name}.ckpt") - -# Clean and recreate -subprocess.run(['rm', '-rf', CKPT_DIR]) -os.makedirs(CKPT_DIR) - -s = socket.socket(); s.bind(('',0)); port = s.getsockname()[1]; s.close() -os.environ['DMTCP_COORD_PORT'] = str(port) -print(f"DMTCP port: {port}", flush=True) - -env = os.environ.copy() -try: - r = subprocess.run(['opam', 'env', '--switch', HOL_DIR + '/', '--set-switch'], - capture_output=True, text=True) - for line in r.stdout.strip().split('\n'): - if '=' in line and "'" in line: - key = line.split('=', 1)[0].strip() - val = line.split("'")[1] - env[key] = val -except FileNotFoundError: - pass - -cmd = [ - 'dmtcp_launch', '--new-coordinator', '--ckptdir', CKPT_DIR, - os.path.join(HOL_DIR, 'ocaml-hol'), '-init', - os.path.join(HOL_DIR, 'hol.ml'), '-I', HOL_DIR, -] -for d in include_dirs: - cmd.extend(['-I', d]) - -# Set HOLLIGHT_LOAD_PATH so needs/loadt can find files in include dirs -if include_dirs: - existing = env.get('HOLLIGHT_LOAD_PATH', '') - extra = ':'.join(include_dirs) - env['HOLLIGHT_LOAD_PATH'] = f"{extra}:{existing}" if existing else extra - -print(f"HOL_DIR: {HOL_DIR}", flush=True) -if include_dirs: - print(f"Include dirs: {include_dirs}", flush=True) - -p = subprocess.Popen( - cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, - text=True, bufsize=1, cwd=HOL_DIR, env=env) - -print("Waiting for HOL Light to load...", flush=True) -while True: - line = p.stdout.readline() - if not line: - print("ERROR: EOF before HOL Light loaded", flush=True) - sys.exit(1) - if 'Camlp5' in line: - print("HOL Light loaded.", flush=True) - break - -for load_cmd in extra_loads: - print(f"Loading: {load_cmd}", flush=True) - p.stdin.write(f'{load_cmd};;\nPrintf.printf "{SENTINEL}\\n%!";;\n') - p.stdin.flush() + +def fatal(msg): + print(f"ERROR: {msg}", file=sys.stderr, flush=True) + sys.exit(1) + + +def parse_args(): + p = argparse.ArgumentParser( + description="Create a DMTCP checkpoint of HOL Light.", + epilog="Extra positional arguments are OCaml expressions evaluated after HOL Light loads " + "(e.g., 'needs \"arm/proofs/base.ml\"').", + ) + p.add_argument("--name", default="base", + help="Checkpoint name (creates hol-.ckpt/). Default: base") + p.add_argument("-I", dest="include_dirs", action="append", default=[], + help="Add OCaml include directory (can be repeated)") + p.add_argument("extra_loads", nargs="*", metavar="EXPR", + help="OCaml expressions to evaluate before checkpointing") + return p.parse_args() + + +def build_env(): + """Build environment with opam switch and LD_LIBRARY_PATH.""" + env = os.environ.copy() + ld_path = os.path.expanduser("~/.local/lib") + if os.path.isdir(ld_path): + env["LD_LIBRARY_PATH"] = ld_path + ":" + env.get("LD_LIBRARY_PATH", "") + try: + r = subprocess.run( + ["opam", "env", "--switch", HOL_DIR + "/", "--set-switch"], + capture_output=True, text=True, + ) + for line in r.stdout.strip().split("\n"): + if "=" in line and "'" in line: + key = line.split("=", 1)[0].strip() + val = line.split("'")[1] + env[key] = val + except FileNotFoundError: + pass + return env + + +def wait_for_line(proc, marker, error_msg): + """Read stdout until a line contains marker. Dies on EOF.""" while True: - line = p.stdout.readline() + line = proc.stdout.readline() if not line: - print("ERROR: EOF during extra loading", flush=True) - sys.exit(1) - if SENTINEL in line: - print(f" done.", flush=True) - break - -print("Compacting GC before checkpoint...", flush=True) -p.stdin.write(f'Gc.compact ();;\nPrintf.printf "{SENTINEL}\\n%!";;\n') -p.stdin.flush() -while True: - line = p.stdout.readline() - if not line: - print("ERROR: EOF during Gc.compact", flush=True) - sys.exit(1) - if SENTINEL in line: + fatal(error_msg) + if marker in line: + return + + +def send_and_wait(proc, code, error_msg): + """Send OCaml code and wait for sentinel.""" + proc.stdin.write(f'{code};;\nPrintf.printf "{SENTINEL}\\n%!";;\n') + proc.stdin.flush() + wait_for_line(proc, SENTINEL, error_msg) + + +def main(): + args = parse_args() + ckpt_dir = os.path.join(HOL_DIR, f"hol-{args.name}.ckpt") + ocaml_hol = os.path.join(HOL_DIR, "ocaml-hol") + + # Validate prerequisites + if not os.path.isfile(ocaml_hol): + fatal(f"ocaml-hol not found at {ocaml_hol}\n Run: make switch && eval $(opam env) && make") + if not shutil.which("dmtcp_launch"): + fatal("dmtcp_launch not found on PATH\n See mcp/README.md for install instructions") + for d in args.include_dirs: + if not os.path.isdir(d): + fatal(f"Include directory not found: {d}") + + # Clean and recreate checkpoint dir + if os.path.exists(ckpt_dir): + shutil.rmtree(ckpt_dir) + os.makedirs(ckpt_dir) + + # Find a free port for DMTCP coordinator + s = socket.socket() + s.bind(("", 0)) + port = s.getsockname()[1] + s.close() + os.environ["DMTCP_COORD_PORT"] = str(port) + + env = build_env() + + # Build command + cmd = [ + "dmtcp_launch", "--new-coordinator", "--ckptdir", ckpt_dir, + ocaml_hol, "-init", os.path.join(HOL_DIR, "hol.ml"), "-I", HOL_DIR, + ] + for d in args.include_dirs: + cmd.extend(["-I", d]) + + if args.include_dirs: + existing = env.get("HOLLIGHT_LOAD_PATH", "") + extra = ":".join(args.include_dirs) + env["HOLLIGHT_LOAD_PATH"] = f"{extra}:{existing}" if existing else extra + + # Print plan + print(f"Checkpoint: hol-{args.name}.ckpt/", flush=True) + if args.include_dirs: + print(f"Include dirs: {args.include_dirs}", flush=True) + if args.extra_loads: + print(f"Extra loads: {args.extra_loads}", flush=True) + + # Launch HOL Light under DMTCP + p = subprocess.Popen( + cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + text=True, bufsize=1, cwd=HOL_DIR, env=env, + ) + + print("Waiting for HOL Light to load (~75s)...", flush=True) + wait_for_line(p, "Camlp5", "HOL Light died before loading. Check that 'make' succeeded.") + print("HOL Light loaded.", flush=True) + + for expr in args.extra_loads: + print(f"Loading: {expr}", flush=True) + send_and_wait(p, expr, f"HOL Light died while loading: {expr}") print(" done.", flush=True) - break -time.sleep(2) + print("Compacting GC...", flush=True) + send_and_wait(p, "Gc.compact ()", "HOL Light died during Gc.compact") + print(" done.", flush=True) -print("Checkpointing...", flush=True) -r = subprocess.run(['dmtcp_command', '--port', str(port), '-bc'], - capture_output=True, text=True, env=env) -print(f" rc: {r.returncode}", flush=True) + time.sleep(2) -for _ in range(10): - files = glob.glob(os.path.join(CKPT_DIR, 'ckpt_*.dmtcp')) - if files: - break - time.sleep(1) + print("Checkpointing...", flush=True) + r = subprocess.run( + ["dmtcp_command", "--port", str(port), "-bc"], + capture_output=True, text=True, env=env, + ) + if r.returncode != 0: + fatal(f"dmtcp_command failed (rc={r.returncode}): {r.stderr.strip()}") + + # Wait for checkpoint file to appear + for _ in range(10): + files = glob.glob(os.path.join(ckpt_dir, "ckpt_*.dmtcp")) + if files: + break + time.sleep(1) + else: + fatal("No checkpoint files created") + + subprocess.run( + ["dmtcp_command", "--port", str(port), "-k"], + capture_output=True, text=True, env=env, + ) + + size_mb = sum(os.path.getsize(f) for f in files) / (1024 * 1024) + print(f"Done. {ckpt_dir}/ ({size_mb:.0f}MB)", flush=True) -files = glob.glob(os.path.join(CKPT_DIR, 'ckpt_*.dmtcp')) -if not files: - print("ERROR: No checkpoint files created", flush=True) - sys.exit(1) -print("Killing coordinator...", flush=True) -subprocess.run(['dmtcp_command', '--port', str(port), '-k'], - capture_output=True, text=True, env=env) -print(f"Done. Checkpoint files: {[os.path.basename(f) for f in files]}", flush=True) +if __name__ == "__main__": + main() diff --git a/mcp/mcp_helpers.ml b/mcp/mcp_helpers.ml index f44bce10..8262a201 100644 --- a/mcp/mcp_helpers.ml +++ b/mcp/mcp_helpers.ml @@ -137,9 +137,32 @@ let mcp_json_apply_tactics (tacs : tactic list) = "{\"proved\":true,\"theorem\":" ^ mcp_json_string (string_of_thm th) ^ ",\"steps\":" ^ string_of_int !steps ^ "}" else - let gs = mcp_json_goalstate () in - let n = String.length gs in - String.sub gs 0 (n - 1) ^ ",\"steps\":" ^ string_of_int !steps ^ "}" + let buf = Buffer.create 256 in + (match !current_goalstack with + | [] -> + Buffer.add_string buf "{\"goals\":[],\"num_subgoals\":0,\"total_goals\":0" + | [_, gl, _] -> + let n = List.length gl in + Buffer.add_string buf "{\"goals\":"; + mcp_buf_goals buf gl; + Buffer.add_string buf ",\"num_subgoals\":"; + Buffer.add_string buf (string_of_int (min 1 n)); + Buffer.add_string buf ",\"total_goals\":"; + Buffer.add_string buf (string_of_int n) + | (_, gl, _) :: (_, gl0, _) :: _ -> + let n = List.length gl in + let p = n - List.length gl0 in + let num_sub = if p < 1 then 1 else p + 1 in + Buffer.add_string buf "{\"goals\":"; + mcp_buf_goals buf gl; + Buffer.add_string buf ",\"num_subgoals\":"; + Buffer.add_string buf (string_of_int num_sub); + Buffer.add_string buf ",\"total_goals\":"; + Buffer.add_string buf (string_of_int n)); + Buffer.add_string buf ",\"steps\":"; + Buffer.add_string buf (string_of_int !steps); + Buffer.add_char buf '}'; + Buffer.contents buf with | Failure msg -> "{\"error\":" ^ mcp_json_string msg ^ diff --git a/mcp/pyproject.toml b/mcp/pyproject.toml index 3853369d..28a9f86e 100644 --- a/mcp/pyproject.toml +++ b/mcp/pyproject.toml @@ -5,7 +5,7 @@ description = "MCP server for HOL Light theorem prover" readme = "README.md" requires-python = ">=3.11" dependencies = [ - "mcp[cli]>=1.26.0", + "mcp>=1.26.0", ] [project.scripts] diff --git a/mcp/server.py b/mcp/server.py index dfc8e6e4..af297daf 100644 --- a/mcp/server.py +++ b/mcp/server.py @@ -40,7 +40,7 @@ def _load_config(): _config, CONFIG_PATH = _load_config() TIMEOUT = _config.get("timeout", int(os.environ.get("HOL_TIMEOUT", "600"))) -CHECKPOINT_NAME = _config.get("checkpoint", os.environ.get("HOL_CHECKPOINT", "noledit")) +CHECKPOINT_NAME = _config.get("checkpoint", os.environ.get("HOL_CHECKPOINT", "base")) MAX_OUTPUT_CHARS = _config.get("max_output_chars", int(os.environ.get("HOL_MAX_OUTPUT", "4000"))) from mcp.server.fastmcp import FastMCP @@ -213,7 +213,14 @@ def _replay_prefix(): if not line: continue entry = json.loads(line) - if entry.get("action") == "tactic": + if entry.get("action") == "backtrack": + steps = entry.get("steps", 1) + removed = 0 + while removed < steps and replayed: + replayed.pop() + _eval_raw("b()") + removed += 1 + elif entry.get("action") == "tactic": result, _ = _eval_raw(f'e({entry["tactic"]})') if _is_error_output(_strip_ansi(result)): for _ in range(len(replayed)): @@ -224,9 +231,9 @@ def _replay_prefix(): for _ in range(len(replayed)): _eval_raw("b()") return - global _recording + global _recording, _recording_flushed _recording = replayed - _flush_recording() + _recording_flushed = len(replayed) # entries already on disk def _eval_raw(code: str, timeout: int = None) -> tuple[str, float]: @@ -526,6 +533,8 @@ def hol_restart() -> str: pass _proc = None _helpers_loaded = False + global _recording_flushed + _recording_flushed = len(_recording) _drain_queue() _reader_buf.clear() _start_hol() @@ -575,14 +584,21 @@ def _json_quote(s: str) -> str: # --- Proof recording helpers --- def _flush_recording(): - """Write the current recording to disk. - Rewrites the full file each time so backtrack deletions are reflected. + """Append new entries to the recording file. + Only writes entries added since the last flush (tracked by _recording_flushed). + Backtrack writes a marker so replay can skip undone tactics. """ import json - if _recording_path: - with open(_recording_path, 'w') as f: - for entry in _recording: - f.write(json.dumps(entry) + "\n") + if not _recording_path: + return + with open(_recording_path, 'a') as f: + global _recording_flushed + for entry in _recording[_recording_flushed:]: + f.write(json.dumps(entry) + "\n") + _recording_flushed = len(_recording) + + +_recording_flushed = 0 # index of first unflushed entry def _record_tactic(tactic_str, result_json_str): @@ -602,16 +618,21 @@ def _record_tactic(tactic_str, result_json_str): def _record_backtrack(steps): - """Remove the last N tactic entries from the recording.""" + """Record a backtrack marker and remove entries from in-memory list.""" + global _recording_flushed if not _recording_path: return removed = 0 while removed < steps and _recording: if _recording[-1]["action"] == "tactic": _recording.pop() + if _recording_flushed > len(_recording): + _recording_flushed = len(_recording) removed += 1 else: break + if removed > 0: + _recording.append({"action": "backtrack", "steps": removed}) _flush_recording() @@ -625,14 +646,18 @@ def _record_tactics_batch(tactics, result_json_str): except (json.JSONDecodeError, TypeError): return if "error" in result and "step" in result: - # Error at step N: record tactics 0..N-1 (0-indexed, step is 1-indexed) - succeeded = result["step"] - 1 + # step = number of tactics that succeeded before the error + succeeded = result["step"] elif "steps" in result: succeeded = result["steps"] else: return - for tac in tactics[:succeeded]: - _recording.append({"action": "tactic", "tactic": tac, "total_goals": 0}) + for i, tac in enumerate(tactics[:succeeded]): + if i == succeeded - 1: + total = 0 if result.get("proved") else result.get("total_goals", 0) + else: + total = 0 + _recording.append({"action": "tactic", "tactic": tac, "total_goals": total}) if succeeded > 0: _flush_recording() @@ -647,22 +672,43 @@ def _extract_e_tactic(code: str) -> str | None: depth = 1 in_str = False in_backtick = False + in_comment = 0 esc = False - for i in range(start, len(stripped)): + i = start + while i < len(stripped): c = stripped[i] if esc: esc = False + i += 1 + continue + if in_comment > 0: + if c == '(' and i + 1 < len(stripped) and stripped[i + 1] == '*': + in_comment += 1 + i += 2 + elif c == '*' and i + 1 < len(stripped) and stripped[i + 1] == ')': + in_comment -= 1 + i += 2 + else: + i += 1 continue if c == '\\' and in_str: esc = True + i += 1 continue if c == '"' and not in_backtick: in_str = not in_str + i += 1 continue if c == '`' and not in_str: in_backtick = not in_backtick + i += 1 continue if in_str or in_backtick: + i += 1 + continue + if c == '(' and i + 1 < len(stripped) and stripped[i + 1] == '*': + in_comment = 1 + i += 2 continue if c == '(': depth += 1 @@ -670,6 +716,7 @@ def _extract_e_tactic(code: str) -> str | None: depth -= 1 if depth == 0: return stripped[start:i].strip() + i += 1 return None @@ -730,7 +777,9 @@ def start_recording(path: str) -> str: os.makedirs(os.path.dirname(path) or ".", exist_ok=True) _recording_path = path _recording = [] - _flush_recording() + global _recording_flushed + _recording_flushed = 0 + open(path, 'w').close() # truncate any existing file return f"Recording started: {path}" diff --git a/mcp/smoke_test.py b/mcp/smoke_test.py index 68eef409..dff2dc0e 100644 --- a/mcp/smoke_test.py +++ b/mcp/smoke_test.py @@ -36,9 +36,10 @@ async def main(): # Check tools registered tools = await session.list_tools() - tool_names = sorted(t.name for t in tools.tools) - check("tools registered", tool_names == ["apply_tactic", "apply_tactics", "backtrack", "eval", "goal_state", "hol_help", "hol_interrupt", "hol_load", "hol_restart", "hol_status", "hol_type", "prove", "search_theorems", "set_goal", "start_recording", "stop_recording"], - f"got {tool_names}") + tool_names = set(t.name for t in tools.tools) + expected_tools = {"apply_tactic", "apply_tactics", "backtrack", "eval", "goal_state", "hol_help", "hol_interrupt", "hol_load", "hol_restart", "hol_status", "hol_type", "prove", "search_theorems", "set_goal", "start_recording", "stop_recording"} + check("tools registered", expected_tools.issubset(tool_names), + f"missing: {expected_tools - tool_names}") # eval — basic arithmetic (now returns JSON) r = await session.call_tool("eval", {"code": "ARITH_RULE `1 + 1 = 2`"}) diff --git a/mcp/test_server.py b/mcp/test_server.py index b4fb85c0..e15b7ba2 100644 --- a/mcp/test_server.py +++ b/mcp/test_server.py @@ -349,7 +349,10 @@ def test_recording_backtrack_removes_entry(tmp_path): server.stop_recording() with open(path) as f: entries = [json.loads(line) for line in f if line.strip()] - assert len(entries) == 0 + assert len(entries) == 2 + assert entries[0]["action"] == "tactic" + assert entries[1]["action"] == "backtrack" + assert entries[1]["steps"] == 1 def test_recording_skips_failed_tactic(tmp_path): @@ -405,5 +408,8 @@ def test_recording_backtrack_removes_last(tmp_path): server.stop_recording() with open(path) as f: entries = [json.loads(line) for line in f if line.strip()] - assert len(entries) == 1 + assert len(entries) == 3 assert entries[0]["tactic"] == "GEN_TAC" + assert entries[1]["tactic"] == "GEN_TAC" + assert entries[2]["action"] == "backtrack" + assert entries[2]["steps"] == 1 diff --git a/mcp/uv.lock b/mcp/uv.lock index b300154f..0f131ddf 100644 --- a/mcp/uv.lock +++ b/mcp/uv.lock @@ -2,15 +2,6 @@ version = 1 revision = 3 requires-python = ">=3.11" -[[package]] -name = "annotated-doc" -version = "0.0.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" }, -] - [[package]] name = "annotated-types" version = "0.7.0" @@ -215,7 +206,7 @@ name = "hol-light-mcp" version = "0.1.0" source = { editable = "." } dependencies = [ - { name = "mcp", extra = ["cli"] }, + { name = "mcp" }, ] [package.dev-dependencies] @@ -224,7 +215,7 @@ dev = [ ] [package.metadata] -requires-dist = [{ name = "mcp", extras = ["cli"], specifier = ">=1.26.0" }] +requires-dist = [{ name = "mcp", specifier = ">=1.26.0" }] [package.metadata.requires-dev] dev = [{ name = "pytest", specifier = ">=9.0.2" }] @@ -311,18 +302,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" }, ] -[[package]] -name = "markdown-it-py" -version = "4.0.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "mdurl" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" }, -] - [[package]] name = "mcp" version = "1.27.0" @@ -348,21 +327,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9c/46/f6b4ad632c67ef35209a66127e4bddc95759649dd595f71f13fba11bdf9a/mcp-1.27.0-py3-none-any.whl", hash = "sha256:5ce1fa81614958e267b21fb2aa34e0aea8e2c6ede60d52aba45fd47246b4d741", size = 215967, upload-time = "2026-04-02T14:48:07.24Z" }, ] -[package.optional-dependencies] -cli = [ - { name = "python-dotenv" }, - { name = "typer" }, -] - -[[package]] -name = "mdurl" -version = "0.1.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, -] - [[package]] name = "packaging" version = "26.0" @@ -606,19 +570,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766, upload-time = "2025-10-13T15:30:47.625Z" }, ] -[[package]] -name = "rich" -version = "14.3.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "markdown-it-py" }, - { name = "pygments" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b3/c6/f3b320c27991c46f43ee9d856302c70dc2d0fb2dba4842ff739d5f46b393/rich-14.3.3.tar.gz", hash = "sha256:b8daa0b9e4eef54dd8cf7c86c03713f53241884e814f4e2f5fb342fe520f639b", size = 230582, upload-time = "2026-02-19T17:23:12.474Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/14/25/b208c5683343959b670dc001595f2f3737e051da617f66c31f7c4fa93abc/rich-14.3.3-py3-none-any.whl", hash = "sha256:793431c1f8619afa7d3b52b2cdec859562b950ea0d4b6b505397612db8d5362d", size = 310458, upload-time = "2026-02-19T17:23:13.732Z" }, -] - [[package]] name = "rpds-py" version = "0.30.0" @@ -727,15 +678,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/b7/b95708304cd49b7b6f82fdd039f1748b66ec2b21d6a45180910802f1abf1/rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:ac37f9f516c51e5753f27dfdef11a88330f04de2d564be3991384b2f3535d02e", size = 562191, upload-time = "2025-11-30T20:24:36.853Z" }, ] -[[package]] -name = "shellingham" -version = "1.5.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, -] - [[package]] name = "sse-starlette" version = "3.3.4" @@ -762,21 +704,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0b/c9/584bc9651441b4ba60cc4d557d8a547b5aff901af35bda3a4ee30c819b82/starlette-1.0.0-py3-none-any.whl", hash = "sha256:d3ec55e0bb321692d275455ddfd3df75fff145d009685eb40dc91fc66b03d38b", size = 72651, upload-time = "2026-03-22T18:29:45.111Z" }, ] -[[package]] -name = "typer" -version = "0.24.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "annotated-doc" }, - { name = "click" }, - { name = "rich" }, - { name = "shellingham" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/f5/24/cb09efec5cc954f7f9b930bf8279447d24618bb6758d4f6adf2574c41780/typer-0.24.1.tar.gz", hash = "sha256:e39b4732d65fbdcde189ae76cf7cd48aeae72919dea1fdfc16593be016256b45", size = 118613, upload-time = "2026-02-21T16:54:40.609Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/4a/91/48db081e7a63bb37284f9fbcefda7c44c277b18b0e13fbc36ea2335b71e6/typer-0.24.1-py3-none-any.whl", hash = "sha256:112c1f0ce578bfb4cab9ffdabc68f031416ebcc216536611ba21f04e9aa84c9e", size = 56085, upload-time = "2026-02-21T16:54:41.616Z" }, -] - [[package]] name = "typing-extensions" version = "4.15.0"