diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 00000000..a0b3ad74
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,16 @@
+version: 2
+updates:
+  - package-ecosystem: "github-actions"
+    directory: "/"
+    schedule:
+      interval: "weekly"
+
+  - package-ecosystem: "pip"
+    directory: "/mcp"
+    schedule:
+      interval: "weekly"
+    # Only alert on our direct dependencies, not transitive ones
+    # we can't control (pulled in by the mcp SDK).
+    allow:
+      - dependency-name: "mcp"
+      - dependency-name: "pytest"
diff --git a/.github/workflows/mcp.yml b/.github/workflows/mcp.yml
new file mode 100644
index 00000000..5bf7f73f
--- /dev/null
+++ b/.github/workflows/mcp.yml
@@ -0,0 +1,44 @@
+name: MCP Tests
+
+on:
+  workflow_dispatch:
+  pull_request:
+    branches: [master]
+    paths: ['mcp/**']
+  push:
+    branches: [master]
+    paths: ['mcp/**']
+
+jobs:
+  mcp-tests:
+    runs-on: ubuntu-22.04
+    timeout-minutes: 30
+    name: MCP unit & smoke tests
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Install opam & build HOL Light
+        run: |
+          sudo apt update && sudo apt install -y opam xdot
+          opam init --disable-sandboxing
+          make switch
+          eval $(opam env)
+          make
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+
+      - name: Install MCP dependencies
+        run: cd mcp && uv sync
+
+      - name: Run unit tests
+        run: |
+          eval $(opam env)
+          cd mcp && uv run pytest test_server.py -v
+
+      - name: Run smoke tests
+        run: |
+          eval $(opam env)
+          cd mcp && uv run python smoke_test.py
diff --git a/.gitignore b/.gitignore
index 0eb6e3ea..2dcfa717 100644
--- a/.gitignore
+++ b/.gitignore
@@ -27,3 +27,4 @@ unit_tests_inlined.ml
 unit_tests.byte
 unit_tests.native
 hol-*.ckpt
+__pycache__/
diff --git a/mcp/README.md b/mcp/README.md
index cafe3514..86c85116 100644
--- a/mcp/README.md
+++ b/mcp/README.md
@@ -47,6 +47,8 @@ See [TUTORIAL.md](TUTORIAL.md) for more examples (including s2n-bignum ARM proof
 | `hol_restart` | Kill and restart the HOL Light subprocess | Status message |
 | `hol_status` | Check process health, uptime, config, checkpoint | Structured JSON |
 | `hol_help` | Return tactic reference and proof guide (SKILL.md) | Markdown text |
+| `start_recording` | Start recording proof tactics to a JSONL file | Status message |
+| `stop_recording` | Stop recording and return the file path | Status message |
 
 `eval` returns `{"success", "output", "output_truncated", "full_output_chars", "time_seconds"}`. Large outputs are truncated to `max_output_chars` (default 4000, configurable). Override per-call with `max_output_chars=N`.
 
@@ -76,7 +78,7 @@ Create named checkpoints:
 ```bash
 export LD_LIBRARY_PATH="$HOME/.local/lib:$LD_LIBRARY_PATH"
 
-# Bare HOL Light (~75s)
+# Base HOL Light (~75s)
 python3 mcp/make_checkpoint.py --name base
 
 # With s2n-bignum ARM infrastructure (~5-10min)
@@ -136,8 +138,8 @@ The server includes a built-in `hol_help` tool that returns the full tactic refe
 
 ```bash
 cd mcp
-uv run pytest test_server.py -v       # 38 unit tests
-uv run python smoke_test.py           # 34 MCP integration checks
+uv run pytest test_server.py -v       # 48 unit tests
+uv run python smoke_test.py           # 37 MCP integration checks
 ```
 
 First run includes HOL Light startup (~75s cold, ~2s with checkpoint).
diff --git a/mcp/TUTORIAL.md b/mcp/TUTORIAL.md
index 7459f68e..5b2e64af 100644
--- a/mcp/TUTORIAL.md
+++ b/mcp/TUTORIAL.md
@@ -140,7 +140,7 @@ apply_tactic  "DISJ2_TAC THEN REWRITE_TAC[EVEN_ADD] THEN ASM_REWRITE_TAC[NOT_EVE
 8. **hol_status** — check if HOL Light is alive (useful for debugging)
 9. **hol_restart** — restart HOL Light if it has died or is in a bad state
 
-**Utility tools:** **hol_type** (get term types), **hol_load** (load files), **hol_interrupt** (cancel hung tactics), **hol_help** (tactic reference)
+**Utility tools:** **hol_type** (get term types), **hol_load** (load files), **hol_interrupt** (cancel hung tactics), **hol_help** (tactic reference), **start_recording** / **stop_recording** (record tactics to JSONL for replay)
 
 ---
 
diff --git a/mcp/hol-mcp.toml b/mcp/hol-mcp.toml
index 59996c6e..280db1ce 100644
--- a/mcp/hol-mcp.toml
+++ b/mcp/hol-mcp.toml
@@ -4,7 +4,7 @@
 
 # Which checkpoint to use (looks for hol-<name>.ckpt/ in HOL Light root).
 # Create with: python3 mcp/make_checkpoint.py --name <name> [options]
-checkpoint = "noledit"
+checkpoint = "base"
 
 # Timeout in seconds for HOL Light commands.
 timeout = 600
diff --git a/mcp/make_checkpoint.py b/mcp/make_checkpoint.py
index e4682a7b..9faf8a94 100644
--- a/mcp/make_checkpoint.py
+++ b/mcp/make_checkpoint.py
@@ -1,130 +1,176 @@
 #!/usr/bin/env python3
-"""Create a DMTCP checkpoint of HOL Light with piped stdin.
+"""Create a DMTCP checkpoint of HOL Light.
 
-Usage:
-    python3 make_checkpoint.py                                              # bare HOL Light
-    python3 make_checkpoint.py --name base                                  # named checkpoint
-    python3 make_checkpoint.py --name s2n -I /path/to/s2n-bignum 'needs "arm/proofs/base.ml"'
+Examples:
+    python3 mcp/make_checkpoint.py                          # base HOL Light
+    python3 mcp/make_checkpoint.py --name s2n \\
+        -I /path/to/s2n-bignum 'needs "arm/proofs/base.ml"'
 """
-import subprocess, time, os, socket, sys, glob
+import argparse
+import glob
+import os
+import shutil
+import socket
+import subprocess
+import sys
+import time
 
 HOL_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 SENTINEL = "HOL_MCP_CKPT_READY"
 
-# Parse args
-include_dirs = []
-extra_loads = []
-ckpt_name = "noledit"
-i = 1
-while i < len(sys.argv):
-    if sys.argv[i] == "-I" and i + 1 < len(sys.argv):
-        include_dirs.append(sys.argv[i + 1])
-        i += 2
-    elif sys.argv[i] == "--name" and i + 1 < len(sys.argv):
-        ckpt_name = sys.argv[i + 1]
-        i += 2
-    else:
-        extra_loads.append(sys.argv[i])
-        i += 1
-
-CKPT_DIR = os.path.join(HOL_DIR, f"hol-{ckpt_name}.ckpt")
-
-# Clean and recreate
-subprocess.run(['rm', '-rf', CKPT_DIR])
-os.makedirs(CKPT_DIR)
-
-s = socket.socket(); s.bind(('',0)); port = s.getsockname()[1]; s.close()
-os.environ['DMTCP_COORD_PORT'] = str(port)
-print(f"DMTCP port: {port}", flush=True)
-
-env = os.environ.copy()
-try:
-    r = subprocess.run(['opam', 'env', '--switch', HOL_DIR + '/', '--set-switch'],
-                       capture_output=True, text=True)
-    for line in r.stdout.strip().split('\n'):
-        if '=' in line and "'" in line:
-            key = line.split('=', 1)[0].strip()
-            val = line.split("'")[1]
-            env[key] = val
-except FileNotFoundError:
-    pass
-
-cmd = [
-    'dmtcp_launch', '--new-coordinator', '--ckptdir', CKPT_DIR,
-    os.path.join(HOL_DIR, 'ocaml-hol'), '-init',
-    os.path.join(HOL_DIR, 'hol.ml'), '-I', HOL_DIR,
-]
-for d in include_dirs:
-    cmd.extend(['-I', d])
-
-# Set HOLLIGHT_LOAD_PATH so needs/loadt can find files in include dirs
-if include_dirs:
-    existing = env.get('HOLLIGHT_LOAD_PATH', '')
-    extra = ':'.join(include_dirs)
-    env['HOLLIGHT_LOAD_PATH'] = f"{extra}:{existing}" if existing else extra
-
-print(f"HOL_DIR: {HOL_DIR}", flush=True)
-if include_dirs:
-    print(f"Include dirs: {include_dirs}", flush=True)
-
-p = subprocess.Popen(
-    cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
-    text=True, bufsize=1, cwd=HOL_DIR, env=env)
-
-print("Waiting for HOL Light to load...", flush=True)
-while True:
-    line = p.stdout.readline()
-    if not line:
-        print("ERROR: EOF before HOL Light loaded", flush=True)
-        sys.exit(1)
-    if 'Camlp5' in line:
-        print("HOL Light loaded.", flush=True)
-        break
-
-for load_cmd in extra_loads:
-    print(f"Loading: {load_cmd}", flush=True)
-    p.stdin.write(f'{load_cmd};;\nPrintf.printf "{SENTINEL}\\n%!";;\n')
-    p.stdin.flush()
+
+def fatal(msg):
+    print(f"ERROR: {msg}", file=sys.stderr, flush=True)
+    sys.exit(1)
+
+
+def parse_args():
+    p = argparse.ArgumentParser(
+        description="Create a DMTCP checkpoint of HOL Light.",
+        epilog="Extra positional arguments are OCaml expressions evaluated after HOL Light loads "
+               "(e.g., 'needs \"arm/proofs/base.ml\"').",
+    )
+    p.add_argument("--name", default="base",
+                   help="Checkpoint name (creates hol-<name>.ckpt/). Default: base")
+    p.add_argument("-I", dest="include_dirs", action="append", default=[],
+                   help="Add OCaml include directory (can be repeated)")
+    p.add_argument("extra_loads", nargs="*", metavar="EXPR",
+                   help="OCaml expressions to evaluate before checkpointing")
+    return p.parse_args()
+
+
+def build_env():
+    """Build environment with opam switch and LD_LIBRARY_PATH."""
+    env = os.environ.copy()
+    ld_path = os.path.expanduser("~/.local/lib")
+    if os.path.isdir(ld_path):
+        env["LD_LIBRARY_PATH"] = ld_path + ":" + env.get("LD_LIBRARY_PATH", "")
+    try:
+        r = subprocess.run(
+            ["opam", "env", "--switch", HOL_DIR + "/", "--set-switch"],
+            capture_output=True, text=True,
+        )
+        for line in r.stdout.strip().split("\n"):
+            if "=" in line and "'" in line:
+                key = line.split("=", 1)[0].strip()
+                val = line.split("'")[1]
+                env[key] = val
+    except FileNotFoundError:
+        pass
+    return env
+
+
+def wait_for_line(proc, marker, error_msg):
+    """Read stdout until a line contains marker. Dies on EOF."""
     while True:
-        line = p.stdout.readline()
+        line = proc.stdout.readline()
         if not line:
-            print("ERROR: EOF during extra loading", flush=True)
-            sys.exit(1)
-        if SENTINEL in line:
-            print(f"  done.", flush=True)
-            break
-
-print("Compacting GC before checkpoint...", flush=True)
-p.stdin.write(f'Gc.compact ();;\nPrintf.printf "{SENTINEL}\\n%!";;\n')
-p.stdin.flush()
-while True:
-    line = p.stdout.readline()
-    if not line:
-        print("ERROR: EOF during Gc.compact", flush=True)
-        sys.exit(1)
-    if SENTINEL in line:
+            fatal(error_msg)
+        if marker in line:
+            return
+
+
+def send_and_wait(proc, code, error_msg):
+    """Send OCaml code and wait for sentinel."""
+    proc.stdin.write(f'{code};;\nPrintf.printf "{SENTINEL}\\n%!";;\n')
+    proc.stdin.flush()
+    wait_for_line(proc, SENTINEL, error_msg)
+
+
+def main():
+    args = parse_args()
+    ckpt_dir = os.path.join(HOL_DIR, f"hol-{args.name}.ckpt")
+    ocaml_hol = os.path.join(HOL_DIR, "ocaml-hol")
+
+    # Validate prerequisites
+    if not os.path.isfile(ocaml_hol):
+        fatal(f"ocaml-hol not found at {ocaml_hol}\n  Run: make switch && eval $(opam env) && make")
+    if not shutil.which("dmtcp_launch"):
+        fatal("dmtcp_launch not found on PATH\n  See mcp/README.md for install instructions")
+    for d in args.include_dirs:
+        if not os.path.isdir(d):
+            fatal(f"Include directory not found: {d}")
+
+    # Clean and recreate checkpoint dir
+    if os.path.exists(ckpt_dir):
+        shutil.rmtree(ckpt_dir)
+    os.makedirs(ckpt_dir)
+
+    # Find a free port for DMTCP coordinator
+    s = socket.socket()
+    s.bind(("", 0))
+    port = s.getsockname()[1]
+    s.close()
+    os.environ["DMTCP_COORD_PORT"] = str(port)
+
+    env = build_env()
+
+    # Build command
+    cmd = [
+        "dmtcp_launch", "--new-coordinator", "--ckptdir", ckpt_dir,
+        ocaml_hol, "-init", os.path.join(HOL_DIR, "hol.ml"), "-I", HOL_DIR,
+    ]
+    for d in args.include_dirs:
+        cmd.extend(["-I", d])
+
+    if args.include_dirs:
+        existing = env.get("HOLLIGHT_LOAD_PATH", "")
+        extra = ":".join(args.include_dirs)
+        env["HOLLIGHT_LOAD_PATH"] = f"{extra}:{existing}" if existing else extra
+
+    # Print plan
+    print(f"Checkpoint: hol-{args.name}.ckpt/", flush=True)
+    if args.include_dirs:
+        print(f"Include dirs: {args.include_dirs}", flush=True)
+    if args.extra_loads:
+        print(f"Extra loads: {args.extra_loads}", flush=True)
+
+    # Launch HOL Light under DMTCP
+    p = subprocess.Popen(
+        cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+        text=True, bufsize=1, cwd=HOL_DIR, env=env,
+    )
+
+    print("Waiting for HOL Light to load (~75s)...", flush=True)
+    wait_for_line(p, "Camlp5", "HOL Light died before loading. Check that 'make' succeeded.")
+    print("HOL Light loaded.", flush=True)
+
+    for expr in args.extra_loads:
+        print(f"Loading: {expr}", flush=True)
+        send_and_wait(p, expr, f"HOL Light died while loading: {expr}")
         print("  done.", flush=True)
-        break
 
-time.sleep(2)
+    print("Compacting GC...", flush=True)
+    send_and_wait(p, "Gc.compact ()", "HOL Light died during Gc.compact")
+    print("  done.", flush=True)
 
-print("Checkpointing...", flush=True)
-r = subprocess.run(['dmtcp_command', '--port', str(port), '-bc'],
-                    capture_output=True, text=True, env=env)
-print(f"  rc: {r.returncode}", flush=True)
+    time.sleep(2)
 
-for _ in range(10):
-    files = glob.glob(os.path.join(CKPT_DIR, 'ckpt_*.dmtcp'))
-    if files:
-        break
-    time.sleep(1)
+    print("Checkpointing...", flush=True)
+    r = subprocess.run(
+        ["dmtcp_command", "--port", str(port), "-bc"],
+        capture_output=True, text=True, env=env,
+    )
+    if r.returncode != 0:
+        fatal(f"dmtcp_command failed (rc={r.returncode}): {r.stderr.strip()}")
+
+    # Wait for checkpoint file to appear
+    for _ in range(10):
+        files = glob.glob(os.path.join(ckpt_dir, "ckpt_*.dmtcp"))
+        if files:
+            break
+        time.sleep(1)
+    else:
+        fatal("No checkpoint files created")
+
+    subprocess.run(
+        ["dmtcp_command", "--port", str(port), "-k"],
+        capture_output=True, text=True, env=env,
+    )
+
+    size_mb = sum(os.path.getsize(f) for f in files) / (1024 * 1024)
+    print(f"Done. {ckpt_dir}/ ({size_mb:.0f}MB)", flush=True)
 
-files = glob.glob(os.path.join(CKPT_DIR, 'ckpt_*.dmtcp'))
-if not files:
-    print("ERROR: No checkpoint files created", flush=True)
-    sys.exit(1)
 
-print("Killing coordinator...", flush=True)
-subprocess.run(['dmtcp_command', '--port', str(port), '-k'],
-               capture_output=True, text=True, env=env)
-print(f"Done. Checkpoint files: {[os.path.basename(f) for f in files]}", flush=True)
+if __name__ == "__main__":
+    main()
diff --git a/mcp/mcp_helpers.ml b/mcp/mcp_helpers.ml
index f44bce10..8262a201 100644
--- a/mcp/mcp_helpers.ml
+++ b/mcp/mcp_helpers.ml
@@ -137,9 +137,32 @@ let mcp_json_apply_tactics (tacs : tactic list) =
       "{\"proved\":true,\"theorem\":" ^ mcp_json_string (string_of_thm th) ^
       ",\"steps\":" ^ string_of_int !steps ^ "}"
     else
-      let gs = mcp_json_goalstate () in
-      let n = String.length gs in
-      String.sub gs 0 (n - 1) ^ ",\"steps\":" ^ string_of_int !steps ^ "}"
+      let buf = Buffer.create 256 in
+      (match !current_goalstack with
+      | [] ->
+        Buffer.add_string buf "{\"goals\":[],\"num_subgoals\":0,\"total_goals\":0"
+      | [_, gl, _] ->
+        let n = List.length gl in
+        Buffer.add_string buf "{\"goals\":";
+        mcp_buf_goals buf gl;
+        Buffer.add_string buf ",\"num_subgoals\":";
+        Buffer.add_string buf (string_of_int (min 1 n));
+        Buffer.add_string buf ",\"total_goals\":";
+        Buffer.add_string buf (string_of_int n)
+      | (_, gl, _) :: (_, gl0, _) :: _ ->
+        let n = List.length gl in
+        let p = n - List.length gl0 in
+        let num_sub = if p < 1 then 1 else p + 1 in
+        Buffer.add_string buf "{\"goals\":";
+        mcp_buf_goals buf gl;
+        Buffer.add_string buf ",\"num_subgoals\":";
+        Buffer.add_string buf (string_of_int num_sub);
+        Buffer.add_string buf ",\"total_goals\":";
+        Buffer.add_string buf (string_of_int n));
+      Buffer.add_string buf ",\"steps\":";
+      Buffer.add_string buf (string_of_int !steps);
+      Buffer.add_char buf '}';
+      Buffer.contents buf
   with
   | Failure msg ->
     "{\"error\":" ^ mcp_json_string msg ^
diff --git a/mcp/pyproject.toml b/mcp/pyproject.toml
index 3853369d..28a9f86e 100644
--- a/mcp/pyproject.toml
+++ b/mcp/pyproject.toml
@@ -5,7 +5,7 @@ description = "MCP server for HOL Light theorem prover"
 readme = "README.md"
 requires-python = ">=3.11"
 dependencies = [
-    "mcp[cli]>=1.26.0",
+    "mcp>=1.26.0",
 ]
 
 [project.scripts]
diff --git a/mcp/server.py b/mcp/server.py
index dfc8e6e4..af297daf 100644
--- a/mcp/server.py
+++ b/mcp/server.py
@@ -40,7 +40,7 @@ def _load_config():
 
 _config, CONFIG_PATH = _load_config()
 TIMEOUT = _config.get("timeout", int(os.environ.get("HOL_TIMEOUT", "600")))
-CHECKPOINT_NAME = _config.get("checkpoint", os.environ.get("HOL_CHECKPOINT", "noledit"))
+CHECKPOINT_NAME = _config.get("checkpoint", os.environ.get("HOL_CHECKPOINT", "base"))
 MAX_OUTPUT_CHARS = _config.get("max_output_chars", int(os.environ.get("HOL_MAX_OUTPUT", "4000")))
 
 from mcp.server.fastmcp import FastMCP
@@ -213,7 +213,14 @@ def _replay_prefix():
                 if not line:
                     continue
                 entry = json.loads(line)
-                if entry.get("action") == "tactic":
+                if entry.get("action") == "backtrack":
+                    steps = entry.get("steps", 1)
+                    removed = 0
+                    while removed < steps and replayed:
+                        replayed.pop()
+                        _eval_raw("b()")
+                        removed += 1
+                elif entry.get("action") == "tactic":
                     result, _ = _eval_raw(f'e({entry["tactic"]})')
                     if _is_error_output(_strip_ansi(result)):
                         for _ in range(len(replayed)):
@@ -224,9 +231,9 @@ def _replay_prefix():
         for _ in range(len(replayed)):
             _eval_raw("b()")
         return
-    global _recording
+    global _recording, _recording_flushed
     _recording = replayed
-    _flush_recording()
+    _recording_flushed = len(replayed)  # entries already on disk
 
 
 def _eval_raw(code: str, timeout: int = None) -> tuple[str, float]:
@@ -526,6 +533,8 @@ def hol_restart() -> str:
                 pass
             _proc = None
         _helpers_loaded = False
+        global _recording_flushed
+        _recording_flushed = len(_recording)
         _drain_queue()
         _reader_buf.clear()
         _start_hol()
@@ -575,14 +584,21 @@ def _json_quote(s: str) -> str:
 # --- Proof recording helpers ---
 
 def _flush_recording():
-    """Write the current recording to disk.
-    Rewrites the full file each time so backtrack deletions are reflected.
+    """Append new entries to the recording file.
+    Only writes entries added since the last flush (tracked by _recording_flushed).
+    Backtrack writes a marker so replay can skip undone tactics.
     """
     import json
-    if _recording_path:
-        with open(_recording_path, 'w') as f:
-            for entry in _recording:
-                f.write(json.dumps(entry) + "\n")
+    if not _recording_path:
+        return
+    with open(_recording_path, 'a') as f:
+        global _recording_flushed
+        for entry in _recording[_recording_flushed:]:
+            f.write(json.dumps(entry) + "\n")
+        _recording_flushed = len(_recording)
+
+
+_recording_flushed = 0  # index of first unflushed entry
 
 
 def _record_tactic(tactic_str, result_json_str):
@@ -602,16 +618,21 @@ def _record_tactic(tactic_str, result_json_str):
 
 
 def _record_backtrack(steps):
-    """Remove the last N tactic entries from the recording."""
+    """Record a backtrack marker and remove entries from in-memory list."""
+    global _recording_flushed
     if not _recording_path:
         return
     removed = 0
     while removed < steps and _recording:
         if _recording[-1]["action"] == "tactic":
             _recording.pop()
+            if _recording_flushed > len(_recording):
+                _recording_flushed = len(_recording)
             removed += 1
         else:
             break
+    if removed > 0:
+        _recording.append({"action": "backtrack", "steps": removed})
     _flush_recording()
 
 
@@ -625,14 +646,18 @@ def _record_tactics_batch(tactics, result_json_str):
     except (json.JSONDecodeError, TypeError):
         return
     if "error" in result and "step" in result:
-        # Error at step N: record tactics 0..N-1 (0-indexed, step is 1-indexed)
-        succeeded = result["step"] - 1
+        # step = number of tactics that succeeded before the error
+        succeeded = result["step"]
     elif "steps" in result:
         succeeded = result["steps"]
     else:
         return
-    for tac in tactics[:succeeded]:
-        _recording.append({"action": "tactic", "tactic": tac, "total_goals": 0})
+    for i, tac in enumerate(tactics[:succeeded]):
+        if i == succeeded - 1:
+            total = 0 if result.get("proved") else result.get("total_goals", 0)
+        else:
+            total = 0
+        _recording.append({"action": "tactic", "tactic": tac, "total_goals": total})
     if succeeded > 0:
         _flush_recording()
 
@@ -647,22 +672,43 @@ def _extract_e_tactic(code: str) -> str | None:
     depth = 1
     in_str = False
     in_backtick = False
+    in_comment = 0
     esc = False
-    for i in range(start, len(stripped)):
+    i = start
+    while i < len(stripped):
         c = stripped[i]
         if esc:
             esc = False
+            i += 1
+            continue
+        if in_comment > 0:
+            if c == '(' and i + 1 < len(stripped) and stripped[i + 1] == '*':
+                in_comment += 1
+                i += 2
+            elif c == '*' and i + 1 < len(stripped) and stripped[i + 1] == ')':
+                in_comment -= 1
+                i += 2
+            else:
+                i += 1
             continue
         if c == '\\' and in_str:
             esc = True
+            i += 1
             continue
         if c == '"' and not in_backtick:
             in_str = not in_str
+            i += 1
             continue
         if c == '`' and not in_str:
             in_backtick = not in_backtick
+            i += 1
             continue
         if in_str or in_backtick:
+            i += 1
+            continue
+        if c == '(' and i + 1 < len(stripped) and stripped[i + 1] == '*':
+            in_comment = 1
+            i += 2
             continue
         if c == '(':
             depth += 1
@@ -670,6 +716,7 @@ def _extract_e_tactic(code: str) -> str | None:
             depth -= 1
             if depth == 0:
                 return stripped[start:i].strip()
+        i += 1
     return None
 
 
@@ -730,7 +777,9 @@ def start_recording(path: str) -> str:
         os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
         _recording_path = path
         _recording = []
-        _flush_recording()
+        global _recording_flushed
+        _recording_flushed = 0
+        open(path, 'w').close()  # truncate any existing file
     return f"Recording started: {path}"
 
 
diff --git a/mcp/smoke_test.py b/mcp/smoke_test.py
index 68eef409..dff2dc0e 100644
--- a/mcp/smoke_test.py
+++ b/mcp/smoke_test.py
@@ -36,9 +36,10 @@ async def main():
 
             # Check tools registered
             tools = await session.list_tools()
-            tool_names = sorted(t.name for t in tools.tools)
-            check("tools registered", tool_names == ["apply_tactic", "apply_tactics", "backtrack", "eval", "goal_state", "hol_help", "hol_interrupt", "hol_load", "hol_restart", "hol_status", "hol_type", "prove", "search_theorems", "set_goal", "start_recording", "stop_recording"],
-                  f"got {tool_names}")
+            tool_names = set(t.name for t in tools.tools)
+            expected_tools = {"apply_tactic", "apply_tactics", "backtrack", "eval", "goal_state", "hol_help", "hol_interrupt", "hol_load", "hol_restart", "hol_status", "hol_type", "prove", "search_theorems", "set_goal", "start_recording", "stop_recording"}
+            check("tools registered", expected_tools.issubset(tool_names),
+                  f"missing: {expected_tools - tool_names}")
 
             # eval — basic arithmetic (now returns JSON)
             r = await session.call_tool("eval", {"code": "ARITH_RULE `1 + 1 = 2`"})
diff --git a/mcp/test_server.py b/mcp/test_server.py
index b4fb85c0..e15b7ba2 100644
--- a/mcp/test_server.py
+++ b/mcp/test_server.py
@@ -349,7 +349,10 @@ def test_recording_backtrack_removes_entry(tmp_path):
     server.stop_recording()
     with open(path) as f:
         entries = [json.loads(line) for line in f if line.strip()]
-    assert len(entries) == 0
+    assert len(entries) == 2
+    assert entries[0]["action"] == "tactic"
+    assert entries[1]["action"] == "backtrack"
+    assert entries[1]["steps"] == 1
 
 
 def test_recording_skips_failed_tactic(tmp_path):
@@ -405,5 +408,8 @@ def test_recording_backtrack_removes_last(tmp_path):
     server.stop_recording()
     with open(path) as f:
         entries = [json.loads(line) for line in f if line.strip()]
-    assert len(entries) == 1
+    assert len(entries) == 3
     assert entries[0]["tactic"] == "GEN_TAC"
+    assert entries[1]["tactic"] == "GEN_TAC"
+    assert entries[2]["action"] == "backtrack"
+    assert entries[2]["steps"] == 1
diff --git a/mcp/uv.lock b/mcp/uv.lock
index b300154f..0f131ddf 100644
--- a/mcp/uv.lock
+++ b/mcp/uv.lock
@@ -2,15 +2,6 @@ version = 1
 revision = 3
 requires-python = ">=3.11"
 
-[[package]]
-name = "annotated-doc"
-version = "0.0.4"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" },
-]
-
 [[package]]
 name = "annotated-types"
 version = "0.7.0"
@@ -215,7 +206,7 @@ name = "hol-light-mcp"
 version = "0.1.0"
 source = { editable = "." }
 dependencies = [
-    { name = "mcp", extra = ["cli"] },
+    { name = "mcp" },
 ]
 
 [package.dev-dependencies]
@@ -224,7 +215,7 @@ dev = [
 ]
 
 [package.metadata]
-requires-dist = [{ name = "mcp", extras = ["cli"], specifier = ">=1.26.0" }]
+requires-dist = [{ name = "mcp", specifier = ">=1.26.0" }]
 
 [package.metadata.requires-dev]
 dev = [{ name = "pytest", specifier = ">=9.0.2" }]
@@ -311,18 +302,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" },
 ]
 
-[[package]]
-name = "markdown-it-py"
-version = "4.0.0"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "mdurl" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" },
-]
-
 [[package]]
 name = "mcp"
 version = "1.27.0"
@@ -348,21 +327,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/9c/46/f6b4ad632c67ef35209a66127e4bddc95759649dd595f71f13fba11bdf9a/mcp-1.27.0-py3-none-any.whl", hash = "sha256:5ce1fa81614958e267b21fb2aa34e0aea8e2c6ede60d52aba45fd47246b4d741", size = 215967, upload-time = "2026-04-02T14:48:07.24Z" },
 ]
 
-[package.optional-dependencies]
-cli = [
-    { name = "python-dotenv" },
-    { name = "typer" },
-]
-
-[[package]]
-name = "mdurl"
-version = "0.1.2"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" },
-]
-
 [[package]]
 name = "packaging"
 version = "26.0"
@@ -606,19 +570,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766, upload-time = "2025-10-13T15:30:47.625Z" },
 ]
 
-[[package]]
-name = "rich"
-version = "14.3.3"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "markdown-it-py" },
-    { name = "pygments" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/b3/c6/f3b320c27991c46f43ee9d856302c70dc2d0fb2dba4842ff739d5f46b393/rich-14.3.3.tar.gz", hash = "sha256:b8daa0b9e4eef54dd8cf7c86c03713f53241884e814f4e2f5fb342fe520f639b", size = 230582, upload-time = "2026-02-19T17:23:12.474Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/14/25/b208c5683343959b670dc001595f2f3737e051da617f66c31f7c4fa93abc/rich-14.3.3-py3-none-any.whl", hash = "sha256:793431c1f8619afa7d3b52b2cdec859562b950ea0d4b6b505397612db8d5362d", size = 310458, upload-time = "2026-02-19T17:23:13.732Z" },
-]
-
 [[package]]
 name = "rpds-py"
 version = "0.30.0"
@@ -727,15 +678,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/d1/b7/b95708304cd49b7b6f82fdd039f1748b66ec2b21d6a45180910802f1abf1/rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:ac37f9f516c51e5753f27dfdef11a88330f04de2d564be3991384b2f3535d02e", size = 562191, upload-time = "2025-11-30T20:24:36.853Z" },
 ]
 
-[[package]]
-name = "shellingham"
-version = "1.5.4"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" },
-]
-
 [[package]]
 name = "sse-starlette"
 version = "3.3.4"
@@ -762,21 +704,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/0b/c9/584bc9651441b4ba60cc4d557d8a547b5aff901af35bda3a4ee30c819b82/starlette-1.0.0-py3-none-any.whl", hash = "sha256:d3ec55e0bb321692d275455ddfd3df75fff145d009685eb40dc91fc66b03d38b", size = 72651, upload-time = "2026-03-22T18:29:45.111Z" },
 ]
 
-[[package]]
-name = "typer"
-version = "0.24.1"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "annotated-doc" },
-    { name = "click" },
-    { name = "rich" },
-    { name = "shellingham" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/f5/24/cb09efec5cc954f7f9b930bf8279447d24618bb6758d4f6adf2574c41780/typer-0.24.1.tar.gz", hash = "sha256:e39b4732d65fbdcde189ae76cf7cd48aeae72919dea1fdfc16593be016256b45", size = 118613, upload-time = "2026-02-21T16:54:40.609Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/4a/91/48db081e7a63bb37284f9fbcefda7c44c277b18b0e13fbc36ea2335b71e6/typer-0.24.1-py3-none-any.whl", hash = "sha256:112c1f0ce578bfb4cab9ffdabc68f031416ebcc216536611ba21f04e9aa84c9e", size = 56085, upload-time = "2026-02-21T16:54:41.616Z" },
-]
-
 [[package]]
 name = "typing-extensions"
 version = "4.15.0"