From 0678950ebeb05b88225368cf71b2aeb5039ced57 Mon Sep 17 00:00:00 2001
From: monkscode <1dhruvilvyas@gmail.com>
Date: Thu, 23 Apr 2026 16:43:13 +0530
Subject: [PATCH 1/2] feat: Update environment configuration and enhance
 logging for local development; normalize Robot Framework CSS selectors

---
 .env.example                                  | 22 ++----
 run.sh                                        | 18 +++++
 src/backend/.env.example                      |  7 ++
 src/backend/config/logging_config.py          |  2 +-
 src/backend/crew_ai/crew.py                   | 10 +++
 .../library_context/browser_context.py        |  7 +-
 .../optimization/nl_feedback_engine.py        | 33 +++++++-
 src/backend/crew_ai/robot_code_normalizer.py  | 78 +++++++++++++++++++
 src/backend/crew_ai/tasks.py                  |  3 +
 src/backend/services/workflow_service.py      |  5 ++
 tools/browser_use_service.py                  | 18 ++++-
 11 files changed, 182 insertions(+), 21 deletions(-)
 create mode 100644 src/backend/crew_ai/robot_code_normalizer.py
diff --git a/.env.example b/.env.example
index fce6b4f..d6c1c15 100644
--- a/.env.example
+++ b/.env.example
@@ -7,21 +7,15 @@
 #   - fastapi-latest / browser-service-latest (stable from main)
 #   - fastapi-develop / browser-service-develop (latest from develop)
 #   - fastapi-1001 / browser-service-1001 (specific version)
-FASTAPI_IMAGE_TAG=monkscode/nlrf:fastapi-pr-50
-BROWSER_SERVICE_IMAGE_TAG=monkscode/nlrf:browser-service-pr-50
-TEST_RUNNER_IMAGE_TAG=monkscode/nlrf:test-runner-pr-50
+FASTAPI_IMAGE_TAG=monkscode/nlrf:fastapi-local
+BROWSER_SERVICE_IMAGE_TAG=monkscode/nlrf:browser-service-local
+TEST_RUNNER_IMAGE_TAG=monkscode/nlrf:test-runner-local
 
-# --- Docker-in-Docker Configuration ---
-# CRITICAL: Set this to the host machine's absolute path to robot_tests directory
-# This is required for test execution in spawned Docker containers
-# 
-# Linux/Mac: Use $(pwd)/robot_tests
-# Windows: Use ${PWD}/robot_tests or full path like C:/Users/YourName/project/robot_tests
-#
-# Example values:
-#   Linux/Mac: /home/user/Natural-Language-to-Robot-Framework/robot_tests
-#   Windows: C:/Users/<username>/Documents/GitHub/Natural-Language-to-Robot-Framework/robot_tests
-HOST_ROBOT_TESTS_DIR=./robot_tests
+# --- Docker-in-Docker bind mount ---
+# HOST_ROBOT_TESTS_DIR is auto-set by docker-compose.yml to ${PWD}/robot_tests
+# and by run.sh to the absolute script directory. You normally do not need to set
+# it here. Override only if you run FastAPI outside compose (e.g., `docker run`)
+# and need a custom absolute host path for test-runner bind mounts.
 
 # Maximum time in seconds to wait for a test container to finish execution
 # Default: 1800 (30 minutes). Increase for very long test suites.
diff --git a/run.sh b/run.sh
index 270cee6..1c4d7b7 100755
--- a/run.sh
+++ b/run.sh
@@ -8,6 +8,10 @@ export PYTHONUTF8=1
 # This uses pure Python protobuf parsing (slower but compatible)
 export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
 
+# Add Docker Desktop binaries to PATH so docker-credential-desktop is resolvable
+# by the Python docker SDK when authenticating against Docker Hub on Windows.
+export PATH="/c/Program Files/Docker/Docker/resources/bin:$PATH"
+
 # Check for .env file
 if [ ! -f "src/backend/.env" ]; then
     echo "Error: src/backend/.env file not found."
@@ -20,11 +24,25 @@ set -a
 source src/backend/.env
 set +a
 
+# --- Local-dev overrides (run.sh process only, never containers) ---
+# These override values from src/backend/.env for processes launched by this script.
+# Docker Compose reads src/backend/.env directly and does NOT source run.sh, so the
+# on-disk value (BROWSER_HEADLESS=true) remains authoritative for containers.
+export BROWSER_HEADLESS=false
+export LOG_FORMAT=console          # human-readable colored logs
+# export CREWAI_VERBOSE=true         # show agent reasoning in console
+
 # Support both APP_PORT (new) and PORT (legacy) variables with a sane default
 APP_PORT="${APP_PORT:-${PORT:-5000}}"
 export APP_PORT
 export PORT="$APP_PORT"
 
+# When running locally (not inside Docker Compose), HOST_ROBOT_TESTS_DIR must be
+# an absolute Windows path so the Docker daemon can resolve the bind mount.
+# The .env value (./robot_tests) is relative and only valid inside Docker Compose.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -W 2>/dev/null || pwd)"
+export HOST_ROBOT_TESTS_DIR="${SCRIPT_DIR}/robot_tests"
+
 # Cross-platform venv activation and path handling
 VENV_DIR="venv"
 if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "cygwin" || "$OSTYPE" == "win32" ]]; then
diff --git a/src/backend/.env.example b/src/backend/.env.example
index 434e7e5..5938b0b 100644
--- a/src/backend/.env.example
+++ b/src/backend/.env.example
@@ -150,6 +150,13 @@ OPTIMIZATION_CONTEXT_PRUNING_ENABLED=true
 # Default: 0.6
 OPTIMIZATION_CONTEXT_PRUNING_THRESHOLD=0.6
 
+# --- Log Format ---
+# Controls log output format for both FastAPI and BrowserUse services.
+# Options:
+#   console — human-readable colored output (recommended for local development)
+#   (unset) — JSON structured output (recommended for production/log aggregation)
+LOG_FORMAT=
+
 # --- LLM Observability ---
 # Controls where OTel traces are exported. Options:
 #   sqlite  — Built-in SQLite trace store at data/llm_traces.db (zero infra, default)
diff --git a/src/backend/config/logging_config.py b/src/backend/config/logging_config.py
index 495dffb..4b24a27 100644
--- a/src/backend/config/logging_config.py
+++ b/src/backend/config/logging_config.py
@@ -67,7 +67,7 @@ def setup_logging(log_dir: str = "logs", log_level: str = "INFO") -> None:
     )
 
     renderer = (
-        structlog.dev.ConsoleRenderer()
+        structlog.dev.ConsoleRenderer(colors=sys.stdout.isatty())
         if os.environ.get("LOG_FORMAT", "").lower() == "console"
         else structlog.processors.JSONRenderer()
     )
diff --git a/src/backend/crew_ai/crew.py b/src/backend/crew_ai/crew.py
index 1b3cb1c..75be55a 100644
--- a/src/backend/crew_ai/crew.py
+++ b/src/backend/crew_ai/crew.py
@@ -416,6 +416,16 @@ def run_crew(query: str, model_provider: str, model_name: str, library_type: str
             if optimization_metrics:
                 logger.info("📊 Optimization metrics collected")
 
+            # CrewAI's event bus dispatches sync handlers via ThreadPoolExecutor (fire-and-forget).
+            # TaskCompletedEvent for task 3 is submitted to the pool but may execute AFTER
+            # kickoff() returns and unregister_workflow() clears the routing maps — causing
+            # _resolve() to return None and silently drop the 100% event.
+            # Pushing 100% here (success path only) guarantees delivery before unregistration.
+            # _push_if_forward deduplicates: if the thread pool wins the race, this is a no-op.
+            if progress_queue is not None:
+                from src.backend.crew_ai.progress_events import _push_if_forward
+                _push_if_forward(workflow_id, progress_queue, "🎉 Test generation complete", 100)
+
             return result, crew, optimization_metrics, hint_metadata, agents.llm._monitor
 
         except Exception as e:
diff --git a/src/backend/crew_ai/library_context/browser_context.py b/src/backend/crew_ai/library_context/browser_context.py
index 8798bed..1399d4e 100644
--- a/src/backend/crew_ai/library_context/browser_context.py
+++ b/src/backend/crew_ai/library_context/browser_context.py
@@ -87,7 +87,9 @@ def core_rules(self) -> str:
    text > role > data-testid > id > css > xpath
    - text=<value> → Most stable
    - role=<role>[name="<name>"] → Accessibility-first
-   - CSS selectors need no prefix
+   - css=<selector> → Always prefix CSS selectors with `css=` (e.g. `css=#searchBox`, `css=.btn`).
+     A bare `#` at the start of a variable value or argument is parsed as a Robot Framework
+     comment and the locator becomes empty at runtime.
 
 6. **COMMON PITFALLS:**
    ❌ Missing viewport config → Elements not found
@@ -205,7 +207,8 @@ def code_assembly_context(self) -> str:
 2. MUST include "New Context    viewport=None" for proper element detection
 3. Browser Library uses 'browser' and 'headless' parameters (NOT 'options')
 4. Browser Library auto-waits, so explicit waits are rarely needed
-5. Locators can be CSS selectors without prefix
+5. Always prefix CSS selectors with `css=` (e.g. `css=#searchBox`, `css=.btn`) — a bare `#` at the
+   start of a variable value is parsed as a Robot Framework comment and the locator becomes empty
 6. Text and role selectors are preferred for stability
 
 **KEYWORD REFERENCE:**
diff --git a/src/backend/crew_ai/optimization/nl_feedback_engine.py b/src/backend/crew_ai/optimization/nl_feedback_engine.py
index a49b78d..3714d88 100644
--- a/src/backend/crew_ai/optimization/nl_feedback_engine.py
+++ b/src/backend/crew_ai/optimization/nl_feedback_engine.py
@@ -450,12 +450,43 @@ def get_hints(
 
         # Format as hint strings (cap at 5)
         hints = [
-            f"\u26a0\ufe0f USER FEEDBACK: {h['feedback_text']}"
+            self._format_feedback_hint(h['feedback_text'])
             for h in deduped[:5]
         ]
 
         return hints if hints else None
 
+    # Detects Robot Framework variable syntax anywhere in a line
+    _RF_VAR_RE = re.compile(r'\$\{[A-Za-z_][A-Za-z0-9_ ]*\}')
+
+    def _format_feedback_hint(self, feedback_text: str) -> str:
+        # Wrap RF code lines in a labelled code block so the LLM treats them
+        # as reference examples only, not literal output to reproduce.
+        prose_lines: list[str] = []
+        code_lines: list[str] = []
+
+        for line in feedback_text.splitlines():
+            if self._RF_VAR_RE.search(line):
+                code_lines.append(line)
+            else:
+                stripped = line.strip()
+                if stripped:
+                    prose_lines.append(stripped)
+
+        parts: list[str] = []
+        if prose_lines:
+            parts.append(" ".join(prose_lines))
+        if code_lines:
+            parts.append(
+                "WRONG CODE EXAMPLE — reference only, do NOT reproduce in output:\n"
+                "```robot\n"
+                + "\n".join(code_lines)
+                + "\n```"
+            )
+
+        body = "\n".join(parts) if parts else feedback_text
+        return f"\u26a0\ufe0f USER FEEDBACK: {body}"
+    
     def update_hint_effectiveness(
         self,
         domain: Optional[str],
diff --git a/src/backend/crew_ai/robot_code_normalizer.py b/src/backend/crew_ai/robot_code_normalizer.py
new file mode 100644
index 0000000..f68a46e
--- /dev/null
+++ b/src/backend/crew_ai/robot_code_normalizer.py
@@ -0,0 +1,78 @@
+"""
+Deterministic post-processor for Robot Framework code emitted by the Code Assembler.
+
+Robot Framework treats `#` as a comment marker wherever it appears at the start of a
+cell (variable value or keyword argument). A bare CSS selector like `#searchBox`
+therefore becomes an empty string at parse time, and tests fail with
+`locator.fill: Unexpected token "" while parsing css selector ""`.
+
+This module deterministically prefixes bare CSS selectors (`#id`, `.class`, or combined
+forms like `#id.active`, `.btn.primary`, `#id[attr='v']`) with `css=` so RF parses
+them as values rather than comments.
+
+Runs AFTER the Code Assembler returns. Complements the prompt guidance in
+`library_context/browser_context.py` and `library_context/selenium_context.py` as a
+belt-and-suspenders guarantee, since the LLM occasionally ignores prompt rules.
+
+Referenced by:
+    src.backend.services.workflow_service._run_crew_thread (after tasks[2] extraction)
+"""
+
+import logging
+import re
+
+logger = logging.getLogger(__name__)
+
+
+# A cell qualifies as a bare CSS selector when it starts with `#` or `.` followed
+# by a CSS identifier character. Anchoring on the start is enough: a cell can
+# legitimately contain CSS combinators with single spaces (e.g. `#id > .child`)
+# because RF cell boundaries require 2+ spaces or a tab, so single spaces live
+# inside one cell. Enumerating all valid CSS syntax in a regex would be brittle;
+# start-of-cell anchoring avoids that and still rejects comments like `# note`
+# (space after `#` fails the second-char test).
+_BARE_CSS_RE = re.compile(r"^[#.][A-Za-z_]")
+
+# Robot Framework cell boundary: two-or-more spaces, or one-or-more tabs.
+_CELL_SPLIT_RE = re.compile(r"(\s{2,}|\t+)")
+
+
+def normalize_robot_code(robot_code: str) -> str:
+    """
+    Prefix bare CSS selectors in a Robot Framework file with `css=`.
+
+    Only rewrites cells that look like bare CSS selectors. Leaves untouched:
+      - Already-prefixed locators (css=, xpath=, id=, name=, text=, role=, ...)
+      - Attribute selectors starting with `[` (already Playwright-native)
+      - Full-line comments (lines whose first non-whitespace char is `#`)
+      - Tag-only selectors (e.g. `body`, `input`) — ambiguous with keyword
+        names, intentionally skipped
+
+    Args:
+        robot_code: The Robot Framework source as a string.
+
+    Returns:
+        The same string with bare CSS selectors prefixed. Returns the input
+        unchanged if it contains no `#` or `.` characters at all (fast path).
+    """
+    if not robot_code or ("#" not in robot_code and "." not in robot_code):
+        return robot_code
+
+    rewrote = 0
+    out_lines = []
+    for line in robot_code.split("\n"):
+        if line.lstrip().startswith("#"):
+            # Full-line comment — preserve verbatim.
+            out_lines.append(line)
+            continue
+
+        parts = _CELL_SPLIT_RE.split(line)
+        for i, cell in enumerate(parts):
+            if cell and _BARE_CSS_RE.match(cell):
+                parts[i] = f"css={cell}"
+                rewrote += 1
+        out_lines.append("".join(parts))
+
+    if rewrote:
+        logger.info(f"Locator normalizer: prefixed {rewrote} bare CSS selector(s) with `css=`")
+    return "\n".join(out_lines)
diff --git a/src/backend/crew_ai/tasks.py b/src/backend/crew_ai/tasks.py
index 91897ee..c6ef74c 100644
--- a/src/backend/crew_ai/tasks.py
+++ b/src/backend/crew_ai/tasks.py
@@ -411,6 +411,9 @@ def _get_task_hints(self, role: str) -> str:
     def plan_steps_task(self, agent, query) -> Task:
         # Build prompt using PromptComponents for maintainability
         description = f"""{self._get_task_hints("planner")}
+            CRITICAL: Any code examples in the USER FEEDBACK blocks above are reference context only.
+            Your output MUST be pure JSON — do NOT echo, copy, or reproduce any Robot Framework code from those blocks.
+
             Your mission is to act as an expert Test Automation Planner. You must analyze a user's natural language query and decompose it into a comprehensive, step-by-step test plan that a junior test engineer could follow.
 
             The user query is: "{query}"
diff --git a/src/backend/services/workflow_service.py b/src/backend/services/workflow_service.py
index 9a740de..069f084 100644
--- a/src/backend/services/workflow_service.py
+++ b/src/backend/services/workflow_service.py
@@ -11,6 +11,7 @@
 from datetime import datetime, timezone
 
 from src.backend.crew_ai.crew import run_crew, extract_url_from_query
+from src.backend.crew_ai.robot_code_normalizer import normalize_robot_code
 from src.backend.services.docker_service import get_docker_client, build_image, run_test_in_container
 from src.backend.config.logging_config import EMOJI, bind_workflow_context
 from src.backend.core.observability import create_workflow_span
@@ -379,6 +380,10 @@ def run_agentic_workflow(natural_language_query: str, model_provider: str, model
             robot_code = robot_code.replace('\\r', '\r')
             logging.info("✅ Normalized escaped newlines/tabs to actual characters")
 
+        # Prefix bare CSS selectors (#id, .class) with `css=` so Robot Framework does
+        # not parse them as comments. See robot_code_normalizer for full rationale.
+        robot_code = normalize_robot_code(robot_code)
+
         # Simplified cleaning logic - prompt now handles most cases
         # Keep only essential defensive measures
         
diff --git a/tools/browser_use_service.py b/tools/browser_use_service.py
index 54f47e0..af1c38d 100644
--- a/tools/browser_use_service.py
+++ b/tools/browser_use_service.py
@@ -66,6 +66,21 @@
 if str(_tools_dir) not in sys.path:
     sys.path.insert(0, str(_tools_dir))
 
+# ========================================
+# ENV LOADING — must happen before browser_service import
+# ========================================
+# browser_service/__init__.py configures structlog at import time, so LOG_FORMAT
+# must be in os.environ before that import. Load src/backend/.env here so that
+# running `python tools/browser_use_service.py` in a separate terminal (without
+# sourcing run.sh first) still picks up LOG_FORMAT and other shared env vars.
+try:
+    from dotenv import load_dotenv as _load_dotenv
+    _env_path = _project_root / "src" / "backend" / ".env"
+    if _env_path.exists():
+        _load_dotenv(_env_path, override=False)  # override=False: shell env takes precedence
+except ImportError:
+    pass  # python-dotenv not available — rely on shell environment
+
 # ========================================
 # LOGGING — already configured by browser_service/__init__.py
 # ========================================
@@ -105,9 +120,6 @@
 # 2. Fallback above sets up path (when run directly)
 from src.backend.core.config import settings  # noqa: E402
 
-# Load environment variables
-load_dotenv("src/backend/.env")
-
 # ========================================
 # FLASK APPLICATION
 # ========================================

From 86d7aab9652f852431bb5fe4d13e8fc6d195da16 Mon Sep 17 00:00:00 2001
From: monkscode <1dhruvilvyas@gmail.com>
Date: Fri, 24 Apr 2026 16:06:54 +0530
Subject: [PATCH 2/2] feat: Enhance NLFeedbackEngine scope mapping and add
 locator normalization tests

---
 .../optimization/nl_feedback_engine.py        |  80 +++----
 src/backend/crew_ai/progress_events.py        |   6 +
 src/backend/crew_ai/robot_code_normalizer.py  | 213 ++++++++++++++++--
 .../test_robot_code_normalizer.py             | 166 ++++++++++++++
 .../test_nl_feedback_pipeline.py              |  21 +-
 5 files changed, 407 insertions(+), 79 deletions(-)
 create mode 100644 tests/test_crew_ai/test_robot_code_normalizer.py

diff --git a/src/backend/crew_ai/optimization/nl_feedback_engine.py b/src/backend/crew_ai/optimization/nl_feedback_engine.py
index 3714d88..b087485 100644
--- a/src/backend/crew_ai/optimization/nl_feedback_engine.py
+++ b/src/backend/crew_ai/optimization/nl_feedback_engine.py
@@ -42,6 +42,10 @@
 # Scope auto-determination — maps triage category to hint scope
 # ---------------------------------------------------------------------------
 
+# Scope map covers the full planned triage taxonomy, not just Phase 1.
+# "assertion" is pre-wired for DAY_08's wrong_assertion pattern (not yet implemented).
+# "positive"/"negative" are reserved for future LLM-triage paths that may bypass
+# the empty-text short-circuit. Do not remove — additions happen in Phase 3.
 _SCOPE_BY_CATEGORY = {
     "structural": "domain",
     "keyword": "global",
@@ -204,6 +208,14 @@ class NLFeedbackEngine(LearningEngine):
     _CROSS_REF_BOOST = 0.20
     _MAX_CONFIDENCE = 1.0
 
+    # Scope WHERE clause shared by get_hints and update_hint_effectiveness.
+    # Parameter order is (domain, url).
+    _SCOPE_WHERE = (
+        "scope = 'global' "
+        "OR (scope = 'domain' AND domain = ?) "
+        "OR (scope = 'url' AND url = ?)"
+    )
+
     def __init__(self, db_conn: sqlite3.Connection = None):
         """
         Initialize NLFeedbackEngine.
@@ -272,20 +284,13 @@ def process_feedback(
         best = matches[0]
         pattern_count = len(matches)
 
-        # Calculate confidence
-        confidence = min(
-            self._BASE_CONFIDENCE + (pattern_count * self._PATTERN_BOOST),
-            self._MAX_CONFIDENCE - self._CROSS_REF_BOOST,  # leave room for boost
-        )
-
-        # Cross-reference with error message
+        # Base + per-pattern boost, add cross-ref boost when error confirms, cap once.
+        confidence = self._BASE_CONFIDENCE + (pattern_count * self._PATTERN_BOOST)
         if error_message and self._error_confirms_feedback(
             best["taxonomy"], error_message
         ):
-            confidence = min(
-                confidence + self._CROSS_REF_BOOST,
-                self._MAX_CONFIDENCE,
-            )
+            confidence += self._CROSS_REF_BOOST
+        confidence = min(confidence, self._MAX_CONFIDENCE)
 
         result = {
             "category": best["category"],
@@ -331,7 +336,7 @@ def learn_from_feedback(self, record, feedback_insight) -> None:
 
         category = feedback_insight.get("category", "uncategorized")
         # Skip purely informational triage results (no text correction)
-        if category in ("positive",):
+        if category == "positive":
             return
 
         scope = _SCOPE_BY_CATEGORY.get(category, "domain")
@@ -422,11 +427,7 @@ def get_hints(
                 "       success_count, evidence_count "
                 "FROM nl_feedback_corrections "
                 "WHERE is_active = 1 "
-                "AND ( "
-                "    scope = 'global' "
-                "    OR (scope = 'domain' AND domain = ?) "
-                "    OR (scope = 'url' AND url = ?) "
-                ") "
+                f"AND ({self._SCOPE_WHERE}) "
                 "ORDER BY success_count DESC, "
                 "         evidence_count DESC, "
                 "         last_seen DESC "
@@ -456,37 +457,18 @@ def get_hints(
 
         return hints if hints else None
 
-    # Detects Robot Framework variable syntax anywhere in a line
-    _RF_VAR_RE = re.compile(r'\$\{[A-Za-z_][A-Za-z0-9_ ]*\}')
-
     def _format_feedback_hint(self, feedback_text: str) -> str:
-        # Wrap RF code lines in a labelled code block so the LLM treats them
-        # as reference examples only, not literal output to reproduce.
-        prose_lines: list[str] = []
-        code_lines: list[str] = []
-
-        for line in feedback_text.splitlines():
-            if self._RF_VAR_RE.search(line):
-                code_lines.append(line)
-            else:
-                stripped = line.strip()
-                if stripped:
-                    prose_lines.append(stripped)
-
-        parts: list[str] = []
-        if prose_lines:
-            parts.append(" ".join(prose_lines))
-        if code_lines:
-            parts.append(
-                "WRONG CODE EXAMPLE — reference only, do NOT reproduce in output:\n"
-                "```robot\n"
-                + "\n".join(code_lines)
-                + "\n```"
-            )
+        # Pass feedback through verbatim with a universal warning header.
+        # The header travels with every hint to every agent (Planner, Assembler,
+        # Validator), so the "do not copy literally" guardrail applies even for
+        # agents whose task prompts lack an explicit disclaimer. Preserving the
+        # original line order retains the user's reasoning structure.
+        return (
+            "\u26a0\ufe0f USER FEEDBACK (reference context from a past test — "
+            "do NOT copy any code literally, treat as guidance only):\n"
+            f"{feedback_text.strip()}"
+        )
 
-        body = "\n".join(parts) if parts else feedback_text
-        return f"\u26a0\ufe0f USER FEEDBACK: {body}"
-    
     def update_hint_effectiveness(
         self,
         domain: Optional[str],
@@ -518,11 +500,7 @@ def update_hint_effectiveness(
                 "       applied_count, success_count, failure_count "
                 "FROM nl_feedback_corrections "
                 "WHERE is_active = 1 "
-                "AND ( "
-                "    scope = 'global' "
-                "    OR (scope = 'domain' AND domain = ?) "
-                "    OR (scope = 'url' AND url = ?) "
-                ")",
+                f"AND ({self._SCOPE_WHERE})",
                 (domain, url),
             ).fetchall()
 
diff --git a/src/backend/crew_ai/progress_events.py b/src/backend/crew_ai/progress_events.py
index 0b599c9..8cdb2be 100644
--- a/src/backend/crew_ai/progress_events.py
+++ b/src/backend/crew_ai/progress_events.py
@@ -174,8 +174,14 @@ def _push_if_forward(
     """Push a progress event only if it advances the current progress value.
 
     Thread-safe. Discards the event silently if progress would not increase.
+    Also ignores events for workflows whose queue has been unregistered — this
+    prevents a late async TaskCompletedEvent handler from enqueuing a duplicate
+    100% event after crew.py's manual push + unregister_workflow() have run,
+    and from re-creating the _current_progress entry (memory leak).
     """
     with _lock:
+        if _workflow_queues.get(workflow_id) is not queue:
+            return
         current = _current_progress.get(workflow_id, 0)
         if progress <= current:
             return
diff --git a/src/backend/crew_ai/robot_code_normalizer.py b/src/backend/crew_ai/robot_code_normalizer.py
index f68a46e..2c93cdb 100644
--- a/src/backend/crew_ai/robot_code_normalizer.py
+++ b/src/backend/crew_ai/robot_code_normalizer.py
@@ -1,14 +1,32 @@
 """
 Deterministic post-processor for Robot Framework code emitted by the Code Assembler.
 
-Robot Framework treats `#` as a comment marker wherever it appears at the start of a
-cell (variable value or keyword argument). A bare CSS selector like `#searchBox`
-therefore becomes an empty string at parse time, and tests fail with
-`locator.fill: Unexpected token "" while parsing css selector ""`.
+Two problems motivate this pass:
+
+1. Parse-level (`#`): Robot Framework treats `#` at the start of a cell as a
+   comment marker. A bare CSS selector like `#searchBox` becomes an empty
+   string at parse time, and tests fail with
+   `locator.fill: Unexpected token "" while parsing css selector ""`.
+
+2. Strategy-level (`.`): Locators without an explicit strategy prefix fall
+   back to each library's default strategy. SeleniumLibrary's implicit
+   strategy tries id/name/identifier first and may mis-route `.button`;
+   Browser Library defaults to CSS but gains nothing from ambiguity either.
+   Prefixing with `css=` makes the strategy explicit for both libraries.
 
 This module deterministically prefixes bare CSS selectors (`#id`, `.class`, or combined
-forms like `#id.active`, `.btn.primary`, `#id[attr='v']`) with `css=` so RF parses
-them as values rather than comments.
+forms like `#id.active`, `.btn.primary`, `#id[attr='v']`) with `css=` — but ONLY at
+cell positions that are actually locator arguments. Non-locator data such as tag
+values, assertion values, or the text argument of `Fill Text` is never rewritten.
+
+Scope rules (conservative: no rewrite beats a wrong rewrite):
+    - Keyword calls: rewrite only the arg positions declared in _LOCATOR_KEYWORDS.
+      Unknown keywords cause the line to be left untouched.
+    - Variable declarations (assignment prefix with no recognized keyword): rewrite
+      any bare-CSS cell after the assignment. Matches the common pattern
+      `${SEARCH}    #searchBox` in *** Variables *** sections.
+    - Setting markers ([Tags], [Documentation], ...): always skipped.
+    - Full-line comments and `...` continuations: preserved verbatim.
 
 Runs AFTER the Code Assembler returns. Complements the prompt guidance in
 `library_context/browser_context.py` and `library_context/selenium_context.py` as a
@@ -36,17 +54,115 @@
 # Robot Framework cell boundary: two-or-more spaces, or one-or-more tabs.
 _CELL_SPLIT_RE = re.compile(r"(\s{2,}|\t+)")
 
+# Variable-assignment prefix: a cell that is exactly a scalar/list/dict variable
+# (`${x}`, `@{list}`, `&{dict}`) with an optional trailing `=`. Used to detect
+# lines of the form `${x}=    Keyword    ...` or `${X}    value` in Variables.
+_ASSIGN_PREFIX_RE = re.compile(r"^[$@&]\{[^}]+\}\s*=?\s*$")
+
+# Setting markers. When these are the first cell of a line, the rest of the line
+# holds literal values, never locators — never rewrite.
+_SETTING_MARKERS = frozenset({
+    "[tags]", "[documentation]", "[arguments]", "[setup]", "[teardown]",
+    "[timeout]", "[template]", "[return]",
+})
+
+
+def _canon_keyword(name: str) -> str:
+    """Canonicalize an RF keyword cell: lowercase, single-space between words,
+    library prefix stripped. RF itself is case- and space-insensitive for keyword
+    names, so `Fill Text`, `fill_text`, and `fill  text` all map to `fill text`.
+    """
+    stripped = re.sub(r"[\s_]+", " ", name.strip().lower())
+    # Strip library prefix (e.g., "browser.click" -> "click"). A leading `.` is
+    # a bare CSS selector — not a library prefix — so guard against that.
+    if "." in stripped and not stripped.startswith("."):
+        stripped = stripped.rsplit(".", 1)[-1].strip()
+    return stripped
+
+
+# Keyword -> tuple of 0-indexed argument positions that accept a locator.
+# Any argument at a position NOT listed here (e.g., the text arg at position 1
+# of Fill Text) is left untouched. Keywords not in this dict cause the whole
+# line to skip keyword-path rewriting (see normalize_robot_code for the variable-
+# declaration fallback).
+_LOCATOR_KEYWORDS: dict[str, tuple[int, ...]] = {
+    # Browser Library (Playwright-based)
+    "click": (0,),
+    "click with options": (0,),
+    "fill text": (0,),
+    "fill secret": (0,),
+    "type text": (0,),
+    "type secret": (0,),
+    "check checkbox": (0,),
+    "uncheck checkbox": (0,),
+    "select options by": (0,),
+    "hover": (0,),
+    "focus": (0,),
+    "press keys": (0,),
+    "get text": (0,),
+    "get attribute": (0,),
+    "get property": (0,),
+    "get style": (0,),
+    "get element": (0,),
+    "get elements": (0,),
+    "get element count": (0,),
+    "get element states": (0,),
+    "get select options": (0,),
+    "wait for elements state": (0,),
+    "scroll to element": (0,),
+    "upload file by selector": (0,),
+    # SeleniumLibrary
+    "click element": (0,),
+    "click button": (0,),
+    "click link": (0,),
+    "click image": (0,),
+    "input text": (0,),
+    "input password": (0,),
+    "select checkbox": (0,),
+    "unselect checkbox": (0,),
+    "select from list by label": (0,),
+    "select from list by index": (0,),
+    "select from list by value": (0,),
+    "mouse over": (0,),
+    "mouse out": (0,),
+    "submit form": (0,),
+    "choose file": (0,),
+    "clear element text": (0,),
+    "scroll element into view": (0,),
+    "wait until element is visible": (0,),
+    "wait until element is enabled": (0,),
+    "wait until element is not visible": (0,),
+    "wait until element contains": (0,),
+    "wait until page contains element": (0,),
+    "wait until page does not contain element": (0,),
+    "element should be visible": (0,),
+    "element should be enabled": (0,),
+    "element should be disabled": (0,),
+    "element should contain": (0,),
+    "element should not contain": (0,),
+    "element should not be visible": (0,),
+    "get webelement": (0,),
+    "get webelements": (0,),
+    "get element attribute": (0,),
+    # Two-locator keywords
+    "drag and drop": (0, 1),
+}
+
 
 def normalize_robot_code(robot_code: str) -> str:
     """
-    Prefix bare CSS selectors in a Robot Framework file with `css=`.
+    Prefix bare CSS selectors in Robot Framework code with `css=`.
 
-    Only rewrites cells that look like bare CSS selectors. Leaves untouched:
-      - Already-prefixed locators (css=, xpath=, id=, name=, text=, role=, ...)
-      - Attribute selectors starting with `[` (already Playwright-native)
-      - Full-line comments (lines whose first non-whitespace char is `#`)
-      - Tag-only selectors (e.g. `body`, `input`) — ambiguous with keyword
-        names, intentionally skipped
+    Rewrites only cells that are actually locator arguments:
+      - Locator args of known keywords (per _LOCATOR_KEYWORDS)
+      - Values of variable declarations (`${X}    #foo` and `${x}=    <unknown-kw>    #foo`)
+
+    Leaves untouched:
+      - Non-locator args (e.g., the text arg of `Fill Text`)
+      - Setting marker lines (`[Tags]`, `[Documentation]`, ...)
+      - Assertion/log values (`Should Be Equal    ${v}    .NET`)
+      - Full-line comments and `...` continuations
+      - Custom/unknown keywords (conservative — no rewrite)
 
     Args:
         robot_code: The Robot Framework source as a string.
@@ -61,17 +177,74 @@ def normalize_robot_code(robot_code: str) -> str:
     rewrote = 0
     out_lines = []
     for line in robot_code.split("\n"):
-        if line.lstrip().startswith("#"):
-            # Full-line comment — preserve verbatim.
+        stripped = line.lstrip()
+
+        # Full-line comment or continuation — preserve verbatim.
+        if stripped.startswith("#") or stripped.startswith("..."):
             out_lines.append(line)
             continue
 
         parts = _CELL_SPLIT_RE.split(line)
-        for i, cell in enumerate(parts):
-            if cell and _BARE_CSS_RE.match(cell):
-                parts[i] = f"css={cell}"
-                rewrote += 1
-        out_lines.append("".join(parts))
+        # Indices of content cells (non-empty, non-separator).
+        cell_positions = [
+            i for i, p in enumerate(parts)
+            if p and not _CELL_SPLIT_RE.fullmatch(p)
+        ]
+        if not cell_positions:
+            out_lines.append(line)
+            continue
+
+        first_cell = parts[cell_positions[0]]
+
+        # Setting marker lines never contain locators.
+        if first_cell.lower() in _SETTING_MARKERS:
+            out_lines.append(line)
+            continue
+
+        # Walk past leading assignment cells to find the keyword cell.
+        keyword_pos_in_cells = 0
+        for k, ci in enumerate(cell_positions):
+            if _ASSIGN_PREFIX_RE.match(parts[ci]):
+                keyword_pos_in_cells = k + 1
+                continue
+            break
+
+        # --- Case A: recognized keyword call ---
+        if keyword_pos_in_cells < len(cell_positions):
+            keyword_cell = parts[cell_positions[keyword_pos_in_cells]]
+            locator_arg_positions = _LOCATOR_KEYWORDS.get(_canon_keyword(keyword_cell))
+            if locator_arg_positions is not None:
+                arg_cells_start = keyword_pos_in_cells + 1
+                for offset in locator_arg_positions:
+                    arg_pos = arg_cells_start + offset
+                    if arg_pos >= len(cell_positions):
+                        continue
+                    target_idx = cell_positions[arg_pos]
+                    cell = parts[target_idx]
+                    if _BARE_CSS_RE.match(cell):
+                        parts[target_idx] = f"css={cell}"
+                        rewrote += 1
+                out_lines.append("".join(parts))
+                continue
+
+        # --- Case B: variable declaration (assignment prefix, no known keyword) ---
+        # Common in *** Variables *** sections: `${SEARCH}    #searchBox`.
+        # Also covers `${x}=    Set Variable    #foo` (Set Variable isn't in the
+        # allowlist, but the value being stored is a locator).
+        if keyword_pos_in_cells > 0:
+            for ci in cell_positions[keyword_pos_in_cells:]:
+                cell = parts[ci]
+                if _BARE_CSS_RE.match(cell):
+                    parts[ci] = f"css={cell}"
+                    rewrote += 1
+            out_lines.append("".join(parts))
+            continue
+
+        # --- Case C: unrecognized line (custom keyword or section header) ---
+        # Conservative: leave untouched. A custom user-defined keyword whose
+        # first arg is a locator will miss rewriting here, but that's safer
+        # than corrupting legitimate non-locator data.
+        out_lines.append(line)
 
     if rewrote:
         logger.info(f"Locator normalizer: prefixed {rewrote} bare CSS selector(s) with `css=`")
diff --git a/tests/test_crew_ai/test_robot_code_normalizer.py b/tests/test_crew_ai/test_robot_code_normalizer.py
new file mode 100644
index 0000000..54c8ff6
--- /dev/null
+++ b/tests/test_crew_ai/test_robot_code_normalizer.py
@@ -0,0 +1,166 @@
+"""Tests for src.backend.crew_ai.robot_code_normalizer.
+
+Covers the false-positive regressions the allowlist was designed to fix, plus
+the legitimate rewrite cases it must keep working.
+"""
+
+import pytest
+
+from src.backend.crew_ai.robot_code_normalizer import normalize_robot_code
+
+
+@pytest.mark.parametrize(
+    "label, source, expected",
+    [
+        # --- False positives that the old global-rewrite version corrupted ---
+        (
+            "Fill Text: #release is the typed text, not a locator",
+            "    Fill Text    ${message_locator}    #release",
+            "    Fill Text    ${message_locator}    #release",
+        ),
+        (
+            "[Tags] takes literal tag values",
+            "    [Tags]    #smoke",
+            "    [Tags]    #smoke",
+        ),
+        (
+            "Should Be Equal: .NET is an assertion value",
+            "    Should Be Equal    ${value}    .NET",
+            "    Should Be Equal    ${value}    .NET",
+        ),
+        (
+            "Log with a hashtag literal — unknown-to-allowlist keyword stays untouched",
+            "    Log    #hashtag",
+            "    Log    #hashtag",
+        ),
+        (
+            "Custom user-defined keyword — conservative skip",
+            "    My Custom Keyword    #foo",
+            "    My Custom Keyword    #foo",
+        ),
+        # --- Legitimate rewrites (Browser Library) ---
+        (
+            "Click with bare id",
+            "    Click    #searchBox",
+            "    Click    css=#searchBox",
+        ),
+        (
+            "Click with bare class",
+            "    Click    .btn-primary",
+            "    Click    css=.btn-primary",
+        ),
+        (
+            "Fill Text rewrites position 0 only",
+            "    Fill Text    #email    user@example.com",
+            "    Fill Text    css=#email    user@example.com",
+        ),
+        (
+            "Drag And Drop rewrites both locator args",
+            "    Drag And Drop    #src    #dst",
+            "    Drag And Drop    css=#src    css=#dst",
+        ),
+        (
+            "Assignment prefix + known keyword",
+            "    ${x}=    Click    #foo",
+            "    ${x}=    Click    css=#foo",
+        ),
+        # --- Legitimate rewrites (SeleniumLibrary) ---
+        (
+            "Click Element rewrites bare id",
+            "    Click Element    #submit",
+            "    Click Element    css=#submit",
+        ),
+        (
+            "Input Text locator + literal text",
+            "    Input Text    #q    search term",
+            "    Input Text    css=#q    search term",
+        ),
+        (
+            "Element Should Be Visible with bare class",
+            "    Element Should Be Visible    .result-row",
+            "    Element Should Be Visible    css=.result-row",
+        ),
+        # --- Variable declarations ---
+        (
+            "*** Variables *** bare id value",
+            "${SEARCH}    #searchBox",
+            "${SEARCH}    css=#searchBox",
+        ),
+        (
+            "*** Variables *** bare class value",
+            "${BTN}    .btn-submit",
+            "${BTN}    css=.btn-submit",
+        ),
+        (
+            "${x}= Set Variable stores a bare locator value",
+            "    ${x}=    Set Variable    #foo",
+            "    ${x}=    Set Variable    css=#foo",
+        ),
+        # --- Canonicalization ---
+        (
+            "Case-insensitive keyword lookup",
+            "    click    #foo",
+            "    click    css=#foo",
+        ),
+        (
+            "Underscore-separated keyword name",
+            "    fill_text    #x    y",
+            "    fill_text    css=#x    y",
+        ),
+        # --- No-op edges ---
+        (
+            "Already-prefixed locator — no double-prefix",
+            "    Click    css=#searchBox",
+            "    Click    css=#searchBox",
+        ),
+        (
+            "XPath locator — untouched",
+            "    Click    //button[@id='x']",
+            "    Click    //button[@id='x']",
+        ),
+        (
+            "Full-line comment preserved verbatim",
+            "# This is a comment about #searchBox",
+            "# This is a comment about #searchBox",
+        ),
+        (
+            "Line continuation marker preserved",
+            "    ...    #continuation-text",
+            "    ...    #continuation-text",
+        ),
+        (
+            "Section header untouched",
+            "*** Test Cases ***",
+            "*** Test Cases ***",
+        ),
+        (
+            "Empty string",
+            "",
+            "",
+        ),
+        (
+            "Tab-separated cells",
+            "\tClick\t#foo",
+            "\tClick\tcss=#foo",
+        ),
+        # --- Multi-line integrity ---
+        (
+            "Multi-line input preserves non-locator lines",
+            "*** Test Cases ***\nMy Test\n    Click    #x\n    Log    done",
+            "*** Test Cases ***\nMy Test\n    Click    css=#x\n    Log    done",
+        ),
+    ],
+)
+def test_normalize_robot_code(label, source, expected):
+    assert normalize_robot_code(source) == expected, label
+
+
+def test_fast_path_returns_input_when_no_selector_chars():
+    """Strings without `#` or `.` must short-circuit and return the same object."""
+    src = "    Click    ${BTN}\n    Log    hello"
+    assert normalize_robot_code(src) is src
+
+
+def test_none_and_empty():
+    assert normalize_robot_code("") == ""
+    assert normalize_robot_code(None) is None
diff --git a/tests/test_optimization/test_nl_feedback_pipeline.py b/tests/test_optimization/test_nl_feedback_pipeline.py
index aba155d..6ebf8eb 100644
--- a/tests/test_optimization/test_nl_feedback_pipeline.py
+++ b/tests/test_optimization/test_nl_feedback_pipeline.py
@@ -372,7 +372,8 @@ def test_hints_formatted_with_prefix(self, in_memory_db):
         engine = NLFeedbackEngine(in_memory_db)
         hints = engine.get_hints("click", "https://x.com", "assembler")
         assert hints is not None
-        assert hints[0].startswith("\u26a0\ufe0f USER FEEDBACK:")
+        assert hints[0].startswith("\u26a0\ufe0f USER FEEDBACK")
+        assert "Test prefix" in hints[0]
 
 
 # ===================================================================
@@ -565,13 +566,17 @@ class TestScopeMapping:
     """_SCOPE_BY_CATEGORY mapping completeness."""
 
     def test_scope_mapping_completeness(self):
-        """All expected categories should have a mapping."""
-        expected = {"structural", "keyword", "locator", "anti_pattern", "validation"}
-        mapped = set(_SCOPE_BY_CATEGORY.keys())
-        for cat in expected:
-            assert cat in mapped or True, (
-                f"Category '{cat}' has no explicit scope mapping (will default to 'domain')"
-            )
+        """Scope map must cover every category any code path can emit.
+
+        Covers Phase 1 live categories plus pre-wired entries for Phase 3
+        (assertion, positive, negative). Removing any of these would silently
+        change scope when the corresponding patterns land.
+        """
+        expected = {
+            "structural", "keyword", "locator", "timing", "data",
+            "assertion", "uncategorized", "positive", "negative",
+        }
+        assert set(_SCOPE_BY_CATEGORY.keys()) == expected
 
     def test_scope_default_domain(self):
         """Unknown categories should default to 'domain'."""