From 0678950ebeb05b88225368cf71b2aeb5039ced57 Mon Sep 17 00:00:00 2001 From: monkscode <1dhruvilvyas@gmail.com> Date: Thu, 23 Apr 2026 16:43:13 +0530 Subject: [PATCH 1/2] feat: Update environment configuration and enhance logging for local development; normalize Robot Framework CSS selectors --- .env.example | 22 ++---- run.sh | 18 +++++ src/backend/.env.example | 7 ++ src/backend/config/logging_config.py | 2 +- src/backend/crew_ai/crew.py | 10 +++ .../library_context/browser_context.py | 7 +- .../optimization/nl_feedback_engine.py | 33 +++++++- src/backend/crew_ai/robot_code_normalizer.py | 78 +++++++++++++++++++ src/backend/crew_ai/tasks.py | 3 + src/backend/services/workflow_service.py | 5 ++ tools/browser_use_service.py | 18 ++++- 11 files changed, 182 insertions(+), 21 deletions(-) create mode 100644 src/backend/crew_ai/robot_code_normalizer.py diff --git a/.env.example b/.env.example index fce6b4f..d6c1c15 100644 --- a/.env.example +++ b/.env.example @@ -7,21 +7,15 @@ # - fastapi-latest / browser-service-latest (stable from main) # - fastapi-develop / browser-service-develop (latest from develop) # - fastapi-1001 / browser-service-1001 (specific version) -FASTAPI_IMAGE_TAG=monkscode/nlrf:fastapi-pr-50 -BROWSER_SERVICE_IMAGE_TAG=monkscode/nlrf:browser-service-pr-50 -TEST_RUNNER_IMAGE_TAG=monkscode/nlrf:test-runner-pr-50 +FASTAPI_IMAGE_TAG=monkscode/nlrf:fastapi-local +BROWSER_SERVICE_IMAGE_TAG=monkscode/nlrf:browser-service-local +TEST_RUNNER_IMAGE_TAG=monkscode/nlrf:test-runner-local -# --- Docker-in-Docker Configuration --- -# CRITICAL: Set this to the host machine's absolute path to robot_tests directory -# This is required for test execution in spawned Docker containers -# -# Linux/Mac: Use $(pwd)/robot_tests -# Windows: Use ${PWD}/robot_tests or full path like C:/Users/YourName/project/robot_tests -# -# Example values: -# Linux/Mac: /home/user/Natural-Language-to-Robot-Framework/robot_tests -# Windows: C:/Users//Documents/GitHub/Natural-Language-to-Robot-Framework/robot_tests -HOST_ROBOT_TESTS_DIR=./robot_tests +# --- Docker-in-Docker bind mount --- +# HOST_ROBOT_TESTS_DIR is auto-set by docker-compose.yml to ${PWD}/robot_tests +# and by run.sh to the absolute script directory. You normally do not need to set +# it here. Override only if you run FastAPI outside compose (e.g., `docker run`) +# and need a custom absolute host path for test-runner bind mounts. # Maximum time in seconds to wait for a test container to finish execution # Default: 1800 (30 minutes). Increase for very long test suites. diff --git a/run.sh b/run.sh index 270cee6..1c4d7b7 100755 --- a/run.sh +++ b/run.sh @@ -8,6 +8,10 @@ export PYTHONUTF8=1 # This uses pure Python protobuf parsing (slower but compatible) export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python +# Add Docker Desktop binaries to PATH so docker-credential-desktop is resolvable +# by the Python docker SDK when authenticating against Docker Hub on Windows. +export PATH="/c/Program Files/Docker/Docker/resources/bin:$PATH" + # Check for .env file if [ ! -f "src/backend/.env" ]; then echo "Error: src/backend/.env file not found." @@ -20,11 +24,25 @@ set -a source src/backend/.env set +a +# --- Local-dev overrides (run.sh process only, never containers) --- +# These override values from src/backend/.env for processes launched by this script. +# Docker Compose reads src/backend/.env directly and does NOT source run.sh, so the +# on-disk value (BROWSER_HEADLESS=true) remains authoritative for containers. +export BROWSER_HEADLESS=false +export LOG_FORMAT=console # human-readable colored logs +# export CREWAI_VERBOSE=true # show agent reasoning in console + # Support both APP_PORT (new) and PORT (legacy) variables with a sane default APP_PORT="${APP_PORT:-${PORT:-5000}}" export APP_PORT export PORT="$APP_PORT" +# When running locally (not inside Docker Compose), HOST_ROBOT_TESTS_DIR must be +# an absolute Windows path so the Docker daemon can resolve the bind mount. +# The .env value (./robot_tests) is relative and only valid inside Docker Compose. +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -W 2>/dev/null || pwd)" +export HOST_ROBOT_TESTS_DIR="${SCRIPT_DIR}/robot_tests" + # Cross-platform venv activation and path handling VENV_DIR="venv" if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "cygwin" || "$OSTYPE" == "win32" ]]; then diff --git a/src/backend/.env.example b/src/backend/.env.example index 434e7e5..5938b0b 100644 --- a/src/backend/.env.example +++ b/src/backend/.env.example @@ -150,6 +150,13 @@ OPTIMIZATION_CONTEXT_PRUNING_ENABLED=true # Default: 0.6 OPTIMIZATION_CONTEXT_PRUNING_THRESHOLD=0.6 +# --- Log Format --- +# Controls log output format for both FastAPI and BrowserUse services. +# Options: +# console — human-readable colored output (recommended for local development) +# (unset) — JSON structured output (recommended for production/log aggregation) +LOG_FORMAT= + # --- LLM Observability --- # Controls where OTel traces are exported. Options: # sqlite — Built-in SQLite trace store at data/llm_traces.db (zero infra, default) diff --git a/src/backend/config/logging_config.py b/src/backend/config/logging_config.py index 495dffb..4b24a27 100644 --- a/src/backend/config/logging_config.py +++ b/src/backend/config/logging_config.py @@ -67,7 +67,7 @@ def setup_logging(log_dir: str = "logs", log_level: str = "INFO") -> None: ) renderer = ( - structlog.dev.ConsoleRenderer() + structlog.dev.ConsoleRenderer(colors=sys.stdout.isatty()) if os.environ.get("LOG_FORMAT", "").lower() == "console" else structlog.processors.JSONRenderer() ) diff --git a/src/backend/crew_ai/crew.py b/src/backend/crew_ai/crew.py index 1b3cb1c..75be55a 100644 --- a/src/backend/crew_ai/crew.py +++ b/src/backend/crew_ai/crew.py @@ -416,6 +416,16 @@ def run_crew(query: str, model_provider: str, model_name: str, library_type: str if optimization_metrics: logger.info("📊 Optimization metrics collected") + # CrewAI's event bus dispatches sync handlers via ThreadPoolExecutor (fire-and-forget). + # TaskCompletedEvent for task 3 is submitted to the pool but may execute AFTER + # kickoff() returns and unregister_workflow() clears the routing maps — causing + # _resolve() to return None and silently drop the 100% event. + # Pushing 100% here (success path only) guarantees delivery before unregistration. + # _push_if_forward deduplicates: if the thread pool wins the race, this is a no-op. + if progress_queue is not None: + from src.backend.crew_ai.progress_events import _push_if_forward + _push_if_forward(workflow_id, progress_queue, "🎉 Test generation complete", 100) + return result, crew, optimization_metrics, hint_metadata, agents.llm._monitor except Exception as e: diff --git a/src/backend/crew_ai/library_context/browser_context.py b/src/backend/crew_ai/library_context/browser_context.py index 8798bed..1399d4e 100644 --- a/src/backend/crew_ai/library_context/browser_context.py +++ b/src/backend/crew_ai/library_context/browser_context.py @@ -87,7 +87,9 @@ def core_rules(self) -> str: text > role > data-testid > id > css > xpath - text= → Most stable - role=[name=""] → Accessibility-first - - CSS selectors need no prefix + - css= → Always prefix CSS selectors with `css=` (e.g. `css=#searchBox`, `css=.btn`). + A bare `#` at the start of a variable value or argument is parsed as a Robot Framework + comment and the locator becomes empty at runtime. 6. **COMMON PITFALLS:** ❌ Missing viewport config → Elements not found @@ -205,7 +207,8 @@ def code_assembly_context(self) -> str: 2. MUST include "New Context viewport=None" for proper element detection 3. Browser Library uses 'browser' and 'headless' parameters (NOT 'options') 4. Browser Library auto-waits, so explicit waits are rarely needed -5. Locators can be CSS selectors without prefix +5. Always prefix CSS selectors with `css=` (e.g. `css=#searchBox`, `css=.btn`) — a bare `#` at the + start of a variable value is parsed as a Robot Framework comment and the locator becomes empty 6. Text and role selectors are preferred for stability **KEYWORD REFERENCE:** diff --git a/src/backend/crew_ai/optimization/nl_feedback_engine.py b/src/backend/crew_ai/optimization/nl_feedback_engine.py index a49b78d..3714d88 100644 --- a/src/backend/crew_ai/optimization/nl_feedback_engine.py +++ b/src/backend/crew_ai/optimization/nl_feedback_engine.py @@ -450,12 +450,43 @@ def get_hints( # Format as hint strings (cap at 5) hints = [ - f"\u26a0\ufe0f USER FEEDBACK: {h['feedback_text']}" + self._format_feedback_hint(h['feedback_text']) for h in deduped[:5] ] return hints if hints else None + # Detects Robot Framework variable syntax anywhere in a line + _RF_VAR_RE = re.compile(r'\$\{[A-Za-z_][A-Za-z0-9_ ]*\}') + + def _format_feedback_hint(self, feedback_text: str) -> str: + # Wrap RF code lines in a labelled code block so the LLM treats them + # as reference examples only, not literal output to reproduce. + prose_lines: list[str] = [] + code_lines: list[str] = [] + + for line in feedback_text.splitlines(): + if self._RF_VAR_RE.search(line): + code_lines.append(line) + else: + stripped = line.strip() + if stripped: + prose_lines.append(stripped) + + parts: list[str] = [] + if prose_lines: + parts.append(" ".join(prose_lines)) + if code_lines: + parts.append( + "WRONG CODE EXAMPLE — reference only, do NOT reproduce in output:\n" + "```robot\n" + + "\n".join(code_lines) + + "\n```" + ) + + body = "\n".join(parts) if parts else feedback_text + return f"\u26a0\ufe0f USER FEEDBACK: {body}" + def update_hint_effectiveness( self, domain: Optional[str], diff --git a/src/backend/crew_ai/robot_code_normalizer.py b/src/backend/crew_ai/robot_code_normalizer.py new file mode 100644 index 0000000..f68a46e --- /dev/null +++ b/src/backend/crew_ai/robot_code_normalizer.py @@ -0,0 +1,78 @@ +""" +Deterministic post-processor for Robot Framework code emitted by the Code Assembler. + +Robot Framework treats `#` as a comment marker wherever it appears at the start of a +cell (variable value or keyword argument). A bare CSS selector like `#searchBox` +therefore becomes an empty string at parse time, and tests fail with +`locator.fill: Unexpected token "" while parsing css selector ""`. + +This module deterministically prefixes bare CSS selectors (`#id`, `.class`, or combined +forms like `#id.active`, `.btn.primary`, `#id[attr='v']`) with `css=` so RF parses +them as values rather than comments. + +Runs AFTER the Code Assembler returns. Complements the prompt guidance in +`library_context/browser_context.py` and `library_context/selenium_context.py` as a +belt-and-suspenders guarantee, since the LLM occasionally ignores prompt rules. + +Referenced by: + src.backend.services.workflow_service._run_crew_thread (after tasks[2] extraction) +""" + +import logging +import re + +logger = logging.getLogger(__name__) + + +# A cell qualifies as a bare CSS selector when it starts with `#` or `.` followed +# by a CSS identifier character. Anchoring on the start is enough: a cell can +# legitimately contain CSS combinators with single spaces (e.g. `#id > .child`) +# because RF cell boundaries require 2+ spaces or a tab, so single spaces live +# inside one cell. Enumerating all valid CSS syntax in a regex would be brittle; +# start-of-cell anchoring avoids that and still rejects comments like `# note` +# (space after `#` fails the second-char test). +_BARE_CSS_RE = re.compile(r"^[#.][A-Za-z_]") + +# Robot Framework cell boundary: two-or-more spaces, or one-or-more tabs. +_CELL_SPLIT_RE = re.compile(r"(\s{2,}|\t+)") + + +def normalize_robot_code(robot_code: str) -> str: + """ + Prefix bare CSS selectors in a Robot Framework file with `css=`. + + Only rewrites cells that look like bare CSS selectors. Leaves untouched: + - Already-prefixed locators (css=, xpath=, id=, name=, text=, role=, ...) + - Attribute selectors starting with `[` (already Playwright-native) + - Full-line comments (lines whose first non-whitespace char is `#`) + - Tag-only selectors (e.g. `body`, `input`) — ambiguous with keyword + names, intentionally skipped + + Args: + robot_code: The Robot Framework source as a string. + + Returns: + The same string with bare CSS selectors prefixed. Returns the input + unchanged if it contains no `#` or `.` characters at all (fast path). + """ + if not robot_code or ("#" not in robot_code and "." not in robot_code): + return robot_code + + rewrote = 0 + out_lines = [] + for line in robot_code.split("\n"): + if line.lstrip().startswith("#"): + # Full-line comment — preserve verbatim. + out_lines.append(line) + continue + + parts = _CELL_SPLIT_RE.split(line) + for i, cell in enumerate(parts): + if cell and _BARE_CSS_RE.match(cell): + parts[i] = f"css={cell}" + rewrote += 1 + out_lines.append("".join(parts)) + + if rewrote: + logger.info(f"Locator normalizer: prefixed {rewrote} bare CSS selector(s) with `css=`") + return "\n".join(out_lines) diff --git a/src/backend/crew_ai/tasks.py b/src/backend/crew_ai/tasks.py index 91897ee..c6ef74c 100644 --- a/src/backend/crew_ai/tasks.py +++ b/src/backend/crew_ai/tasks.py @@ -411,6 +411,9 @@ def _get_task_hints(self, role: str) -> str: def plan_steps_task(self, agent, query) -> Task: # Build prompt using PromptComponents for maintainability description = f"""{self._get_task_hints("planner")} + CRITICAL: Any code examples in the USER FEEDBACK blocks above are reference context only. + Your output MUST be pure JSON — do NOT echo, copy, or reproduce any Robot Framework code from those blocks. + Your mission is to act as an expert Test Automation Planner. You must analyze a user's natural language query and decompose it into a comprehensive, step-by-step test plan that a junior test engineer could follow. The user query is: "{query}" diff --git a/src/backend/services/workflow_service.py b/src/backend/services/workflow_service.py index 9a740de..069f084 100644 --- a/src/backend/services/workflow_service.py +++ b/src/backend/services/workflow_service.py @@ -11,6 +11,7 @@ from datetime import datetime, timezone from src.backend.crew_ai.crew import run_crew, extract_url_from_query +from src.backend.crew_ai.robot_code_normalizer import normalize_robot_code from src.backend.services.docker_service import get_docker_client, build_image, run_test_in_container from src.backend.config.logging_config import EMOJI, bind_workflow_context from src.backend.core.observability import create_workflow_span @@ -379,6 +380,10 @@ def run_agentic_workflow(natural_language_query: str, model_provider: str, model robot_code = robot_code.replace('\\r', '\r') logging.info("✅ Normalized escaped newlines/tabs to actual characters") + # Prefix bare CSS selectors (#id, .class) with `css=` so Robot Framework does + # not parse them as comments. See robot_code_normalizer for full rationale. + robot_code = normalize_robot_code(robot_code) + # Simplified cleaning logic - prompt now handles most cases # Keep only essential defensive measures diff --git a/tools/browser_use_service.py b/tools/browser_use_service.py index 54f47e0..af1c38d 100644 --- a/tools/browser_use_service.py +++ b/tools/browser_use_service.py @@ -66,6 +66,21 @@ if str(_tools_dir) not in sys.path: sys.path.insert(0, str(_tools_dir)) +# ======================================== +# ENV LOADING — must happen before browser_service import +# ======================================== +# browser_service/__init__.py configures structlog at import time, so LOG_FORMAT +# must be in os.environ before that import. Load src/backend/.env here so that +# running `python tools/browser_use_service.py` in a separate terminal (without +# sourcing run.sh first) still picks up LOG_FORMAT and other shared env vars. +try: + from dotenv import load_dotenv as _load_dotenv + _env_path = _project_root / "src" / "backend" / ".env" + if _env_path.exists(): + _load_dotenv(_env_path, override=False) # override=False: shell env takes precedence +except ImportError: + pass # python-dotenv not available — rely on shell environment + # ======================================== # LOGGING — already configured by browser_service/__init__.py # ======================================== @@ -105,9 +120,6 @@ # 2. Fallback above sets up path (when run directly) from src.backend.core.config import settings # noqa: E402 -# Load environment variables -load_dotenv("src/backend/.env") - # ======================================== # FLASK APPLICATION # ======================================== From 86d7aab9652f852431bb5fe4d13e8fc6d195da16 Mon Sep 17 00:00:00 2001 From: monkscode <1dhruvilvyas@gmail.com> Date: Fri, 24 Apr 2026 16:06:54 +0530 Subject: [PATCH 2/2] feat: Enhance NLFeedbackEngine scope mapping and add locator normalization tests --- .../optimization/nl_feedback_engine.py | 80 +++---- src/backend/crew_ai/progress_events.py | 6 + src/backend/crew_ai/robot_code_normalizer.py | 213 ++++++++++++++++-- .../test_robot_code_normalizer.py | 166 ++++++++++++++ .../test_nl_feedback_pipeline.py | 21 +- 5 files changed, 407 insertions(+), 79 deletions(-) create mode 100644 tests/test_crew_ai/test_robot_code_normalizer.py diff --git a/src/backend/crew_ai/optimization/nl_feedback_engine.py b/src/backend/crew_ai/optimization/nl_feedback_engine.py index 3714d88..b087485 100644 --- a/src/backend/crew_ai/optimization/nl_feedback_engine.py +++ b/src/backend/crew_ai/optimization/nl_feedback_engine.py @@ -42,6 +42,10 @@ # Scope auto-determination — maps triage category to hint scope # --------------------------------------------------------------------------- +# Scope map covers the full planned triage taxonomy, not just Phase 1. +# "assertion" is pre-wired for DAY_08's wrong_assertion pattern (not yet implemented). +# "positive"/"negative" are reserved for future LLM-triage paths that may bypass +# the empty-text short-circuit. Do not remove — additions happen in Phase 3. _SCOPE_BY_CATEGORY = { "structural": "domain", "keyword": "global", @@ -204,6 +208,14 @@ class NLFeedbackEngine(LearningEngine): _CROSS_REF_BOOST = 0.20 _MAX_CONFIDENCE = 1.0 + # Scope WHERE clause shared by get_hints and update_hint_effectiveness. + # Parameter order is (domain, url). + _SCOPE_WHERE = ( + "scope = 'global' " + "OR (scope = 'domain' AND domain = ?) " + "OR (scope = 'url' AND url = ?)" + ) + def __init__(self, db_conn: sqlite3.Connection = None): """ Initialize NLFeedbackEngine. @@ -272,20 +284,13 @@ def process_feedback( best = matches[0] pattern_count = len(matches) - # Calculate confidence - confidence = min( - self._BASE_CONFIDENCE + (pattern_count * self._PATTERN_BOOST), - self._MAX_CONFIDENCE - self._CROSS_REF_BOOST, # leave room for boost - ) - - # Cross-reference with error message + # Base + per-pattern boost, add cross-ref boost when error confirms, cap once. + confidence = self._BASE_CONFIDENCE + (pattern_count * self._PATTERN_BOOST) if error_message and self._error_confirms_feedback( best["taxonomy"], error_message ): - confidence = min( - confidence + self._CROSS_REF_BOOST, - self._MAX_CONFIDENCE, - ) + confidence += self._CROSS_REF_BOOST + confidence = min(confidence, self._MAX_CONFIDENCE) result = { "category": best["category"], @@ -331,7 +336,7 @@ def learn_from_feedback(self, record, feedback_insight) -> None: category = feedback_insight.get("category", "uncategorized") # Skip purely informational triage results (no text correction) - if category in ("positive",): + if category == "positive": return scope = _SCOPE_BY_CATEGORY.get(category, "domain") @@ -422,11 +427,7 @@ def get_hints( " success_count, evidence_count " "FROM nl_feedback_corrections " "WHERE is_active = 1 " - "AND ( " - " scope = 'global' " - " OR (scope = 'domain' AND domain = ?) " - " OR (scope = 'url' AND url = ?) " - ") " + f"AND ({self._SCOPE_WHERE}) " "ORDER BY success_count DESC, " " evidence_count DESC, " " last_seen DESC " @@ -456,37 +457,18 @@ def get_hints( return hints if hints else None - # Detects Robot Framework variable syntax anywhere in a line - _RF_VAR_RE = re.compile(r'\$\{[A-Za-z_][A-Za-z0-9_ ]*\}') - def _format_feedback_hint(self, feedback_text: str) -> str: - # Wrap RF code lines in a labelled code block so the LLM treats them - # as reference examples only, not literal output to reproduce. - prose_lines: list[str] = [] - code_lines: list[str] = [] - - for line in feedback_text.splitlines(): - if self._RF_VAR_RE.search(line): - code_lines.append(line) - else: - stripped = line.strip() - if stripped: - prose_lines.append(stripped) - - parts: list[str] = [] - if prose_lines: - parts.append(" ".join(prose_lines)) - if code_lines: - parts.append( - "WRONG CODE EXAMPLE — reference only, do NOT reproduce in output:\n" - "```robot\n" - + "\n".join(code_lines) - + "\n```" - ) + # Pass feedback through verbatim with a universal warning header. + # The header travels with every hint to every agent (Planner, Assembler, + # Validator), so the "do not copy literally" guardrail applies even for + # agents whose task prompts lack an explicit disclaimer. Preserving the + # original line order retains the user's reasoning structure. + return ( + "\u26a0\ufe0f USER FEEDBACK (reference context from a past test — " + "do NOT copy any code literally, treat as guidance only):\n" + f"{feedback_text.strip()}" + ) - body = "\n".join(parts) if parts else feedback_text - return f"\u26a0\ufe0f USER FEEDBACK: {body}" - def update_hint_effectiveness( self, domain: Optional[str], @@ -518,11 +500,7 @@ def update_hint_effectiveness( " applied_count, success_count, failure_count " "FROM nl_feedback_corrections " "WHERE is_active = 1 " - "AND ( " - " scope = 'global' " - " OR (scope = 'domain' AND domain = ?) " - " OR (scope = 'url' AND url = ?) " - ")", + f"AND ({self._SCOPE_WHERE})", (domain, url), ).fetchall() diff --git a/src/backend/crew_ai/progress_events.py b/src/backend/crew_ai/progress_events.py index 0b599c9..8cdb2be 100644 --- a/src/backend/crew_ai/progress_events.py +++ b/src/backend/crew_ai/progress_events.py @@ -174,8 +174,14 @@ def _push_if_forward( """Push a progress event only if it advances the current progress value. Thread-safe. Discards the event silently if progress would not increase. + Also ignores events for workflows whose queue has been unregistered — this + prevents a late async TaskCompletedEvent handler from enqueuing a duplicate + 100% event after crew.py's manual push + unregister_workflow() have run, + and from re-creating the _current_progress entry (memory leak). """ with _lock: + if _workflow_queues.get(workflow_id) is not queue: + return current = _current_progress.get(workflow_id, 0) if progress <= current: return diff --git a/src/backend/crew_ai/robot_code_normalizer.py b/src/backend/crew_ai/robot_code_normalizer.py index f68a46e..2c93cdb 100644 --- a/src/backend/crew_ai/robot_code_normalizer.py +++ b/src/backend/crew_ai/robot_code_normalizer.py @@ -1,14 +1,32 @@ """ Deterministic post-processor for Robot Framework code emitted by the Code Assembler. -Robot Framework treats `#` as a comment marker wherever it appears at the start of a -cell (variable value or keyword argument). A bare CSS selector like `#searchBox` -therefore becomes an empty string at parse time, and tests fail with -`locator.fill: Unexpected token "" while parsing css selector ""`. +Two problems motivate this pass: + +1. Parse-level (`#`): Robot Framework treats `#` at the start of a cell as a + comment marker. A bare CSS selector like `#searchBox` becomes an empty + string at parse time, and tests fail with + `locator.fill: Unexpected token "" while parsing css selector ""`. + +2. Strategy-level (`.`): Locators without an explicit strategy prefix fall + back to each library's default strategy. SeleniumLibrary's implicit + strategy tries id/name/identifier first and may mis-route `.button`; + Browser Library defaults to CSS but gains nothing from ambiguity either. + Prefixing with `css=` makes the strategy explicit for both libraries. This module deterministically prefixes bare CSS selectors (`#id`, `.class`, or combined -forms like `#id.active`, `.btn.primary`, `#id[attr='v']`) with `css=` so RF parses -them as values rather than comments. +forms like `#id.active`, `.btn.primary`, `#id[attr='v']`) with `css=` — but ONLY at +cell positions that are actually locator arguments. Non-locator data such as tag +values, assertion values, or the text argument of `Fill Text` is never rewritten. + +Scope rules (conservative: no rewrite beats a wrong rewrite): + - Keyword calls: rewrite only the arg positions declared in _LOCATOR_KEYWORDS. + Unknown keywords cause the line to be left untouched. + - Variable declarations (assignment prefix with no recognized keyword): rewrite + any bare-CSS cell after the assignment. Matches the common pattern + `${SEARCH} #searchBox` in *** Variables *** sections. + - Setting markers ([Tags], [Documentation], ...): always skipped. + - Full-line comments and `...` continuations: preserved verbatim. Runs AFTER the Code Assembler returns. Complements the prompt guidance in `library_context/browser_context.py` and `library_context/selenium_context.py` as a @@ -36,17 +54,115 @@ # Robot Framework cell boundary: two-or-more spaces, or one-or-more tabs. _CELL_SPLIT_RE = re.compile(r"(\s{2,}|\t+)") +# Variable-assignment prefix: a cell that is exactly a scalar/list/dict variable +# (`${x}`, `@{list}`, `&{dict}`) with an optional trailing `=`. Used to detect +# lines of the form `${x}= Keyword ...` or `${X} value` in Variables. +_ASSIGN_PREFIX_RE = re.compile(r"^[$@&]\{[^}]+\}\s*=?\s*$") + +# Setting markers. When these are the first cell of a line, the rest of the line +# holds literal values, never locators — never rewrite. +_SETTING_MARKERS = frozenset({ + "[tags]", "[documentation]", "[arguments]", "[setup]", "[teardown]", + "[timeout]", "[template]", "[return]", +}) + + +def _canon_keyword(name: str) -> str: + """Canonicalize an RF keyword cell: lowercase, single-space between words, + library prefix stripped. RF itself is case- and space-insensitive for keyword + names, so `Fill Text`, `fill_text`, and `fill text` all map to `fill text`. + """ + stripped = re.sub(r"[\s_]+", " ", name.strip().lower()) + # Strip library prefix (e.g., "browser.click" -> "click"). A leading `.` is + # a bare CSS selector — not a library prefix — so guard against that. + if "." in stripped and not stripped.startswith("."): + stripped = stripped.rsplit(".", 1)[-1].strip() + return stripped + + +# Keyword -> tuple of 0-indexed argument positions that accept a locator. +# Any argument at a position NOT listed here (e.g., the text arg at position 1 +# of Fill Text) is left untouched. Keywords not in this dict cause the whole +# line to skip keyword-path rewriting (see normalize_robot_code for the variable- +# declaration fallback). +_LOCATOR_KEYWORDS: dict[str, tuple[int, ...]] = { + # Browser Library (Playwright-based) + "click": (0,), + "click with options": (0,), + "fill text": (0,), + "fill secret": (0,), + "type text": (0,), + "type secret": (0,), + "check checkbox": (0,), + "uncheck checkbox": (0,), + "select options by": (0,), + "hover": (0,), + "focus": (0,), + "press keys": (0,), + "get text": (0,), + "get attribute": (0,), + "get property": (0,), + "get style": (0,), + "get element": (0,), + "get elements": (0,), + "get element count": (0,), + "get element states": (0,), + "get select options": (0,), + "wait for elements state": (0,), + "scroll to element": (0,), + "upload file by selector": (0,), + # SeleniumLibrary + "click element": (0,), + "click button": (0,), + "click link": (0,), + "click image": (0,), + "input text": (0,), + "input password": (0,), + "select checkbox": (0,), + "unselect checkbox": (0,), + "select from list by label": (0,), + "select from list by index": (0,), + "select from list by value": (0,), + "mouse over": (0,), + "mouse out": (0,), + "submit form": (0,), + "choose file": (0,), + "clear element text": (0,), + "scroll element into view": (0,), + "wait until element is visible": (0,), + "wait until element is enabled": (0,), + "wait until element is not visible": (0,), + "wait until element contains": (0,), + "wait until page contains element": (0,), + "wait until page does not contain element": (0,), + "element should be visible": (0,), + "element should be enabled": (0,), + "element should be disabled": (0,), + "element should contain": (0,), + "element should not contain": (0,), + "element should not be visible": (0,), + "get webelement": (0,), + "get webelements": (0,), + "get element attribute": (0,), + # Two-locator keywords + "drag and drop": (0, 1), +} + def normalize_robot_code(robot_code: str) -> str: """ - Prefix bare CSS selectors in a Robot Framework file with `css=`. + Prefix bare CSS selectors in Robot Framework code with `css=`. - Only rewrites cells that look like bare CSS selectors. Leaves untouched: - - Already-prefixed locators (css=, xpath=, id=, name=, text=, role=, ...) - - Attribute selectors starting with `[` (already Playwright-native) - - Full-line comments (lines whose first non-whitespace char is `#`) - - Tag-only selectors (e.g. `body`, `input`) — ambiguous with keyword - names, intentionally skipped + Rewrites only cells that are actually locator arguments: + - Locator args of known keywords (per _LOCATOR_KEYWORDS) + - Values of variable declarations (`${X} #foo` and `${x}= #foo`) + + Leaves untouched: + - Non-locator args (e.g., the text arg of `Fill Text`) + - Setting marker lines (`[Tags]`, `[Documentation]`, ...) + - Assertion/log values (`Should Be Equal ${v} .NET`) + - Full-line comments and `...` continuations + - Custom/unknown keywords (conservative — no rewrite) Args: robot_code: The Robot Framework source as a string. @@ -61,17 +177,74 @@ def normalize_robot_code(robot_code: str) -> str: rewrote = 0 out_lines = [] for line in robot_code.split("\n"): - if line.lstrip().startswith("#"): - # Full-line comment — preserve verbatim. + stripped = line.lstrip() + + # Full-line comment or continuation — preserve verbatim. + if stripped.startswith("#") or stripped.startswith("..."): out_lines.append(line) continue parts = _CELL_SPLIT_RE.split(line) - for i, cell in enumerate(parts): - if cell and _BARE_CSS_RE.match(cell): - parts[i] = f"css={cell}" - rewrote += 1 - out_lines.append("".join(parts)) + # Indices of content cells (non-empty, non-separator). + cell_positions = [ + i for i, p in enumerate(parts) + if p and not _CELL_SPLIT_RE.fullmatch(p) + ] + if not cell_positions: + out_lines.append(line) + continue + + first_cell = parts[cell_positions[0]] + + # Setting marker lines never contain locators. + if first_cell.lower() in _SETTING_MARKERS: + out_lines.append(line) + continue + + # Walk past leading assignment cells to find the keyword cell. + keyword_pos_in_cells = 0 + for k, ci in enumerate(cell_positions): + if _ASSIGN_PREFIX_RE.match(parts[ci]): + keyword_pos_in_cells = k + 1 + continue + break + + # --- Case A: recognized keyword call --- + if keyword_pos_in_cells < len(cell_positions): + keyword_cell = parts[cell_positions[keyword_pos_in_cells]] + locator_arg_positions = _LOCATOR_KEYWORDS.get(_canon_keyword(keyword_cell)) + if locator_arg_positions is not None: + arg_cells_start = keyword_pos_in_cells + 1 + for offset in locator_arg_positions: + arg_pos = arg_cells_start + offset + if arg_pos >= len(cell_positions): + continue + target_idx = cell_positions[arg_pos] + cell = parts[target_idx] + if _BARE_CSS_RE.match(cell): + parts[target_idx] = f"css={cell}" + rewrote += 1 + out_lines.append("".join(parts)) + continue + + # --- Case B: variable declaration (assignment prefix, no known keyword) --- + # Common in *** Variables *** sections: `${SEARCH} #searchBox`. + # Also covers `${x}= Set Variable #foo` (Set Variable isn't in the + # allowlist, but the value being stored is a locator). + if keyword_pos_in_cells > 0: + for ci in cell_positions[keyword_pos_in_cells:]: + cell = parts[ci] + if _BARE_CSS_RE.match(cell): + parts[ci] = f"css={cell}" + rewrote += 1 + out_lines.append("".join(parts)) + continue + + # --- Case C: unrecognized line (custom keyword or section header) --- + # Conservative: leave untouched. A custom user-defined keyword whose + # first arg is a locator will miss rewriting here, but that's safer + # than corrupting legitimate non-locator data. + out_lines.append(line) if rewrote: logger.info(f"Locator normalizer: prefixed {rewrote} bare CSS selector(s) with `css=`") diff --git a/tests/test_crew_ai/test_robot_code_normalizer.py b/tests/test_crew_ai/test_robot_code_normalizer.py new file mode 100644 index 0000000..54c8ff6 --- /dev/null +++ b/tests/test_crew_ai/test_robot_code_normalizer.py @@ -0,0 +1,166 @@ +"""Tests for src.backend.crew_ai.robot_code_normalizer. + +Covers the false-positive regressions the allowlist was designed to fix, plus +the legitimate rewrite cases it must keep working. +""" + +import pytest + +from src.backend.crew_ai.robot_code_normalizer import normalize_robot_code + + +@pytest.mark.parametrize( + "label, source, expected", + [ + # --- False positives that the old global-rewrite version corrupted --- + ( + "Fill Text: #release is the typed text, not a locator", + " Fill Text ${message_locator} #release", + " Fill Text ${message_locator} #release", + ), + ( + "[Tags] takes literal tag values", + " [Tags] #smoke", + " [Tags] #smoke", + ), + ( + "Should Be Equal: .NET is an assertion value", + " Should Be Equal ${value} .NET", + " Should Be Equal ${value} .NET", + ), + ( + "Log with a hashtag literal — unknown-to-allowlist keyword stays untouched", + " Log #hashtag", + " Log #hashtag", + ), + ( + "Custom user-defined keyword — conservative skip", + " My Custom Keyword #foo", + " My Custom Keyword #foo", + ), + # --- Legitimate rewrites (Browser Library) --- + ( + "Click with bare id", + " Click #searchBox", + " Click css=#searchBox", + ), + ( + "Click with bare class", + " Click .btn-primary", + " Click css=.btn-primary", + ), + ( + "Fill Text rewrites position 0 only", + " Fill Text #email user@example.com", + " Fill Text css=#email user@example.com", + ), + ( + "Drag And Drop rewrites both locator args", + " Drag And Drop #src #dst", + " Drag And Drop css=#src css=#dst", + ), + ( + "Assignment prefix + known keyword", + " ${x}= Click #foo", + " ${x}= Click css=#foo", + ), + # --- Legitimate rewrites (SeleniumLibrary) --- + ( + "Click Element rewrites bare id", + " Click Element #submit", + " Click Element css=#submit", + ), + ( + "Input Text locator + literal text", + " Input Text #q search term", + " Input Text css=#q search term", + ), + ( + "Element Should Be Visible with bare class", + " Element Should Be Visible .result-row", + " Element Should Be Visible css=.result-row", + ), + # --- Variable declarations --- + ( + "*** Variables *** bare id value", + "${SEARCH} #searchBox", + "${SEARCH} css=#searchBox", + ), + ( + "*** Variables *** bare class value", + "${BTN} .btn-submit", + "${BTN} css=.btn-submit", + ), + ( + "${x}= Set Variable stores a bare locator value", + " ${x}= Set Variable #foo", + " ${x}= Set Variable css=#foo", + ), + # --- Canonicalization --- + ( + "Case-insensitive keyword lookup", + " click #foo", + " click css=#foo", + ), + ( + "Underscore-separated keyword name", + " fill_text #x y", + " fill_text css=#x y", + ), + # --- No-op edges --- + ( + "Already-prefixed locator — no double-prefix", + " Click css=#searchBox", + " Click css=#searchBox", + ), + ( + "XPath locator — untouched", + " Click //button[@id='x']", + " Click //button[@id='x']", + ), + ( + "Full-line comment preserved verbatim", + "# This is a comment about #searchBox", + "# This is a comment about #searchBox", + ), + ( + "Line continuation marker preserved", + " ... #continuation-text", + " ... #continuation-text", + ), + ( + "Section header untouched", + "*** Test Cases ***", + "*** Test Cases ***", + ), + ( + "Empty string", + "", + "", + ), + ( + "Tab-separated cells", + "\tClick\t#foo", + "\tClick\tcss=#foo", + ), + # --- Multi-line integrity --- + ( + "Multi-line input preserves non-locator lines", + "*** Test Cases ***\nMy Test\n Click #x\n Log done", + "*** Test Cases ***\nMy Test\n Click css=#x\n Log done", + ), + ], +) +def test_normalize_robot_code(label, source, expected): + assert normalize_robot_code(source) == expected, label + + +def test_fast_path_returns_input_when_no_selector_chars(): + """Strings without `#` or `.` must short-circuit and return the same object.""" + src = " Click ${BTN}\n Log hello" + assert normalize_robot_code(src) is src + + +def test_none_and_empty(): + assert normalize_robot_code("") == "" + assert normalize_robot_code(None) is None diff --git a/tests/test_optimization/test_nl_feedback_pipeline.py b/tests/test_optimization/test_nl_feedback_pipeline.py index aba155d..6ebf8eb 100644 --- a/tests/test_optimization/test_nl_feedback_pipeline.py +++ b/tests/test_optimization/test_nl_feedback_pipeline.py @@ -372,7 +372,8 @@ def test_hints_formatted_with_prefix(self, in_memory_db): engine = NLFeedbackEngine(in_memory_db) hints = engine.get_hints("click", "https://x.com", "assembler") assert hints is not None - assert hints[0].startswith("\u26a0\ufe0f USER FEEDBACK:") + assert hints[0].startswith("\u26a0\ufe0f USER FEEDBACK") + assert "Test prefix" in hints[0] # =================================================================== @@ -565,13 +566,17 @@ class TestScopeMapping: """_SCOPE_BY_CATEGORY mapping completeness.""" def test_scope_mapping_completeness(self): - """All expected categories should have a mapping.""" - expected = {"structural", "keyword", "locator", "anti_pattern", "validation"} - mapped = set(_SCOPE_BY_CATEGORY.keys()) - for cat in expected: - assert cat in mapped or True, ( - f"Category '{cat}' has no explicit scope mapping (will default to 'domain')" - ) + """Scope map must cover every category any code path can emit. + + Covers Phase 1 live categories plus pre-wired entries for Phase 3 + (assertion, positive, negative). Removing any of these would silently + change scope when the corresponding patterns land. + """ + expected = { + "structural", "keyword", "locator", "timing", "data", + "assertion", "uncategorized", "positive", "negative", + } + assert set(_SCOPE_BY_CATEGORY.keys()) == expected def test_scope_default_domain(self): """Unknown categories should default to 'domain'."""