From 1d33cbcb4e009d182244d3c088867e9e90024615 Mon Sep 17 00:00:00 2001 From: CalettiGabriele Date: Sat, 11 Oct 2025 19:11:12 +0200 Subject: [PATCH 1/4] feat: add gcloud authentication support for Vertex AI with project ID --- README.md | 2 ++ agent.py | 22 ++++++++++++++++------ main.py | 8 +++++++- 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 73cc1ef..b07c042 100644 --- a/README.md +++ b/README.md @@ -129,6 +129,8 @@ The `main.py` script is the command-line interface (CLI) for running the browser | `--env` | The computer use environment to use. Must be one of the following: `playwright`, or `browserbase` | No | N/A | All | | `--initial_url` | The initial URL to load when the browser starts. | No | https://www.google.com | All | | `--highlight_mouse` | If specified, the agent will attempt to highlight the mouse cursor's position in the screenshots. This is useful for visual debugging. | No | False (not highlighted) | `playwright` | +| `--gcloud-auth` | Use gcloud authentication with Vertex AI. Specify the project ID. When used, this overrides API key authentication. 
| No | None | All | + ### Environment Variables diff --git a/agent.py b/agent.py index 2df602e..3c55c02 100644 --- a/agent.py +++ b/agent.py @@ -67,18 +67,28 @@ def __init__( query: str, model_name: str, verbose: bool = True, + gcloud_project: Optional[str] = None, ): self._browser_computer = browser_computer self._query = query self._model_name = model_name self._verbose = verbose self.final_reasoning = None - self._client = genai.Client( - api_key=os.environ.get("GEMINI_API_KEY"), - vertexai=os.environ.get("USE_VERTEXAI", "0").lower() in ["true", "1"], - project=os.environ.get("VERTEXAI_PROJECT"), - location=os.environ.get("VERTEXAI_LOCATION"), - ) + + # Configure client based on authentication method + if gcloud_project: + self._client = genai.Client( + vertexai=True, + project=gcloud_project, + location="global", # europe-west1 + ) + else: + self._client = genai.Client( + api_key=os.environ.get("GEMINI_API_KEY"), + vertexai=os.environ.get("USE_VERTEXAI", "0").lower() in ["true", "1"], + project=os.environ.get("VERTEXAI_PROJECT"), + location=os.environ.get("VERTEXAI_LOCATION"), + ) self._contents: list[Content] = [ Content( role="user", diff --git a/main.py b/main.py index 05d5537..398b179 100644 --- a/main.py +++ b/main.py @@ -20,7 +20,6 @@ PLAYWRIGHT_SCREEN_SIZE = (1440, 900) - def main() -> int: parser = argparse.ArgumentParser(description="Run the browser agent with a query.") parser.add_argument( @@ -54,6 +53,12 @@ def main() -> int: default='gemini-2.5-computer-use-preview-10-2025', help="Set which main model to use.", ) + parser.add_argument( + "--gcloud-auth", + type=str, + default=None, + help="Use gcloud authentication with Vertex AI. 
Specify the project ID (e.g., mlr-generative-ai-lab).", + ) args = parser.parse_args() if args.env == "playwright": @@ -75,6 +80,7 @@ def main() -> int: browser_computer=browser_computer, query=args.query, model_name=args.model, + gcloud_project=args.gcloud_auth, ) agent.agent_loop() return 0 From 01a274366cb7980ae420e7a36faf751c0b7d565a Mon Sep 17 00:00:00 2001 From: CalettiGabriele Date: Sat, 11 Oct 2025 19:11:14 +0200 Subject: [PATCH 2/4] feat: add trust mode to automatically approve safety confirmations --- README.md | 1 + agent.py | 13 +++++++++++++ main.py | 7 +++++++ 3 files changed, 21 insertions(+) diff --git a/README.md b/README.md index b07c042..07509d4 100644 --- a/README.md +++ b/README.md @@ -130,6 +130,7 @@ The `main.py` script is the command-line interface (CLI) for running the browser | `--initial_url` | The initial URL to load when the browser starts. | No | https://www.google.com | All | | `--highlight_mouse` | If specified, the agent will attempt to highlight the mouse cursor's position in the screenshots. This is useful for visual debugging. | No | False (not highlighted) | `playwright` | | `--gcloud-auth` | Use gcloud authentication with Vertex AI. Specify the project ID. When used, this overrides API key authentication. | No | None | All | +| `--trust` | Automatically approve all safety confirmations without prompting. Use with caution. 
| No | False | All | ### Environment Variables diff --git a/agent.py b/agent.py index 3c55c02..98f3983 100644 --- a/agent.py +++ b/agent.py @@ -68,11 +68,13 @@ def __init__( model_name: str, verbose: bool = True, gcloud_project: Optional[str] = None, + trust_mode: bool = False, ): self._browser_computer = browser_computer self._query = query self._model_name = model_name self._verbose = verbose + self._trust_mode = trust_mode self.final_reasoning = None # Configure client based on authentication method @@ -401,6 +403,17 @@ def _get_safety_confirmation( ) -> Literal["CONTINUE", "TERMINATE"]: if safety["decision"] != "require_confirmation": raise ValueError(f"Unknown safety decision: safety['decision']") + + # If trust mode is enabled, automatically approve + if self._trust_mode: + termcolor.cprint( + "Safety confirmation auto-approved (trust mode enabled)", + color="green", + ) + print(safety["explanation"]) + return "CONTINUE" + + # Otherwise, ask for user confirmation termcolor.cprint( "Safety service requires explicit confirmation!", color="yellow", diff --git a/main.py b/main.py index 398b179..b5d31c0 100644 --- a/main.py +++ b/main.py @@ -59,6 +59,12 @@ def main() -> int: default=None, help="Use gcloud authentication with Vertex AI. 
Specify the project ID (e.g., mlr-generative-ai-lab).", ) + parser.add_argument( + "--trust", + action="store_true", + default=False, + help="Automatically approve all safety confirmations without prompting.", + ) args = parser.parse_args() if args.env == "playwright": @@ -81,6 +87,7 @@ def main() -> int: query=args.query, model_name=args.model, gcloud_project=args.gcloud_auth, + trust_mode=args.trust, ) agent.agent_loop() return 0 From d9c73bf9bff2490661d26d069c90f2301f6df556 Mon Sep 17 00:00:00 2001 From: CalettiGabriele Date: Sun, 12 Oct 2025 18:47:52 +0200 Subject: [PATCH 3/4] feat: add secure credential management system with environment variable support --- agent.py | 121 ++++++++++++++++++++++++++++++++++++++++++++++- main.py | 4 ++ requirements.txt | 1 + 3 files changed, 124 insertions(+), 2 deletions(-) diff --git a/agent.py b/agent.py index 98f3983..3b6d301 100644 --- a/agent.py +++ b/agent.py @@ -60,6 +60,90 @@ def multiply_numbers(x: float, y: float) -> dict: return {"result": x * y} +def get_available_credentials() -> dict: + """Returns information about which credentials are available without exposing their values. + + This function checks environment variables for stored credentials and returns + metadata about what's available. The actual credential values are never exposed + to the model - they remain secure in environment variables. 
+ + Returns: + A dictionary containing: + - available_sites: list of sites for which credentials are configured + - credential_keys: list of available credential identifiers (without values) + """ + available_creds = {} + available_sites = [] + + # Check for common credential patterns in environment variables + # Format: SITE_USERNAME, SITE_PASSWORD (e.g., GITHUB_USERNAME, GITHUB_PASSWORD) + import os + env_vars = os.environ.keys() + + for var in env_vars: + if var.endswith('_USERNAME') or var.endswith('_USER'): + site = var.replace('_USERNAME', '').replace('_USER', '') + password_var = f"{site}_PASSWORD" + if password_var in env_vars: + available_sites.append(site.lower()) + available_creds[site.lower()] = { + "username_key": var, + "password_key": password_var, + "has_credentials": True + } + + return { + "available_sites": available_sites, + "message": f"Credentials are securely stored for: {', '.join(available_sites) if available_sites else 'no sites'}. Use perform_secure_login() to authenticate." + } + + +def perform_secure_login(site: str) -> dict: + """Performs a secure login using stored credentials without exposing them. + + This function retrieves credentials from environment variables and returns + them in a way that they can be used by the browser automation, but the actual + values are never shown in the conversation history or logs. 
+ + Args: + site: The site identifier (e.g., 'github', 'linkedin', 'mediaset') + + Returns: + A dictionary with: + - success: boolean indicating if credentials were found + - message: status message (without credential values) + - username: the actual username (to be used internally) + - password: the actual password (to be used internally) + """ + import os + + site_upper = site.upper() + username_key = f"{site_upper}_USERNAME" + password_key = f"{site_upper}_PASSWORD" + + # Try alternative key format + if username_key not in os.environ: + username_key = f"{site_upper}_USER" + + username = os.environ.get(username_key) + password = os.environ.get(password_key) + + if username and password: + return { + "success": True, + "message": f"Credentials retrieved for {site}. Ready to perform login.", + "username": username, + "password": password + } + else: + return { + "success": False, + "message": f"No credentials found for {site}. Expected environment variables: {username_key} and {password_key}", + "username": None, + "password": None + } + + class BrowserAgent: def __init__( self, @@ -76,6 +160,7 @@ def __init__( self._verbose = verbose self._trust_mode = trust_mode self.final_reasoning = None + self._temp_credentials = {} # Temporary storage for credentials during login # Configure client based on authentication method if gcloud_project: @@ -108,7 +193,14 @@ def __init__( # For example: types.FunctionDeclaration.from_callable( client=self._client, callable=multiply_numbers - ) + ), + # Secure credential management functions + types.FunctionDeclaration.from_callable( + client=self._client, callable=get_available_credentials + ), + types.FunctionDeclaration.from_callable( + client=self._client, callable=perform_secure_login + ), ] self._generate_content_config = GenerateContentConfig( @@ -150,10 +242,18 @@ def handle_action(self, action: types.FunctionCall) -> FunctionResponseT: y = self.denormalize_y(action.args["y"]) press_enter = action.args.get("press_enter", 
False) clear_before_typing = action.args.get("clear_before_typing", True) + + # Handle special placeholders for secure credentials + text = action.args["text"] + if text == "{{USERNAME}}" and "username" in self._temp_credentials: + text = self._temp_credentials["username"] + elif text == "{{PASSWORD}}" and "password" in self._temp_credentials: + text = self._temp_credentials["password"] + return self._browser_computer.type_text_at( x=x, y=y, - text=action.args["text"], + text=text, press_enter=press_enter, clear_before_typing=clear_before_typing, ) @@ -202,6 +302,23 @@ def handle_action(self, action: types.FunctionCall) -> FunctionResponseT: # Handle the custom function declarations here. elif action.name == multiply_numbers.__name__: return multiply_numbers(x=action.args["x"], y=action.args["y"]) + elif action.name == get_available_credentials.__name__: + return get_available_credentials() + elif action.name == perform_secure_login.__name__: + result = perform_secure_login(site=action.args["site"]) + # Remove sensitive data from the response that goes back to the model + safe_result = { + "success": result["success"], + "message": result["message"] + } + # Store credentials temporarily for the agent to use + if result["success"]: + self._temp_credentials = { + "username": result["username"], + "password": result["password"] + } + safe_result["instruction"] = "Credentials loaded. Use type_text_at with {{USERNAME}} and {{PASSWORD}} placeholders to enter credentials in the appropriate fields." + return safe_result else: raise ValueError(f"Unsupported function: {action}") diff --git a/main.py b/main.py index b5d31c0..0239758 100644 --- a/main.py +++ b/main.py @@ -13,10 +13,14 @@ # limitations under the License. 
import argparse import os +from dotenv import load_dotenv from agent import BrowserAgent from computers import BrowserbaseComputer, PlaywrightComputer +# Load environment variables from .env file +load_dotenv() + PLAYWRIGHT_SCREEN_SIZE = (1440, 900) diff --git a/requirements.txt b/requirements.txt index cab25ed..349f162 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ playwright==1.52.0 browserbase==1.3.0 rich pytest +python-dotenv==1.0.0 From 46493f8bedabde4cbd4081f1c5a64f19e29903c8 Mon Sep 17 00:00:00 2001 From: CalettiGabriele Date: Tue, 14 Oct 2025 07:35:34 +0200 Subject: [PATCH 4/4] feat: add site authentication support to Playwright browser automation --- README.md | 1 + computers/playwright/auth.py | 224 ++++++++++++++++++++++ computers/playwright/playwright.py | 35 +++- main.py | 17 ++ playwright-auth.toml.example | 291 +++++++++++++++++++++++++++++ requirements.txt | 1 + 6 files changed, 567 insertions(+), 2 deletions(-) create mode 100644 computers/playwright/auth.py create mode 100644 playwright-auth.toml.example diff --git a/README.md b/README.md index 07509d4..40f46f1 100644 --- a/README.md +++ b/README.md @@ -129,6 +129,7 @@ The `main.py` script is the command-line interface (CLI) for running the browser | `--env` | The computer use environment to use. Must be one of the following: `playwright`, or `browserbase` | No | N/A | All | | `--initial_url` | The initial URL to load when the browser starts. | No | https://www.google.com | All | | `--highlight_mouse` | If specified, the agent will attempt to highlight the mouse cursor's position in the screenshots. This is useful for visual debugging. | No | False (not highlighted) | `playwright` | +| `--auth-site` | Enable authentication and specify which site to login to. If specified, automatic login will be performed before agent operations. Uses `playwright-auth.toml` for configuration. | No | None | `playwright` | | `--gcloud-auth` | Use gcloud authentication with Vertex AI. 
Specify the project ID. When used, this overrides API key authentication. | No | None | All | | `--trust` | Automatically approve all safety confirmations without prompting. Use with caution. | No | False | All | diff --git a/computers/playwright/auth.py b/computers/playwright/auth.py new file mode 100644 index 0000000..eace056 --- /dev/null +++ b/computers/playwright/auth.py @@ -0,0 +1,224 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Authentication module for Playwright browser automation. +Handles login flows based on TOML configuration files. 
+""" + +import os +import time +import toml +import termcolor +from typing import Optional, Dict, Any +from playwright.sync_api import Page, TimeoutError as PlaywrightTimeoutError + + +class AuthConfig: + """Configuration for a single authentication site.""" + + def __init__(self, site_name: str, config: Dict[str, Any]): + self.site_name = site_name + self.name = config.get("name", site_name) + self.login_url = config["login_url"] + self.success_url = config.get("success_url", "") + self.username_env = config["username_env"] + self.password_env = config["password_env"] + + # Selectors + selectors = config.get("selectors", {}) + self.username_field = selectors.get("username_field", "") + self.password_field = selectors.get("password_field", "") + self.submit_button = selectors.get("submit_button", "") + self.success_element = selectors.get("success_element", "") + self.timeout = selectors.get("timeout", 30) + + def get_credentials(self) -> tuple[str, str]: + """Retrieve credentials from environment variables.""" + username = os.environ.get(self.username_env) + password = os.environ.get(self.password_env) + + if not username or not password: + raise ValueError( + f"Missing credentials for {self.name}. " + f"Please set {self.username_env} and {self.password_env} in .env file" + ) + + return username, password + + +class PlaywrightAuthenticator: + """Handles authentication for Playwright browser sessions.""" + + def __init__(self, config_path: str = "playwright-auth.toml"): + """ + Initialize the authenticator with a TOML configuration file. 
+ + Args: + config_path: Path to the TOML configuration file + """ + self.config_path = config_path + self.config_data = self._load_config() + self.default_site = self.config_data.get("default_site", "") + + def _load_config(self) -> Dict[str, Any]: + """Load and parse the TOML configuration file.""" + if not os.path.exists(self.config_path): + raise FileNotFoundError( + f"Authentication config file not found: {self.config_path}" + ) + + with open(self.config_path, "r", encoding="utf-8") as f: + return toml.load(f) + + def get_site_config(self, site_name: Optional[str] = None) -> AuthConfig: + """ + Get configuration for a specific site. + + Args: + site_name: Name of the site. If None, uses default_site. + + Returns: + AuthConfig object for the specified site + """ + if site_name is None: + site_name = self.default_site + + if not site_name: + raise ValueError("No site specified and no default_site configured") + + sites = self.config_data.get("sites", {}) + if site_name not in sites: + available = ", ".join(sites.keys()) + raise ValueError( + f"Site '{site_name}' not found in config. " + f"Available sites: {available}" + ) + + return AuthConfig(site_name, sites[site_name]) + + def perform_login( + self, + page: Page, + site_name: Optional[str] = None, + verbose: bool = True + ) -> str: + """ + Perform login on the specified site. + + Args: + page: Playwright Page object + site_name: Name of the site to login to. If None, uses default_site. 
+ verbose: Whether to print progress messages + + Returns: + The success URL after login + """ + config = self.get_site_config(site_name) + + if verbose: + termcolor.cprint( + f"Performing authentication for {config.name}...", + color="cyan", + attrs=["bold"], + ) + + # Get credentials from environment + username, password = config.get_credentials() + + # Navigate to login page + if verbose: + print(f" → Navigating to login page: {config.login_url}") + page.goto(config.login_url) + page.wait_for_load_state("networkidle", timeout=config.timeout * 1000) + + # Wait a bit for any dynamic content to load + time.sleep(1) + + # Fill username + if verbose: + print(f" → Filling username field") + try: + # Try as CSS selector first + page.fill(config.username_field, username, timeout=5000) + except PlaywrightTimeoutError: + # Try as label text + page.get_by_label(config.username_field).fill(username) + + # Fill password + if verbose: + print(f" → Filling password field") + try: + # Try as CSS selector first + page.fill(config.password_field, password, timeout=5000) + except PlaywrightTimeoutError: + # Try as label text + page.get_by_label(config.password_field).fill(password) + + # Click submit button + if verbose: + print(f" → Clicking submit button") + try: + # Try as CSS selector first + page.click(config.submit_button, timeout=5000) + except PlaywrightTimeoutError: + # Try as button text + page.get_by_role("button", name=config.submit_button).click() + + # Wait for navigation after login + if verbose: + print(f" → Waiting for authentication to complete...") + + # Wait for either success URL or success element + if config.success_url: + try: + page.wait_for_url( + config.success_url, + timeout=config.timeout * 1000 + ) + except PlaywrightTimeoutError: + # If exact URL match fails, just wait for load state + page.wait_for_load_state("networkidle", timeout=config.timeout * 1000) + else: + page.wait_for_load_state("networkidle", timeout=config.timeout * 1000) + + # 
Optionally wait for a specific element to verify login + if config.success_element: + try: + page.wait_for_selector( + config.success_element, + timeout=config.timeout * 1000 + ) + except PlaywrightTimeoutError: + termcolor.cprint( + f"Warning: Success element '{config.success_element}' not found", + color="yellow" + ) + + # Additional wait to ensure page is fully loaded + time.sleep(1) + + if verbose: + termcolor.cprint( + f"✓ Authentication successful for {config.name}", + color="green", + attrs=["bold"], + ) + + # Return the URL to navigate to (or current URL if success_url not specified) + return config.success_url if config.success_url else page.url + + def list_available_sites(self) -> list[str]: + """Return a list of all configured sites.""" + return list(self.config_data.get("sites", {}).keys()) diff --git a/computers/playwright/playwright.py b/computers/playwright/playwright.py index e7a53c3..3c7bf19 100644 --- a/computers/playwright/playwright.py +++ b/computers/playwright/playwright.py @@ -22,7 +22,8 @@ ) import playwright.sync_api from playwright.sync_api import sync_playwright -from typing import Literal +from typing import Literal, Optional +from .auth import PlaywrightAuthenticator # Define a mapping from the user-friendly key names to Playwright's expected key names. # Playwright is generally good with case-insensitivity for these, but it's best to be canonical. 
@@ -81,11 +82,20 @@ def __init__( initial_url: str = "https://www.google.com", search_engine_url: str = "https://www.google.com", highlight_mouse: bool = False, + auth_config_path: Optional[str] = None, + auth_site: Optional[str] = None, ): self._initial_url = initial_url self._screen_size = screen_size self._search_engine_url = search_engine_url self._highlight_mouse = highlight_mouse + self._auth_config_path = auth_config_path + self._auth_site = auth_site + self._authenticator = None + + # If authentication is configured, initialize the authenticator + if self._auth_config_path: + self._authenticator = PlaywrightAuthenticator(self._auth_config_path) def _handle_new_page(self, new_page: playwright.sync_api.Page): """The Computer Use model only supports a single tab at the moment. @@ -120,7 +130,28 @@ def __enter__(self): } ) self._page = self._context.new_page() - self._page.goto(self._initial_url) + + # Perform authentication if configured + if self._authenticator: + try: + authenticated_url = self._authenticator.perform_login( + self._page, + self._auth_site, + verbose=True + ) + # Update initial URL to the authenticated page + if authenticated_url: + self._initial_url = authenticated_url + except Exception as e: + termcolor.cprint( + f"Authentication failed: {str(e)}", + color="red", + attrs=["bold"], + ) + raise + else: + # No authentication, just navigate to initial URL + self._page.goto(self._initial_url) self._context.on("page", self._handle_new_page) diff --git a/main.py b/main.py index 0239758..1c79680 100644 --- a/main.py +++ b/main.py @@ -40,6 +40,12 @@ def main() -> int: default="playwright", help="The computer use environment to use.", ) + parser.add_argument( + "--auth-site", + type=str, + default=None, + help="Enable authentication and specify which site to login to. If specified, automatic login will be performed before agent operations. 
If not specified, uses default_site from config file.", + ) parser.add_argument( "--initial_url", type=str, @@ -72,10 +78,21 @@ def main() -> int: args = parser.parse_args() if args.env == "playwright": + # Check if authentication is requested via --auth-site + auth_config_path = None + auth_site = None + + if args.auth_site is not None: + # Authentication enabled - always use playwright-auth.toml + auth_config_path = "playwright-auth.toml" + auth_site = args.auth_site if args.auth_site else None + env = PlaywrightComputer( screen_size=PLAYWRIGHT_SCREEN_SIZE, initial_url=args.initial_url, highlight_mouse=args.highlight_mouse, + auth_config_path=auth_config_path, + auth_site=auth_site, ) elif args.env == "browserbase": env = BrowserbaseComputer( diff --git a/playwright-auth.toml.example b/playwright-auth.toml.example new file mode 100644 index 0000000..fc26390 --- /dev/null +++ b/playwright-auth.toml.example @@ -0,0 +1,291 @@ +# ============================================================================ +# Playwright Authentication Configuration - Example File +# ============================================================================ +# +# This is an example configuration file showing how to set up authentication +# for multiple websites. Copy this file to 'playwright-auth.toml' and customize +# it with your own sites and configurations. 
+# +# IMPORTANT SECURITY NOTES: +# - Never hardcode credentials in this file +# - Always store credentials in the .env file +# - Reference credentials using environment variable names +# - The .env file should be in .gitignore (never commit it) +# +# ============================================================================ + +# ---------------------------------------------------------------------------- +# Default Site Configuration +# ---------------------------------------------------------------------------- +# Site used when --auth-site is passed with an empty value; omitting --auth-site disables login +# This should match one of the site names defined below (e.g., "google", "github") +default_site = "google" + +# ============================================================================ +# SITE CONFIGURATIONS +# ============================================================================ +# Each site configuration follows this structure: +# +# [sites.SITE_NAME] +# name = "Human-readable site name" +# login_url = "URL of the login page" +# success_url = "URL the browser is expected to reach after successful login (optional)" +# username_env = "ENV_VAR_NAME_FOR_USERNAME" +# password_env = "ENV_VAR_NAME_FOR_PASSWORD" +# +# [sites.SITE_NAME.selectors] +# username_field = "CSS selector or label text for username input" +# password_field = "CSS selector or label text for password input" +# submit_button = "CSS selector or button text for submit button" +# success_element = "CSS selector to verify successful login (optional)" +# timeout = 30 # Timeout in seconds for each wait step of the login flow +# ============================================================================ + +# ---------------------------------------------------------------------------- +# Example 1: Google Account Login +# ---------------------------------------------------------------------------- +[sites.google] +# Human-readable name for this site configuration +name = "Google Account" + +# URL of the login page where authentication starts +# This is where 
the browser will navigate first +login_url = "https://accounts.google.com/signin" + +# URL the browser is expected to reach after successful login (optional) +# If specified, the login flow waits for the browser to arrive at this URL +# If empty, the login flow only waits for the page to finish loading +success_url = "https://mail.google.com" + +# Environment variable names for credentials +# These variables must be defined in your .env file +# Example in .env: +# GOOGLE_EMAIL=your.email@gmail.com +# GOOGLE_PASSWORD=your_secure_password +username_env = "GOOGLE_EMAIL" +password_env = "GOOGLE_PASSWORD" + +# CSS Selectors for login form elements +[sites.google.selectors] +# Selector for the email/username input field +# Can be a CSS selector (e.g., "input[type='email']") or label text +# Google uses different selectors; this is a common one: +username_field = "input[type='email']" + +# Selector for the password input field +# Note: Google has a two-step login (email first, then password); the bundled flow fills both fields on one page, so this example may need adapting +password_field = "input[type='password']" + +# Selector for the submit/sign-in button +# Can be a CSS selector or button text (e.g., "Sign in", "Next") +submit_button = "button[type='button']" + +# Optional: CSS selector to wait for after successful login +# This helps verify that login was successful +# Leave empty ("") if not needed +success_element = "" + +# Timeout in seconds applied to each wait step of the login flow +# Increase this if the site is slow to load +timeout = 45 + +# ---------------------------------------------------------------------------- +# Example 2: GitHub Login +# ---------------------------------------------------------------------------- +[sites.github] +name = "GitHub" +login_url = "https://github.com/login" +success_url = "https://github.com/" +username_env = "GITHUB_USERNAME" +password_env = "GITHUB_PASSWORD" + +[sites.github.selectors] +username_field = "input[name='login']" +password_field = "input[name='password']" +submit_button = "input[type='submit']" +success_element = "button[aria-label='View 
profile and more']" +timeout = 30 + +# ---------------------------------------------------------------------------- +# Example 3: LinkedIn Login +# ---------------------------------------------------------------------------- +[sites.linkedin] +name = "LinkedIn" +login_url = "https://www.linkedin.com/login" +success_url = "https://www.linkedin.com/feed/" +username_env = "LINKEDIN_EMAIL" +password_env = "LINKEDIN_PASSWORD" + +[sites.linkedin.selectors] +username_field = "input[id='username']" +password_field = "input[id='password']" +submit_button = "button[type='submit']" +success_element = "" +timeout = 30 + +# ============================================================================ +# HOW TO ADD A NEW SITE +# ============================================================================ +# +# 1. Add credentials to your .env file: +# MYSITE_USERNAME=your_username +# MYSITE_PASSWORD=your_password +# +# 2. Find the CSS selectors for the login form: +# - Open the login page in a browser +# - Right-click on the username field → Inspect +# - Look for attributes like name, id, type, or class +# - Common patterns: +# * input[name='username'] +# * input[type='email'] +# * input[id='login'] +# * #username (for id="username") +# * .username-field (for class="username-field") +# +# 3. Add configuration to this file: +# [sites.mysite] +# name = "My Site Name" +# login_url = "https://mysite.com/login" +# success_url = "https://mysite.com/dashboard" +# username_env = "MYSITE_USERNAME" +# password_env = "MYSITE_PASSWORD" +# +# [sites.mysite.selectors] +# username_field = "input[name='username']" +# password_field = "input[name='password']" +# submit_button = "button[type='submit']" +# success_element = "" +# timeout = 30 +# +# 4. 
Test the configuration: +# python main.py --query "Describe what you see" --auth-site mysite +# +# ============================================================================ + +# ============================================================================ +# CSS SELECTOR GUIDE +# ============================================================================ +# +# Common selector patterns: +# +# By attribute name: +# input[name='username'] +# input[name='email'] +# button[name='submit'] +# +# By ID: +# input[id='username'] +# #username (shorthand) +# +# By type: +# input[type='email'] +# input[type='password'] +# input[type='submit'] +# button[type='submit'] +# +# By class: +# input[class='login-input'] +# .login-input (shorthand) +# +# By multiple attributes: +# input[type='text'][name='username'] +# +# Using label text (alternative to CSS): +# "Email address" +# "Password" +# "Sign in" +# +# ============================================================================ + +# ============================================================================ +# TROUBLESHOOTING +# ============================================================================ +# +# Problem: "Missing credentials" error +# Solution: Ensure the environment variables are set in your .env file +# +# Problem: "Timeout waiting for selector" error +# Solution: +# - Verify the CSS selectors are correct (inspect the page) +# - Try using label text instead of CSS selector +# - Increase the timeout value +# +# Problem: Login fails but credentials are correct +# Solution: +# - Check if the website has CAPTCHA or 2FA +# - Verify the login_url is correct +# - Try running without headless mode to see what's happening: +# Set PLAYWRIGHT_HEADLESS=0 in your .env file +# +# Problem: Can't find the right CSS selector +# Solution: +# - Use browser DevTools (F12) to inspect elements +# - Try different selector strategies (see CSS SELECTOR GUIDE above) +# - You can also use label text directly (e.g., "Email address") 
+# +# Problem: Site has two-factor authentication (2FA) +# Solution: +# - This system doesn't support 2FA automatically +# - Consider using app-specific passwords if available +# - Or disable 2FA for automation accounts (not recommended for production) +# +# ============================================================================ + +# ============================================================================ +# SECURITY BEST PRACTICES +# ============================================================================ +# +# 1. Never commit the .env file to version control +# - Add .env to .gitignore (already done by default) +# +# 2. Use strong, unique passwords for each site +# - Consider using a password manager +# +# 3. Rotate credentials regularly +# - Update passwords periodically +# +# 4. Limit access to the .env file +# - Set appropriate file permissions (chmod 600 on Unix) +# +# 5. Use environment-specific .env files +# - .env.development +# - .env.staging +# - .env.production +# +# 6. Never share credentials in plain text +# - Don't send via email, chat, or other insecure channels +# +# 7. Consider using dedicated automation accounts +# - Create separate accounts for automation +# - Limit their permissions to only what's needed +# +# 8. Monitor for suspicious activity +# - Check login logs regularly +# - Set up alerts for unusual access patterns +# +# ============================================================================ + +# ============================================================================ +# ADVANCED CONFIGURATION +# ============================================================================ +# +# Custom wait conditions: +# If a site requires waiting for specific elements after login, use success_element: +# +# [sites.example.selectors] +# success_element = "div[class='dashboard']" +# +# This will wait for the dashboard element to appear before considering login successful. 
+# +# Multiple submit buttons: +# Some sites have multiple submit buttons. Be specific: +# +# submit_button = "button[type='submit'][class='primary-button']" +# +# Dynamic selectors: +# If selectors change, try more stable alternatives: +# - Use data attributes: input[data-testid='username'] +# - Use aria labels: input[aria-label='Email'] +# - Use role attributes: button[role='button'] +# +# ============================================================================ diff --git a/requirements.txt b/requirements.txt index 349f162..52b37c2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,4 @@ browserbase==1.3.0 rich pytest python-dotenv==1.0.0 +toml==0.10.2