google-gemini · jsamuel1 · Nov 1, 2025
diff --git a/README.md b/README.md
@@ -94,6 +94,7 @@ You can specify a particular environment with the ```--env <environment>``` flag
 
 - `playwright`: Runs the browser locally using Playwright.
 - `browserbase`: Connects to a Browserbase instance.
+- `agentcore`: Connects to Amazon Bedrock AgentCore Browser.
 
 **Local Playwright**
 
@@ -117,6 +118,43 @@ Runs the agent using Browserbase as the browser backend. Ensure the proper Brows
 python main.py --query="Go to Google and type 'Hello World' into the search bar" --env="browserbase"
 ```
 
+**Amazon Bedrock AgentCore**
+
+Runs the agent using Amazon Bedrock AgentCore Browser as the backend. Requires configured AWS credentials and an installed `bedrock-agentcore` Python package.
+
+```bash
+python main.py --query="Search for great deals on Alexa devices" --env="agentcore"
+```
+
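+The AWS region is resolved in this order: the `AGENTCORE_REGION` environment variable, the `AWS_REGION` environment variable, the region from your AWS configuration (for example `~/.aws/config`), and finally `us-west-2` if none of these is set. You can override it by setting: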
+
+```bash
+export AWS_REGION="us-east-1"
+```
+
+**Session Recording (AgentCore only)**
+
+Enable session recording to S3 for replay and debugging:
+
+```bash
+# Auto-create IAM role (recommended)
+python main.py --query="Search for great deals on Alexa devices" --env="agentcore" \
+  --recording_bucket="my-recordings-bucket" \
+  --create_execution_role
+
+# Or provide existing role
+python main.py --query="Search for great deals on Alexa devices" --env="agentcore" \
+  --recording_bucket="my-recordings-bucket" \
+  --recording_prefix="sessions" \
+  --execution_role_arn="arn:aws:iam::123456789012:role/AgentCoreRecordingRole"
+```
+
+The auto-created role is scoped to the specified S3 bucket/prefix with minimal permissions:
+- Trust policy: `bedrock-agentcore.amazonaws.com`
+- S3 permissions: `s3:PutObject`, `s3:ListMultipartUploadParts`, `s3:AbortMultipartUpload`
+
+Recordings can be viewed using the AgentCore session replay viewer.
+
 ## Agent CLI
 
 The `main.py` script is the command-line interface (CLI) for running the browser agent.
@@ -126,9 +164,11 @@ The `main.py` script is the command-line interface (CLI) for running the browser
 | Argument | Description | Required | Default | Supported Environment(s) |
 |-|-|-|-|-|
 | `--query` | The natural language query for the browser agent to execute. | Yes | N/A | All |
-| `--env` | The computer use environment to use. Must be one of the following: `playwright`, or `browserbase` | No | N/A | All |
+| `--env` | The computer use environment to use. Must be one of the following: `playwright`, `browserbase`, or `agentcore` | No | playwright | All |
 | `--initial_url` | The initial URL to load when the browser starts. | No | https://www.google.com | All |
 | `--highlight_mouse` | If specified, the agent will attempt to highlight the mouse cursor's position in the screenshots. This is useful for visual debugging. | No | False (not highlighted) | `playwright` |
+| `--recording_bucket` | S3 bucket name for session recording (bucket name only, not ARN). Example: `my-recordings-bucket` | No | None | `agentcore` |
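+| `--recording_prefix` | S3 prefix for session recordings. | No | recordings | `agentcore` |
+| `--execution_role_arn` | ARN of an existing IAM role to use for session recording. Example: `arn:aws:iam::123456789012:role/AgentCoreRecordingRole` | No | None | `agentcore` |
+| `--create_execution_role` | If specified, an IAM role scoped to the recording bucket/prefix is created automatically for session recording. | No | False | `agentcore` |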
 
 ### Environment Variables
 
@@ -137,3 +177,4 @@ The `main.py` script is the command-line interface (CLI) for running the browser
 | GEMINI_API_KEY | Your API key for the Gemini model. | Yes |
 | BROWSERBASE_API_KEY | Your API key for Browserbase. | Yes (when using the browserbase environment) |
 | BROWSERBASE_PROJECT_ID | Your Project ID for Browserbase. | Yes (when using the browserbase environment) |
+| AWS_REGION | AWS region for AgentCore Browser. | No (auto-detected from AWS config when using agentcore environment) |
diff --git a/computers/__init__.py b/computers/__init__.py
@@ -14,10 +14,12 @@
 from .computer import Computer, EnvState
 from .browserbase.browserbase import BrowserbaseComputer
 from .playwright.playwright import PlaywrightComputer
+from .agentcore.agentcore import AgentCoreComputer
 
 __all__ = [
     "Computer",
     "EnvState",
     "BrowserbaseComputer",
     "PlaywrightComputer",
+    "AgentCoreComputer",
 ]
diff --git a/computers/agentcore/__init__.py b/computers/agentcore/__init__.py
@@ -0,0 +1,3 @@
+from .agentcore import AgentCoreComputer
+
+__all__ = ["AgentCoreComputer"]
diff --git a/computers/agentcore/agentcore.py b/computers/agentcore/agentcore.py
@@ -0,0 +1,149 @@
+import os
+
+import termcolor
+from playwright.sync_api import sync_playwright
+
+from ..playwright.playwright import PlaywrightComputer
+from . import utils
+
+
+class AgentCoreComputer(PlaywrightComputer):
+    """Connects to Amazon Bedrock AgentCore Browser via CDP.
+
+    Supports optional session recording to S3 for replay and debugging.
+    """
+
+    def __init__(
+        self,
+        screen_size: tuple[int, int],
+        initial_url: str = "https://www.google.com",
+        recording_bucket: str | None = None,
+        recording_prefix: str = "recordings",
+        execution_role_arn: str | None = None,
+        create_execution_role: bool = False,
+        browser_identifier: str | None = None,
+        region: str | None = None,
+    ):
+        from boto3.session import Session
+
+        super().__init__(screen_size, initial_url)
+        self._recording_bucket: str | None = recording_bucket
+        self._recording_prefix: str = recording_prefix
+        self._execution_role_arn: str | None = execution_role_arn
+        self._create_execution_role: bool = create_execution_role
+        self._browser_identifier: str = (
+            browser_identifier or
+            os.getenv("AGENTCORE_BROWSER_IDENTIFIER", "aws.browser.v1")
+        )
+        # Determine region with fallback chain
+        boto_region = Session().region_name
+        self._region: str = (
+            region
+            or os.getenv("AGENTCORE_REGION")
+            or os.getenv("AWS_REGION")
+            or (boto_region if isinstance(boto_region, str) else None)
+            or "us-west-2"
+        )
+        self._created_browser: bool = False
+        self._client = None
+
+    def __enter__(self):
+        from bedrock_agentcore.tools.browser_client import BrowserClient
+
+        print("Creating AgentCore browser session...")
+
+        region = self._region
+
+        # Create browser with recording if bucket specified
+        browser_identifier_to_use = self._browser_identifier
+        if self._recording_bucket:
+            # If browser_identifier is already a browser ID (starts with "br-"), use it directly
+            if self._browser_identifier.startswith("br-"):
+                termcolor.cprint(
+                    f"Using provided browser ID: {self._browser_identifier}",
+                    color="cyan"
+                )
+                browser_identifier_to_use = self._browser_identifier
+            else:
+                # Create a unique browser name based on the bucket and prefix
+                # This ensures each recording configuration gets its own browser
+                import hashlib
+                config_hash = hashlib.sha256(
+                    f"{self._recording_bucket}/{self._recording_prefix}".encode()
+                ).hexdigest()[:8]
+                browser_name = f"recording_{config_hash}"
+
+                self._execution_role_arn, browser_id = utils.setup_browser_recording(
+                    browser_name,
+                    self._browser_identifier,
+                    self._recording_bucket,
+                    self._recording_prefix,
+                    self._execution_role_arn,
+                    self._create_execution_role,
+                    region
+                )
+                # Use the custom browser ID instead of the original identifier
+                browser_identifier_to_use = browser_id
+
+        self._client = BrowserClient(region)
+
+        session_id = self._client.start(
+            identifier=browser_identifier_to_use,
+            name="gemini-browser-session"
+        )
+        print(f"AgentCore browser session started: {session_id}")
+
+        ws_url, headers = self._client.generate_ws_headers()
+
+        self._playwright = sync_playwright().start()
+        self._browser = self._playwright.chromium.connect_over_cdp(
+            ws_url,
+            headers=headers
+        )
+        self._context = self._browser.contexts[0]
+        self._page = self._context.pages[0]
+
+        # Set viewport explicitly (CDP connection doesn't inherit from session config)
+        self._page.set_viewport_size({
+            "width": self._screen_size[0],
+            "height": self._screen_size[1]
+        })
+
+        self._page.goto(self._initial_url)
+
+        self._context.on("page", self._handle_new_page)
+
+        termcolor.cprint(
+            f"AgentCore browser session started in {region}",
+            color="green",
+            attrs=["bold"],
+        )
+
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        # Clean up in reverse order, with error handling for each step
+        try:
+            if self._page:
+                self._page.close()
+
+            if self._context:
+                self._context.close()
+
+            if self._browser:
+                self._browser.close()
+        finally:
+            try:
+                if self._client:
+                    _ = self._client.stop()
+            finally:
+                try:
+                    if self._playwright:
+                        self._playwright.stop()
+                finally:
+                    termcolor.cprint(
+                        "AgentCore browser session stopped",
+                        color="green",
+                        attrs=["bold"],
+                    )
+
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		from .agentcore import AgentCoreComputer

		__all__ = ["AgentCoreComputer"]