Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 42 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ You can specify a particular environment with the ```--env <environment>``` flag

- `playwright`: Runs the browser locally using Playwright.
- `browserbase`: Connects to a Browserbase instance.
- `agentcore`: Connects to Amazon Bedrock AgentCore Browser.

**Local Playwright**

Expand All @@ -117,6 +118,43 @@ Runs the agent using Browserbase as the browser backend. Ensure the proper Brows
python main.py --query="Go to Google and type 'Hello World' into the search bar" --env="browserbase"
```

**Amazon Bedrock AgentCore**

Runs the agent using Amazon Bedrock AgentCore Browser as the backend. This requires configured AWS credentials and the `bedrock-agentcore` Python package to be installed.

```bash
python main.py --query="Search for great deals on Alexa devices" --env="agentcore"
```

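The AWS region is detected automatically from your AWS configuration (environment variables, `~/.aws/config`, or IAM role); if no region can be determined, `us-west-2` is used. `AGENTCORE_REGION`, when set, takes precedence over `AWS_REGION`. You can override the detected region by setting: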

```bash
export AWS_REGION="us-east-1"
```

**Session Recording (AgentCore only)**

Enable session recording to S3 for replay and debugging:

```bash
# Auto-create IAM role (recommended)
python main.py --query="Search for great deals on Alexa devices" --env="agentcore" \
--recording_bucket="my-recordings-bucket" \
--create_execution_role

# Or provide existing role
python main.py --query="Search for great deals on Alexa devices" --env="agentcore" \
--recording_bucket="my-recordings-bucket" \
--recording_prefix="sessions" \
--execution_role_arn="arn:aws:iam::123456789012:role/AgentCoreRecordingRole"
```

The auto-created role is scoped to the specified S3 bucket/prefix with minimal permissions:
- Trust policy: `bedrock-agentcore.amazonaws.com`
- S3 permissions: `s3:PutObject`, `s3:ListMultipartUploadParts`, `s3:AbortMultipartUpload`

Recordings can be viewed using the AgentCore session replay viewer.

## Agent CLI

The `main.py` script is the command-line interface (CLI) for running the browser agent.
Expand All @@ -126,9 +164,11 @@ The `main.py` script is the command-line interface (CLI) for running the browser
| Argument | Description | Required | Default | Supported Environment(s) |
|-|-|-|-|-|
| `--query` | The natural language query for the browser agent to execute. | Yes | N/A | All |
| `--env` | The computer use environment to use. Must be one of the following: `playwright`, or `browserbase` | No | N/A | All |
| `--env` | The computer use environment to use. Must be one of the following: `playwright`, `browserbase`, or `agentcore` | No | playwright | All |
| `--initial_url` | The initial URL to load when the browser starts. | No | https://www.google.com | All |
| `--highlight_mouse` | If specified, the agent will attempt to highlight the mouse cursor's position in the screenshots. This is useful for visual debugging. | No | False (not highlighted) | `playwright` |
| `--recording_bucket` | S3 bucket name for session recording (bucket name only, not ARN). Example: `my-recordings-bucket` | No | None | `agentcore` |
| `--recording_prefix` | S3 prefix for session recordings. | No | recordings | `agentcore` |

### Environment Variables

Expand All @@ -137,3 +177,4 @@ The `main.py` script is the command-line interface (CLI) for running the browser
| GEMINI_API_KEY | Your API key for the Gemini model. | Yes |
| BROWSERBASE_API_KEY | Your API key for Browserbase. | Yes (when using the browserbase environment) |
| BROWSERBASE_PROJECT_ID | Your Project ID for Browserbase. | Yes (when using the browserbase environment) |
| AWS_REGION | AWS region for AgentCore Browser. | No (auto-detected from AWS config when using the agentcore environment) |
2 changes: 2 additions & 0 deletions computers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@
from .computer import Computer, EnvState
from .browserbase.browserbase import BrowserbaseComputer
from .playwright.playwright import PlaywrightComputer
from .agentcore.agentcore import AgentCoreComputer

# Names exported by `from computers import *`: the base interface types
# plus one concrete Computer implementation per supported environment.
__all__ = [
"Computer",
"EnvState",
"BrowserbaseComputer",
"PlaywrightComputer",
"AgentCoreComputer",
]
3 changes: 3 additions & 0 deletions computers/agentcore/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .agentcore import AgentCoreComputer

# Names exported by `from computers.agentcore import *`.
__all__ = ["AgentCoreComputer"]
149 changes: 149 additions & 0 deletions computers/agentcore/agentcore.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
import os

import termcolor
from playwright.sync_api import sync_playwright

from ..playwright.playwright import PlaywrightComputer
from . import utils


class AgentCoreComputer(PlaywrightComputer):
    """Connects to Amazon Bedrock AgentCore Browser via CDP.

    Supports optional session recording to S3 for replay and debugging.

    Intended to be used as a context manager: ``__enter__`` starts an
    AgentCore browser session and attaches Playwright to it over CDP, and
    ``__exit__`` closes the Playwright objects and stops the session.
    """

    def __init__(
        self,
        screen_size: tuple[int, int],
        initial_url: str = "https://www.google.com",
        recording_bucket: str | None = None,
        recording_prefix: str = "recordings",
        execution_role_arn: str | None = None,
        create_execution_role: bool = False,
        browser_identifier: str | None = None,
        region: str | None = None,
    ):
        """Store the configuration; no browser session is started here.

        Args:
            screen_size: ``(width, height)`` applied to the page viewport
                when the session is entered.
            initial_url: URL loaded once Playwright is attached.
            recording_bucket: S3 bucket for session recordings. Recording
                is only set up when this is truthy.
            recording_prefix: S3 prefix forwarded to the recording setup.
            execution_role_arn: Role ARN forwarded to the recording setup.
            create_execution_role: Flag forwarded to the recording setup.
            browser_identifier: Browser to start. Falls back to the
                ``AGENTCORE_BROWSER_IDENTIFIER`` environment variable and
                then to ``"aws.browser.v1"``.
            region: AWS region. Falls back, in order, to the
                ``AGENTCORE_REGION`` and ``AWS_REGION`` environment
                variables, the boto3 session region, and ``"us-west-2"``.
        """
        from boto3.session import Session

        super().__init__(screen_size, initial_url)
        self._recording_bucket: str | None = recording_bucket
        self._recording_prefix: str = recording_prefix
        self._execution_role_arn: str | None = execution_role_arn
        self._create_execution_role: bool = create_execution_role
        self._browser_identifier: str = (
            browser_identifier
            or os.getenv("AGENTCORE_BROWSER_IDENTIFIER", "aws.browser.v1")
        )
        # Determine region with fallback chain. Only a real string from
        # boto3 is accepted; anything else is treated as "not configured".
        boto_region = Session().region_name
        self._region: str = (
            region
            or os.getenv("AGENTCORE_REGION")
            or os.getenv("AWS_REGION")
            or (boto_region if isinstance(boto_region, str) else None)
            or "us-west-2"
        )
        self._created_browser: bool = False
        self._client = None

    def _resolve_browser_identifier(self) -> str:
        """Return the identifier to start, setting up recording if requested.

        Without a recording bucket the configured identifier is returned
        unchanged. With one, an identifier that already looks like a browser
        ID (``br-`` prefix) is used directly; otherwise
        ``utils.setup_browser_recording`` is called and the browser ID it
        returns is used. That call's returned role ARN replaces
        ``self._execution_role_arn``.
        """
        if not self._recording_bucket:
            return self._browser_identifier

        # If browser_identifier is already a browser ID, use it directly.
        if self._browser_identifier.startswith("br-"):
            termcolor.cprint(
                f"Using provided browser ID: {self._browser_identifier}",
                color="cyan"
            )
            return self._browser_identifier

        # Create a unique browser name based on the bucket and prefix so
        # each recording configuration gets its own browser.
        import hashlib

        config_hash = hashlib.sha256(
            f"{self._recording_bucket}/{self._recording_prefix}".encode()
        ).hexdigest()[:8]
        browser_name = f"recording_{config_hash}"

        self._execution_role_arn, browser_id = utils.setup_browser_recording(
            browser_name,
            self._browser_identifier,
            self._recording_bucket,
            self._recording_prefix,
            self._execution_role_arn,
            self._create_execution_role,
            self._region,
        )
        # Use the custom browser ID instead of the original identifier.
        return browser_id

    def _attach_playwright(self) -> None:
        """Connect Playwright to the started session and prepare the page."""
        ws_url, headers = self._client.generate_ws_headers()

        self._playwright = sync_playwright().start()
        self._browser = self._playwright.chromium.connect_over_cdp(
            ws_url,
            headers=headers
        )

        # Reuse the session's existing context/page; create one only when
        # the remote browser exposes none, instead of failing on [0].
        contexts = self._browser.contexts
        self._context = contexts[0] if contexts else self._browser.new_context()
        pages = self._context.pages
        self._page = pages[0] if pages else self._context.new_page()

        # Set viewport explicitly (CDP connection doesn't inherit from
        # session config).
        self._page.set_viewport_size({
            "width": self._screen_size[0],
            "height": self._screen_size[1]
        })

        self._page.goto(self._initial_url)

        self._context.on("page", self._handle_new_page)

    def _release_agentcore_resources(self) -> None:
        """Release every acquired resource, in reverse order of acquisition.

        Every step is attempted even if an earlier one fails, and each
        attribute is reset to ``None`` so a second call is a no-op. The
        first failure, if any, is re-raised after all steps have run.
        """
        first_error: BaseException | None = None
        steps = (
            ("_page", "close"),
            ("_context", "close"),
            ("_browser", "close"),
            ("_client", "stop"),
            ("_playwright", "stop"),
        )
        for attr, method in steps:
            # getattr: these attributes are only assigned while entering,
            # so they may be missing after a partially failed setup.
            resource = getattr(self, attr, None)
            if not resource:
                continue
            try:
                getattr(resource, method)()
            except BaseException as err:  # re-raised below, never swallowed
                if first_error is None:
                    first_error = err
            finally:
                setattr(self, attr, None)
        if first_error is not None:
            raise first_error

    def __enter__(self):
        """Start the AgentCore session and attach Playwright to it.

        Returns:
            This instance, ready for use.

        Raises:
            Whatever the session start or Playwright setup raises. If setup
            fails after the session was started, the session and any
            Playwright objects are released before the error propagates,
            because ``__exit__`` is not invoked when ``__enter__`` fails.
        """
        from bedrock_agentcore.tools.browser_client import BrowserClient

        print("Creating AgentCore browser session...")

        region = self._region
        browser_identifier_to_use = self._resolve_browser_identifier()

        self._client = BrowserClient(region)

        session_id = self._client.start(
            identifier=browser_identifier_to_use,
            name="gemini-browser-session"
        )
        print(f"AgentCore browser session started: {session_id}")

        try:
            self._attach_playwright()
        except BaseException:
            try:
                self._release_agentcore_resources()
            except Exception as cleanup_error:
                # Report but do not mask the original setup failure.
                termcolor.cprint(
                    "Cleanup after failed AgentCore setup also failed: "
                    f"{cleanup_error}",
                    color="yellow",
                )
            raise

        termcolor.cprint(
            f"AgentCore browser session started in {region}",
            color="green",
            attrs=["bold"],
        )

        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close Playwright objects and stop the AgentCore session.

        Cleanup errors propagate to the caller; exceptions raised inside
        the ``with`` body are never suppressed.
        """
        try:
            self._release_agentcore_resources()
        finally:
            termcolor.cprint(
                "AgentCore browser session stopped",
                color="green",
                attrs=["bold"],
            )

Loading