From 0a0d484bed7592ed8faeeff7d1032afa5e4438cb Mon Sep 17 00:00:00 2001 From: anthonyduong Date: Fri, 5 Sep 2025 23:27:07 -0700 Subject: [PATCH] make visualwebarena package --- README.md | 27 +++++-- pyproject.toml | 72 ++++++++++++++++++ run.py | 16 ++-- run_demo.py | 14 ++-- scripts/collect_obs.py | 4 +- scripts/generate_test_data.py | 2 +- setup.cfg | 25 ------ setup.py | 4 - tests/conftest.py | 2 +- .../test_action_functionalities.py | 2 +- tests/test_browser_env/test_actions.py | 2 +- tests/test_browser_env/test_auth_cookie.py | 2 +- .../test_playwright_actions.py | 2 +- .../test_script_browser_env.py | 6 +- .../test_exact_evaluators.py | 10 +-- .../test_helper_functions.py | 6 +- visualwebarena/__init__.py | 13 ++++ {agent => visualwebarena/agent}/__init__.py | 0 {agent => visualwebarena/agent}/agent.py | 12 +-- .../agent}/prompts/__init__.py | 0 .../prompts/jsons/p_cot_id_actree_2s.json | 0 .../jsons/p_cot_id_actree_2s_no_na.json | 0 .../prompts/jsons/p_cot_id_actree_3s.json | 0 .../jsons/p_multimodal_cot_id_actree_3s.json | 0 .../prompts/jsons/p_som_cot_id_actree_3s.json | 0 .../multimodal_example1.png | Bin .../multimodal_example2.png | Bin .../multimodal_example3.png | Bin .../agent}/prompts/prompt_constructor.py | 12 +-- .../agent}/prompts/raw/p_cot_id_actree_2s.py | 0 .../prompts/raw/p_cot_id_actree_2s_no_na.py | 0 .../agent}/prompts/raw/p_cot_id_actree_3s.py | 0 .../raw/p_multimodal_cot_id_actree_3s.py | 0 .../prompts/raw/p_som_cot_id_actree_3s.py | 0 .../prompts/som_examples/som_example1.png | Bin .../prompts/som_examples/som_example2.png | Bin .../prompts/som_examples/som_example3.png | Bin .../agent}/prompts/to_json.py | 0 .../browser_env}/__init__.py | 0 .../browser_env}/actions.py | 6 +- .../browser_env}/async_envs.py | 0 .../browser_env}/auto_login.py | 6 +- .../browser_env}/constants.py | 0 .../browser_env}/env_config.py | 0 .../browser_env}/envs.py | 4 +- .../browser_env}/helper_functions.py | 4 +- .../javascript/frame_mark_elements.js | 0 .../javascript/frame_unmark_elements.js | 0 .../browser_env}/processors.py | 2 +- .../browser_env}/py.typed | 0 .../browser_env}/trajectory.py | 0 .../browser_env}/utils.py | 0 .../evaluation_harness}/__init__.py | 0 .../evaluation_harness}/evaluators.py | 8 +- .../evaluation_harness}/helper_functions.py | 4 +- .../evaluation_harness}/image_utils.py | 0 {llms => visualwebarena/llms}/__init__.py | 0 {llms => visualwebarena/llms}/lm_config.py | 0 .../llms}/providers/gemini_utils.py | 0 .../llms}/providers/hf_utils.py | 0 .../llms}/providers/openai_utils.py | 0 {llms => visualwebarena/llms}/tokenizers.py | 0 {llms => visualwebarena/llms}/utils.py | 4 +- 63 files changed, 172 insertions(+), 99 deletions(-) create mode 100644 pyproject.toml delete mode 100644 setup.cfg delete mode 100644 setup.py create mode 100644 visualwebarena/__init__.py rename {agent => visualwebarena/agent}/__init__.py (100%) rename {agent => visualwebarena/agent}/agent.py (96%) rename {agent => visualwebarena/agent}/prompts/__init__.py (100%) rename {agent => visualwebarena/agent}/prompts/jsons/p_cot_id_actree_2s.json (100%) rename {agent => visualwebarena/agent}/prompts/jsons/p_cot_id_actree_2s_no_na.json (100%) rename {agent => visualwebarena/agent}/prompts/jsons/p_cot_id_actree_3s.json (100%) rename {agent => visualwebarena/agent}/prompts/jsons/p_multimodal_cot_id_actree_3s.json (100%) rename {agent => visualwebarena/agent}/prompts/jsons/p_som_cot_id_actree_3s.json (100%) rename {agent => visualwebarena/agent}/prompts/multimodal_examples/multimodal_example1.png (100%) rename {agent => visualwebarena/agent}/prompts/multimodal_examples/multimodal_example2.png (100%) rename {agent => visualwebarena/agent}/prompts/multimodal_examples/multimodal_example3.png (100%) rename {agent => visualwebarena/agent}/prompts/prompt_constructor.py (97%) rename {agent => visualwebarena/agent}/prompts/raw/p_cot_id_actree_2s.py (100%) rename {agent => visualwebarena/agent}/prompts/raw/p_cot_id_actree_2s_no_na.py (100%) rename {agent => visualwebarena/agent}/prompts/raw/p_cot_id_actree_3s.py (100%) rename {agent => visualwebarena/agent}/prompts/raw/p_multimodal_cot_id_actree_3s.py (100%) rename {agent => visualwebarena/agent}/prompts/raw/p_som_cot_id_actree_3s.py (100%) rename {agent => visualwebarena/agent}/prompts/som_examples/som_example1.png (100%) rename {agent => visualwebarena/agent}/prompts/som_examples/som_example2.png (100%) rename {agent => visualwebarena/agent}/prompts/som_examples/som_example3.png (100%) rename {agent => visualwebarena/agent}/prompts/to_json.py (100%) rename {browser_env => visualwebarena/browser_env}/__init__.py (100%) rename {browser_env => visualwebarena/browser_env}/actions.py (99%) rename {browser_env => visualwebarena/browser_env}/async_envs.py (100%) rename {browser_env => visualwebarena/browser_env}/auto_login.py (97%) rename {browser_env => visualwebarena/browser_env}/constants.py (100%) rename {browser_env => visualwebarena/browser_env}/env_config.py (100%) rename {browser_env => visualwebarena/browser_env}/envs.py (98%) rename {browser_env => visualwebarena/browser_env}/helper_functions.py (99%) rename {browser_env => visualwebarena/browser_env}/javascript/frame_mark_elements.js (100%) rename {browser_env => visualwebarena/browser_env}/javascript/frame_unmark_elements.js (100%) rename {browser_env => visualwebarena/browser_env}/processors.py (99%) rename {browser_env => visualwebarena/browser_env}/py.typed (100%) rename {browser_env => visualwebarena/browser_env}/trajectory.py (100%) rename {browser_env => visualwebarena/browser_env}/utils.py (100%) rename {evaluation_harness => visualwebarena/evaluation_harness}/__init__.py (100%) rename {evaluation_harness => visualwebarena/evaluation_harness}/evaluators.py (99%) rename {evaluation_harness => visualwebarena/evaluation_harness}/helper_functions.py (99%) rename {evaluation_harness => visualwebarena/evaluation_harness}/image_utils.py (100%) rename {llms => visualwebarena/llms}/__init__.py (100%) rename {llms => visualwebarena/llms}/lm_config.py (100%) rename {llms => visualwebarena/llms}/providers/gemini_utils.py (100%) rename {llms => visualwebarena/llms}/providers/hf_utils.py (100%) rename {llms => visualwebarena/llms}/providers/openai_utils.py (100%) rename {llms => visualwebarena/llms}/tokenizers.py (100%) rename {llms => visualwebarena/llms}/utils.py (96%) diff --git a/README.md b/README.md index cea8689..b6bedce 100644 --- a/README.md +++ b/README.md @@ -27,17 +27,34 @@ - [01/25/2024]: GitHub repo released with tasks and scripts for setting up the VWA environments. ## Install + +**Option 1: pip install from GitHub (Recommended)** ```bash # Python 3.10 (or 3.11, but not 3.12 cause 3.12 deprecated distutils needed here) -python -m venv venv -source venv/bin/activate -pip install -r requirements.txt +pip install git+https://github.com/web-arena-x/visualwebarena.git playwright install -pip install -e . ``` -You can also run the unit tests to ensure that VisualWebArena is installed correctly: +**Option 2: Install from Source** +```bash +# Python 3.10 (or 3.11, but not 3.12 cause 3.12 deprecated distutils needed here) +git clone https://github.com/web-arena-x/visualwebarena.git +cd visualwebarena +pip install . +playwright install ``` + +**Option 3: Development Installation** +```bash +# Python 3.10 (or 3.11, but not 3.12 cause 3.12 deprecated distutils needed here) +git clone https://github.com/web-arena-x/visualwebarena.git +cd visualwebarena +pip install -e . +playwright install +``` + +You can run the unit tests to ensure that VisualWebArena is installed correctly: +```bash pytest -x ``` diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..541ae81 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,72 @@ +[build-system] +requires = ["setuptools>=64", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "visualwebarena" +version = "0.1.0" +description = "Evaluating Multimodal Agents on Realistic Visual Web Tasks" +readme = "README.md" +license = {text = "MIT"} +authors = [ + {name = "VisualWebArena Team"}, +] +requires-python = ">=3.7" +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Researchers", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Topic :: Scientific/Engineering :: Artificial Intelligence", +] +dependencies = [ + "beartype>=0.12.0", + "beautifulsoup4>=4.12.2", + "gymnasium>=0.29.1", + "numpy>=1.25.2", + "pillow>=10.0.1", + "playwright>=1.37.0", + "pydantic>=2.4.2", + "requests>=2.31.0", + "openai>=1.3.5", + "torch>=2.0.1", + "transformers>=4.34.0", + "nltk>=3.8.1", + "scikit-image>=0.22.0", + "tiktoken>=0.4.0", + "matplotlib>=3.8.0", + "text-generation>=0.6.1", + "aiolimiter>=1.1.0", + "evaluate>=0.4.0", +] + +[project.optional-dependencies] +dev = [ + "pre-commit==3.0.1", + "pytest==7.1.2", + "mypy==0.991", + "nbmake", + "pytest-asyncio", + "types-requests", +] + +[project.urls] +Homepage = "https://jykoh.com/vwa" +Repository = "https://github.com/web-arena-x/visualwebarena" +Issues = "https://github.com/web-arena-x/visualwebarena/issues" + +[tool.setuptools.packages.find] +include = ["visualwebarena*"] + +[tool.pytest.ini_options] +testpaths = ["tests"] +python_files = "test_*.py" + +[tool.mypy] +strict = true diff --git a/run.py b/run.py index 7a48a2e..4175e59 100644 --- a/run.py +++ b/run.py @@ -19,12 +19,12 @@ import torch from PIL import Image -from agent import ( +from visualwebarena.agent import ( PromptAgent, construct_agent, ) -from agent.prompts import * -from browser_env import ( +from visualwebarena.agent.prompts import * +from visualwebarena.browser_env import ( Action, ActionTypes, ScriptBrowserEnv, @@ -32,13 +32,13 @@ Trajectory, create_stop_action, ) -from browser_env.actions import is_equivalent -from browser_env.auto_login import get_site_comb_from_filepath -from browser_env.helper_functions import ( +from visualwebarena.browser_env.actions import is_equivalent +from visualwebarena.browser_env.auto_login import get_site_comb_from_filepath +from visualwebarena.browser_env.helper_functions import ( RenderHelper, get_action_description, ) -from evaluation_harness import evaluator_router, image_utils +from visualwebarena.evaluation_harness import evaluator_router, image_utils DATASET = os.environ["DATASET"] @@ -469,7 +469,7 @@ def test( def prepare(args: argparse.Namespace) -> None: # convert prompt python files to json - from agent.prompts import to_json + from visualwebarena.agent.prompts import to_json to_json.run() diff --git a/run_demo.py b/run_demo.py index 4c0ea95..0ba1755 100644 --- a/run_demo.py +++ b/run_demo.py @@ -17,12 +17,12 @@ from beartype import beartype from PIL import Image -from agent import ( +from visualwebarena.agent import ( PromptAgent, construct_agent, ) -from agent.prompts import * -from browser_env import ( +from visualwebarena.agent.prompts import * +from visualwebarena.browser_env import ( Action, ActionTypes, ScriptBrowserEnv, @@ -30,12 +30,12 @@ Trajectory, create_stop_action, ) -from browser_env.actions import is_equivalent -from browser_env.helper_functions import ( +from visualwebarena.browser_env.actions import is_equivalent +from visualwebarena.browser_env.helper_functions import ( RenderHelper, get_action_description, ) -from evaluation_harness import image_utils +from visualwebarena.evaluation_harness import image_utils LOG_FOLDER = "log_files" Path(LOG_FOLDER).mkdir(parents=True, exist_ok=True) @@ -395,7 +395,7 @@ def test( def prepare(args: argparse.Namespace) -> None: # convert prompt python files to json - from agent.prompts import to_json + from visualwebarena.agent.prompts import to_json to_json.run() diff --git a/scripts/collect_obs.py b/scripts/collect_obs.py index 49317bc..23472f0 100644 --- a/scripts/collect_obs.py +++ b/scripts/collect_obs.py @@ -8,14 +8,14 @@ import pytest from playwright.sync_api import Page, expect import browser_env -from browser_env import ( +from visualwebarena.browser_env import ( ScriptBrowserEnv, create_id_based_action, create_key_press_action, create_playwright_action, create_scroll_action, ) -from browser_env.env_config import * +from visualwebarena.browser_env.env_config import * HEADLESS = False diff --git a/scripts/generate_test_data.py b/scripts/generate_test_data.py index 06ecbf1..d7a97c6 100644 --- a/scripts/generate_test_data.py +++ b/scripts/generate_test_data.py @@ -3,7 +3,7 @@ import json import os -from browser_env.env_config import * +from visualwebarena.browser_env.env_config import * def main() -> None: diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 0d57c06..0000000 --- a/setup.cfg +++ /dev/null @@ -1,25 +0,0 @@ -[metadata] -name = webarena - -[tool.pytest.ini_options] -testpaths = ["tests"] -python_files = "test_*.py" - -[options.extras_require] -dev = - pre-commit==3.0.1 - pytest==7.1.2 - mypy==0.991 - nbmake - pytest-asyncio - types-requests - -[options] -python_requires = >=3.7, <4 -packages = - browser_env - agent - evaluation_harness - llms -[mypy] -strict = true \ No newline at end of file diff --git a/setup.py b/setup.py deleted file mode 100644 index 57c026b..0000000 --- a/setup.py +++ /dev/null @@ -1,4 +0,0 @@ -from setuptools import setup - -if __name__ == "__main__": - setup() \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index b3bd25b..5309056 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,7 +3,7 @@ import pytest import pytest_asyncio -from browser_env import AsyncScriptBrowserEnv, ScriptBrowserEnv +from visualwebarena.browser_env import AsyncScriptBrowserEnv, ScriptBrowserEnv HEADLESS = True SLOW_MO = 0 diff --git a/tests/test_browser_env/test_action_functionalities.py b/tests/test_browser_env/test_action_functionalities.py index 4efa6a9..8eca7a2 100644 --- a/tests/test_browser_env/test_action_functionalities.py +++ b/tests/test_browser_env/test_action_functionalities.py @@ -4,7 +4,7 @@ import pytest from playwright.sync_api import Page, expect -from browser_env import ( +from visualwebarena.browser_env import ( ScriptBrowserEnv, create_id_based_action, create_key_press_action, diff --git a/tests/test_browser_env/test_actions.py b/tests/test_browser_env/test_actions.py index 332a32b..d3433cd 100644 --- a/tests/test_browser_env/test_actions.py +++ b/tests/test_browser_env/test_actions.py @@ -1,6 +1,6 @@ import numpy as np -from browser_env import * +from visualwebarena.browser_env import * def test_is_equivalent() -> None: diff --git a/tests/test_browser_env/test_auth_cookie.py b/tests/test_browser_env/test_auth_cookie.py index 2456a7a..8f496da 100644 --- a/tests/test_browser_env/test_auth_cookie.py +++ b/tests/test_browser_env/test_auth_cookie.py @@ -1,7 +1,7 @@ import asyncio import json -from browser_env import * +from visualwebarena.browser_env import * auth_json = { "cookies": [ diff --git a/tests/test_browser_env/test_playwright_actions.py b/tests/test_browser_env/test_playwright_actions.py index ce55eeb..50d606a 100644 --- a/tests/test_browser_env/test_playwright_actions.py +++ b/tests/test_browser_env/test_playwright_actions.py @@ -3,7 +3,7 @@ import pytest from playwright.sync_api import Page -from browser_env import ScriptBrowserEnv, create_playwright_action +from visualwebarena.browser_env import ScriptBrowserEnv, create_playwright_action HEADLESS = True SLOW_MO = 0 diff --git a/tests/test_browser_env/test_script_browser_env.py b/tests/test_browser_env/test_script_browser_env.py index 3d14070..bd27909 100644 --- a/tests/test_browser_env/test_script_browser_env.py +++ b/tests/test_browser_env/test_script_browser_env.py @@ -9,7 +9,7 @@ from gymnasium.vector import AsyncVectorEnv from playwright.sync_api import Page -from browser_env import ( +from visualwebarena.browser_env import ( Action, AsyncScriptBrowserEnv, DetachedPage, @@ -20,8 +20,8 @@ create_playwright_action, create_scroll_action, ) -from browser_env.actions import create_id_based_action -from browser_env.env_config import ACCOUNTS, REDDIT, SHOPPING +from visualwebarena.browser_env.actions import create_id_based_action +from visualwebarena.browser_env.env_config import ACCOUNTS, REDDIT, SHOPPING @pytest.mark.skip(reason="The actions are deprecated") def test_script_browser_env(script_browser_env: ScriptBrowserEnv) -> None: diff --git a/tests/test_evaluation_harness/test_exact_evaluators.py b/tests/test_evaluation_harness/test_exact_evaluators.py index 0fbf735..f20d3d3 100644 --- a/tests/test_evaluation_harness/test_exact_evaluators.py +++ b/tests/test_evaluation_harness/test_exact_evaluators.py @@ -10,17 +10,17 @@ from PIL import Image from py import test -from agent import Agent, TeacherForcingAgent -from browser_env import ActionTypes, ScriptBrowserEnv -from browser_env.env_config import * -from evaluation_harness import ( +from visualwebarena.agent import Agent, TeacherForcingAgent +from visualwebarena.browser_env import ActionTypes, ScriptBrowserEnv +from visualwebarena.browser_env.env_config import * +from visualwebarena.evaluation_harness import ( HTMLContentExactEvaluator, PageImageEvaluator, StringEvaluator, URLExactEvaluator, image_utils, ) -from evaluation_harness.evaluators import EvaluatorComb +from visualwebarena.evaluation_harness.evaluators import EvaluatorComb IN_GITHUB_ACTIONS = os.getenv("GITHUB_ACTIONS") == "true" HEADLESS = True diff --git a/tests/test_evaluation_harness/test_helper_functions.py b/tests/test_evaluation_harness/test_helper_functions.py index a176695..b8e1274 100644 --- a/tests/test_evaluation_harness/test_helper_functions.py +++ b/tests/test_evaluation_harness/test_helper_functions.py @@ -1,9 +1,9 @@ import json import os -from browser_env import ScriptBrowserEnv -from browser_env.env_config import * -from evaluation_harness.helper_functions import ( +from visualwebarena.browser_env import ScriptBrowserEnv +from visualwebarena.browser_env.env_config import * +from visualwebarena.evaluation_harness.helper_functions import ( get_query_text, get_query_text_lowercase, reddit_get_latest_comment_content_by_username, diff --git a/visualwebarena/__init__.py b/visualwebarena/__init__.py new file mode 100644 index 0000000..238f770 --- /dev/null +++ b/visualwebarena/__init__.py @@ -0,0 +1,13 @@ +""" +VisualWebArena: Evaluating Multimodal Agents on Realistic Visual Web Tasks + +This package provides components for evaluating multimodal autonomous language agents +on web-based visual tasks. +""" + +__version__ = "0.1.0" + +from .browser_env import ScriptBrowserEnv +from .agent import PromptAgent + +__all__ = ["ScriptBrowserEnv", "PromptAgent"] diff --git a/agent/__init__.py b/visualwebarena/agent/__init__.py similarity index 100% rename from agent/__init__.py rename to visualwebarena/agent/__init__.py diff --git a/agent/agent.py b/visualwebarena/agent/agent.py similarity index 96% rename from agent/agent.py rename to visualwebarena/agent/agent.py index 5fbcba9..f408f6c 100644 --- a/agent/agent.py +++ b/visualwebarena/agent/agent.py @@ -6,24 +6,24 @@ from beartype import beartype from PIL import Image -from agent.prompts import * -from browser_env import Trajectory -from browser_env.actions import ( +from visualwebarena.agent.prompts import * +from visualwebarena.browser_env import Trajectory +from visualwebarena.browser_env.actions import ( Action, ActionParsingError, create_id_based_action, create_none_action, create_playwright_action, ) -from browser_env.utils import Observation, StateInfo -from llms import ( +from visualwebarena.browser_env.utils import Observation, StateInfo +from visualwebarena.llms import ( call_llm, generate_from_huggingface_completion, generate_from_openai_chat_completion, generate_from_openai_completion, lm_config, ) -from llms.tokenizers import Tokenizer +from visualwebarena.llms.tokenizers import Tokenizer class Agent: diff --git a/agent/prompts/__init__.py b/visualwebarena/agent/prompts/__init__.py similarity index 100% rename from agent/prompts/__init__.py rename to visualwebarena/agent/prompts/__init__.py diff --git a/agent/prompts/jsons/p_cot_id_actree_2s.json b/visualwebarena/agent/prompts/jsons/p_cot_id_actree_2s.json similarity index 100% rename from agent/prompts/jsons/p_cot_id_actree_2s.json rename to visualwebarena/agent/prompts/jsons/p_cot_id_actree_2s.json diff --git a/agent/prompts/jsons/p_cot_id_actree_2s_no_na.json b/visualwebarena/agent/prompts/jsons/p_cot_id_actree_2s_no_na.json similarity index 100% rename from agent/prompts/jsons/p_cot_id_actree_2s_no_na.json rename to visualwebarena/agent/prompts/jsons/p_cot_id_actree_2s_no_na.json diff --git a/agent/prompts/jsons/p_cot_id_actree_3s.json b/visualwebarena/agent/prompts/jsons/p_cot_id_actree_3s.json similarity index 100% rename from agent/prompts/jsons/p_cot_id_actree_3s.json rename to visualwebarena/agent/prompts/jsons/p_cot_id_actree_3s.json diff --git a/agent/prompts/jsons/p_multimodal_cot_id_actree_3s.json b/visualwebarena/agent/prompts/jsons/p_multimodal_cot_id_actree_3s.json similarity index 100% rename from agent/prompts/jsons/p_multimodal_cot_id_actree_3s.json rename to visualwebarena/agent/prompts/jsons/p_multimodal_cot_id_actree_3s.json diff --git a/agent/prompts/jsons/p_som_cot_id_actree_3s.json b/visualwebarena/agent/prompts/jsons/p_som_cot_id_actree_3s.json similarity index 100% rename from agent/prompts/jsons/p_som_cot_id_actree_3s.json rename to visualwebarena/agent/prompts/jsons/p_som_cot_id_actree_3s.json diff --git a/agent/prompts/multimodal_examples/multimodal_example1.png b/visualwebarena/agent/prompts/multimodal_examples/multimodal_example1.png similarity index 100% rename from agent/prompts/multimodal_examples/multimodal_example1.png rename to visualwebarena/agent/prompts/multimodal_examples/multimodal_example1.png diff --git a/agent/prompts/multimodal_examples/multimodal_example2.png b/visualwebarena/agent/prompts/multimodal_examples/multimodal_example2.png similarity index 100% rename from agent/prompts/multimodal_examples/multimodal_example2.png rename to visualwebarena/agent/prompts/multimodal_examples/multimodal_example2.png diff --git a/agent/prompts/multimodal_examples/multimodal_example3.png b/visualwebarena/agent/prompts/multimodal_examples/multimodal_example3.png similarity index 100% rename from agent/prompts/multimodal_examples/multimodal_example3.png rename to visualwebarena/agent/prompts/multimodal_examples/multimodal_example3.png diff --git a/agent/prompts/prompt_constructor.py b/visualwebarena/agent/prompts/prompt_constructor.py similarity index 97% rename from agent/prompts/prompt_constructor.py rename to visualwebarena/agent/prompts/prompt_constructor.py index 5c50c9d..13bf310 100644 --- a/agent/prompts/prompt_constructor.py +++ b/visualwebarena/agent/prompts/prompt_constructor.py @@ -4,12 +4,12 @@ from typing import Any, TypedDict from PIL import Image -from browser_env import Action, ActionParsingError, Trajectory -from browser_env.env_config import URL_MAPPINGS -from browser_env.utils import StateInfo, pil_to_b64, pil_to_vertex -from llms import lm_config -from llms.tokenizers import Tokenizer -from llms.utils import APIInput +from visualwebarena.browser_env import Action, ActionParsingError, Trajectory +from visualwebarena.browser_env.env_config import URL_MAPPINGS +from visualwebarena.browser_env.utils import StateInfo, pil_to_b64, pil_to_vertex +from visualwebarena.llms import lm_config +from visualwebarena.llms.tokenizers import Tokenizer +from visualwebarena.llms.utils import APIInput class Instruction(TypedDict): diff --git a/agent/prompts/raw/p_cot_id_actree_2s.py b/visualwebarena/agent/prompts/raw/p_cot_id_actree_2s.py similarity index 100% rename from agent/prompts/raw/p_cot_id_actree_2s.py rename to visualwebarena/agent/prompts/raw/p_cot_id_actree_2s.py diff --git a/agent/prompts/raw/p_cot_id_actree_2s_no_na.py b/visualwebarena/agent/prompts/raw/p_cot_id_actree_2s_no_na.py similarity index 100% rename from agent/prompts/raw/p_cot_id_actree_2s_no_na.py rename to visualwebarena/agent/prompts/raw/p_cot_id_actree_2s_no_na.py diff --git a/agent/prompts/raw/p_cot_id_actree_3s.py b/visualwebarena/agent/prompts/raw/p_cot_id_actree_3s.py similarity index 100% rename from agent/prompts/raw/p_cot_id_actree_3s.py rename to visualwebarena/agent/prompts/raw/p_cot_id_actree_3s.py diff --git a/agent/prompts/raw/p_multimodal_cot_id_actree_3s.py b/visualwebarena/agent/prompts/raw/p_multimodal_cot_id_actree_3s.py similarity index 100% rename from agent/prompts/raw/p_multimodal_cot_id_actree_3s.py rename to visualwebarena/agent/prompts/raw/p_multimodal_cot_id_actree_3s.py diff --git a/agent/prompts/raw/p_som_cot_id_actree_3s.py b/visualwebarena/agent/prompts/raw/p_som_cot_id_actree_3s.py similarity index 100% rename from agent/prompts/raw/p_som_cot_id_actree_3s.py rename to visualwebarena/agent/prompts/raw/p_som_cot_id_actree_3s.py diff --git a/agent/prompts/som_examples/som_example1.png b/visualwebarena/agent/prompts/som_examples/som_example1.png similarity index 100% rename from agent/prompts/som_examples/som_example1.png rename to visualwebarena/agent/prompts/som_examples/som_example1.png diff --git a/agent/prompts/som_examples/som_example2.png b/visualwebarena/agent/prompts/som_examples/som_example2.png similarity index 100% rename from agent/prompts/som_examples/som_example2.png rename to visualwebarena/agent/prompts/som_examples/som_example2.png diff --git a/agent/prompts/som_examples/som_example3.png b/visualwebarena/agent/prompts/som_examples/som_example3.png similarity index 100% rename from agent/prompts/som_examples/som_example3.png rename to visualwebarena/agent/prompts/som_examples/som_example3.png diff --git a/agent/prompts/to_json.py b/visualwebarena/agent/prompts/to_json.py similarity index 100% rename from agent/prompts/to_json.py rename to visualwebarena/agent/prompts/to_json.py diff --git a/browser_env/__init__.py b/visualwebarena/browser_env/__init__.py similarity index 100% rename from browser_env/__init__.py rename to visualwebarena/browser_env/__init__.py diff --git a/browser_env/actions.py b/visualwebarena/browser_env/actions.py similarity index 99% rename from browser_env/actions.py rename to visualwebarena/browser_env/actions.py index 13aaf1b..92878ab 100644 --- a/browser_env/actions.py +++ b/visualwebarena/browser_env/actions.py @@ -21,7 +21,7 @@ from playwright.async_api import Page as APage from playwright.sync_api import BrowserContext, Locator, Page -from browser_env.constants import ( +from visualwebarena.browser_env.constants import ( ASCII_CHARSET, FREQ_UNICODE_CHARSET, MAX_ANSWER_LENGTH, @@ -40,7 +40,7 @@ URL_MAX_LENGTH, RolesType, ) -from browser_env.processors import ObservationProcessor +from visualwebarena.browser_env.processors import ObservationProcessor class ParsedPlaywrightCode(TypedDict): @@ -49,7 +49,7 @@ class ParsedPlaywrightCode(TypedDict): keywords: dict[str, Any] -from browser_env.processors import ( +from visualwebarena.browser_env.processors import ( ObservationProcessor, TextObervationProcessor, ) diff --git a/browser_env/async_envs.py b/visualwebarena/browser_env/async_envs.py similarity index 100% rename from browser_env/async_envs.py rename to visualwebarena/browser_env/async_envs.py diff --git a/browser_env/auto_login.py b/visualwebarena/browser_env/auto_login.py similarity index 97% rename from browser_env/auto_login.py rename to visualwebarena/browser_env/auto_login.py index 67a22c9..0639e55 100644 --- a/browser_env/auto_login.py +++ b/visualwebarena/browser_env/auto_login.py @@ -8,11 +8,11 @@ from pathlib import Path from playwright.sync_api import sync_playwright -from browser_env.env_config import ACCOUNTS +from visualwebarena.browser_env.env_config import ACCOUNTS DATASET = os.environ["DATASET"] if DATASET == "webarena": - from browser_env.env_config import ( + from visualwebarena.browser_env.env_config import ( GITLAB, REDDIT, SHOPPING, @@ -29,7 +29,7 @@ KEYWORDS = ["", "", "Dashboard", "Delete"] elif DATASET == "visualwebarena": - from browser_env.env_config import ( + from visualwebarena.browser_env.env_config import ( CLASSIFIEDS, REDDIT, SHOPPING, diff --git a/browser_env/constants.py b/visualwebarena/browser_env/constants.py similarity index 100% rename from browser_env/constants.py rename to visualwebarena/browser_env/constants.py diff --git a/browser_env/env_config.py b/visualwebarena/browser_env/env_config.py similarity index 100% rename from browser_env/env_config.py rename to visualwebarena/browser_env/env_config.py diff --git a/browser_env/envs.py b/visualwebarena/browser_env/envs.py similarity index 98% rename from browser_env/envs.py rename to visualwebarena/browser_env/envs.py index ef326bb..c8f93df 100644 --- a/browser_env/envs.py +++ b/visualwebarena/browser_env/envs.py @@ -23,9 +23,9 @@ sync_playwright, ) -DATASET = os.environ["DATASET"] +DATASET = os.environ.get("DATASET", "visualwebarena") if DATASET == "visualwebarena": - from browser_env.env_config import ( + from visualwebarena.browser_env.env_config import ( CLASSIFIEDS, CLASSIFIEDS_RESET_TOKEN, ) diff --git a/browser_env/helper_functions.py b/visualwebarena/browser_env/helper_functions.py similarity index 99% rename from browser_env/helper_functions.py rename to visualwebarena/browser_env/helper_functions.py index 54dce12..123212b 100644 --- a/browser_env/helper_functions.py +++ b/visualwebarena/browser_env/helper_functions.py @@ -7,8 +7,8 @@ from PIL import Image -from agent.prompts import * -from browser_env import ( +from visualwebarena.agent.prompts import * +from visualwebarena.browser_env import ( Action, ActionTypes, ObservationMetadata, diff --git a/browser_env/javascript/frame_mark_elements.js b/visualwebarena/browser_env/javascript/frame_mark_elements.js similarity index 100% rename from browser_env/javascript/frame_mark_elements.js rename to visualwebarena/browser_env/javascript/frame_mark_elements.js diff --git a/browser_env/javascript/frame_unmark_elements.js b/visualwebarena/browser_env/javascript/frame_unmark_elements.js similarity index 100% rename from browser_env/javascript/frame_unmark_elements.js rename to visualwebarena/browser_env/javascript/frame_unmark_elements.js diff --git a/browser_env/processors.py b/visualwebarena/browser_env/processors.py similarity index 99% rename from browser_env/processors.py rename to visualwebarena/browser_env/processors.py index f9eb8bc..875a98c 100644 --- a/browser_env/processors.py +++ b/visualwebarena/browser_env/processors.py @@ -17,7 +17,7 @@ from PIL import Image, ImageDraw, ImageFont from playwright.sync_api import CDPSession, Page, ViewportSize -from browser_env.constants import ( +from visualwebarena.browser_env.constants import ( ASCII_CHARSET, FREQ_UNICODE_CHARSET, IGNORED_ACTREE_PROPERTIES, diff --git a/browser_env/py.typed b/visualwebarena/browser_env/py.typed similarity index 100% rename from browser_env/py.typed rename to visualwebarena/browser_env/py.typed diff --git a/browser_env/trajectory.py b/visualwebarena/browser_env/trajectory.py similarity index 100% rename from browser_env/trajectory.py rename to visualwebarena/browser_env/trajectory.py diff --git a/browser_env/utils.py b/visualwebarena/browser_env/utils.py similarity index 100% rename from browser_env/utils.py rename to visualwebarena/browser_env/utils.py diff --git a/evaluation_harness/__init__.py b/visualwebarena/evaluation_harness/__init__.py similarity index 100% rename from evaluation_harness/__init__.py rename to visualwebarena/evaluation_harness/__init__.py diff --git a/evaluation_harness/evaluators.py b/visualwebarena/evaluation_harness/evaluators.py similarity index 99% rename from evaluation_harness/evaluators.py rename to visualwebarena/evaluation_harness/evaluators.py index 03224fd..f67e19a 100644 --- a/evaluation_harness/evaluators.py +++ b/visualwebarena/evaluation_harness/evaluators.py @@ -17,10 +17,10 @@ from PIL import Image from playwright.sync_api import CDPSession, Page -from browser_env.actions import Action -from browser_env.utils import StateInfo -from evaluation_harness import image_utils -from evaluation_harness.helper_functions import ( +from visualwebarena.browser_env.actions import Action +from visualwebarena.browser_env.utils import StateInfo +from visualwebarena.evaluation_harness import image_utils +from visualwebarena.evaluation_harness.helper_functions import ( PseudoPage, get_query_text, get_query_text_lowercase, diff --git a/evaluation_harness/helper_functions.py b/visualwebarena/evaluation_harness/helper_functions.py similarity index 99% rename from evaluation_harness/helper_functions.py rename to visualwebarena/evaluation_harness/helper_functions.py index c934c16..d100bbd 100644 --- a/evaluation_harness/helper_functions.py +++ b/visualwebarena/evaluation_harness/helper_functions.py @@ -9,13 +9,13 @@ from beartype.typing import Dict, List from playwright.sync_api import CDPSession, Page -from browser_env.env_config import ( +from visualwebarena.browser_env.env_config import ( ACCOUNTS, REDDIT, SHOPPING, WIKIPEDIA, ) -from llms.providers.openai_utils import ( +from visualwebarena.llms.providers.openai_utils import ( generate_from_openai_chat_completion, ) diff --git a/evaluation_harness/image_utils.py b/visualwebarena/evaluation_harness/image_utils.py similarity index 100% rename from evaluation_harness/image_utils.py rename to visualwebarena/evaluation_harness/image_utils.py diff --git a/llms/__init__.py b/visualwebarena/llms/__init__.py similarity index 100% rename from llms/__init__.py rename to visualwebarena/llms/__init__.py diff --git a/llms/lm_config.py b/visualwebarena/llms/lm_config.py similarity index 100% rename from llms/lm_config.py rename to visualwebarena/llms/lm_config.py diff --git a/llms/providers/gemini_utils.py b/visualwebarena/llms/providers/gemini_utils.py similarity index 100% rename from llms/providers/gemini_utils.py rename to visualwebarena/llms/providers/gemini_utils.py diff --git a/llms/providers/hf_utils.py b/visualwebarena/llms/providers/hf_utils.py similarity index 100% rename from llms/providers/hf_utils.py rename to visualwebarena/llms/providers/hf_utils.py diff --git a/llms/providers/openai_utils.py b/visualwebarena/llms/providers/openai_utils.py similarity index 100% rename from llms/providers/openai_utils.py rename to visualwebarena/llms/providers/openai_utils.py diff --git a/llms/tokenizers.py b/visualwebarena/llms/tokenizers.py similarity index 100% rename from llms/tokenizers.py rename to visualwebarena/llms/tokenizers.py diff --git a/llms/utils.py b/visualwebarena/llms/utils.py similarity index 96% rename from llms/utils.py rename to visualwebarena/llms/utils.py index 0b94c52..be2df79 100644 --- a/llms/utils.py +++ b/visualwebarena/llms/utils.py @@ -3,11 +3,11 @@ try: from vertexai.preview.generative_models import Image - from llms import generate_from_gemini_completion + from visualwebarena.llms import generate_from_gemini_completion except: print('Google Cloud not set up, skipping import of vertexai.preview.generative_models.Image and llms.generate_from_gemini_completion') -from llms import ( +from visualwebarena.llms import ( generate_from_huggingface_completion, generate_from_openai_chat_completion, generate_from_openai_completion,