From f1cbfcc83a71a522a3c592d0673a2a90afd8e4ea Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Tue, 18 Mar 2025 12:27:20 -0600 Subject: [PATCH 01/26] Add more profiling --- .../gym_environments/base_overcooked_env.py | 23 +++++++++++++++++++ scripts/bash_scripts/profile.sh | 8 ++++--- scripts/profile_analyze.py | 4 ++++ 3 files changed, 32 insertions(+), 3 deletions(-) create mode 100644 scripts/profile_analyze.py diff --git a/oai_agents/gym_environments/base_overcooked_env.py b/oai_agents/gym_environments/base_overcooked_env.py index 5108c8b..6c35882 100644 --- a/oai_agents/gym_environments/base_overcooked_env.py +++ b/oai_agents/gym_environments/base_overcooked_env.py @@ -234,6 +234,13 @@ def get_teammate_from_idx(self, idx): return self.teammates[id] def step(self, action): + import cProfile + import pstats + import time + profiler = cProfile.Profile() + profiler.enable() + + if len(self.teammates) == 0: raise ValueError('set_teammates must be set called before starting game.') @@ -278,12 +285,24 @@ def step(self, action): ratio = self.final_sparse_r_ratio reward = self.learner.calculate_reward(p_idx=self.p_idx, env_info=info, ratio=ratio, num_players=self.mdp.num_players) self.step_count += 1 + + + profiler.disable() + c_time = time.strftime("%Y%m%d-%H%M%S") + profiler.dump_stats(f'data/profile/base_overcooked_env_step_{c_time}.prof') return self.get_obs(self.p_idx, done=done), reward, done, info def set_reset_p_idx(self, p_idx): self.reset_p_idx = p_idx def reset(self, p_idx=None): + import cProfile + import pstats + import time + profiler = cProfile.Profile() + profiler.enable() + + if p_idx is not None: self.p_idx = p_idx elif self.reset_p_idx is not None: @@ -313,6 +332,10 @@ def reset(self, p_idx=None): # Reset subtask counts self.completed_tasks = [np.zeros(Subtasks.NUM_SUBTASKS), np.zeros(Subtasks.NUM_SUBTASKS)] + + profiler.disable() + c_time = time.strftime("%Y%m%d-%H%M%S") + profiler.dump_stats(f'data/profile/base_overcooked_env_reset_{c_time}.prof') return self.get_obs(self.p_idx, on_reset=True) diff --git a/scripts/bash_scripts/profile.sh b/scripts/bash_scripts/profile.sh index 116d85c..91e98d5 100644 --- a/scripts/bash_scripts/profile.sh +++ b/scripts/bash_scripts/profile.sh @@ -6,20 +6,22 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="counter_circuit" TOTAL_EGO_AGENTS=1 -EXP_DIR="Test/Profile" -POP_FORCE_TRAINING=false +POP_FORCE_TRAINING=true ADVERSARY_FORCE_TRAINING=false PRIMARY_FORCE_TRAINING=false source scripts/bash_scripts/env_config.sh # Overwrite the default values from env_config.sh here if needed: +EXP_DIR="Test/Profile" N_ENVS=2 WANDB_MODE="disabled" EPOCH_TIMESTEPS=100000 N_X_SP_TOTAL_TRAINING_TIMESTEPS=200000 -python -m cProfile -o profile_results.prof scripts/train_agents.py \ +export CURRENT_TIME=$(date +"%Y-%m-%d_%H-%M-%S") + +python -m cProfile -o data/profile/profile_results_all_${CURRENT_TIME}.prof scripts/train_agents.py \ --layout-names ${LAYOUT_NAMES} \ --algo-name ${ALGO} \ --exp-dir ${EXP_DIR} \ diff --git a/scripts/profile_analyze.py b/scripts/profile_analyze.py new file mode 100644 index 0000000..0a33862 --- /dev/null +++ b/scripts/profile_analyze.py @@ -0,0 +1,4 @@ +import pstats + +p = pstats.Stats("data/profile/profile_results.prof") +p.strip_dirs().sort_stats("cumulative").print_stats(20) # Show top 20 functions From 5d3016a07ec35e9f4d83f2deed78e251c06988fa Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Tue, 18 Mar 2025 12:29:15 -0600 Subject: [PATCH 02/26] mask cuda --- scripts/bash_scripts/profile.sh | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/scripts/bash_scripts/profile.sh b/scripts/bash_scripts/profile.sh index 91e98d5..3c73f7b 100644 --- a/scripts/bash_scripts/profile.sh +++ b/scripts/bash_scripts/profile.sh @@ -21,7 +21,7 @@ N_X_SP_TOTAL_TRAINING_TIMESTEPS=200000 export CURRENT_TIME=$(date +"%Y-%m-%d_%H-%M-%S") -python -m cProfile -o data/profile/profile_results_all_${CURRENT_TIME}.prof scripts/train_agents.py \ +CUDA_VISIBLE_DEVICES=1 python -m cProfile -o data/profile/profile_results_all_${CURRENT_TIME}.prof scripts/train_agents.py \ --layout-names ${LAYOUT_NAMES} \ --algo-name ${ALGO} \ --exp-dir ${EXP_DIR} \ From 6b6297f550e17e863541c4165ff8655542d23a86 Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Tue, 18 Mar 2025 12:36:34 -0600 Subject: [PATCH 03/26] add args for profile_analyze --- scripts/profile_analyze.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/profile_analyze.py b/scripts/profile_analyze.py index 0a33862..d7136f9 100644 --- a/scripts/profile_analyze.py +++ b/scripts/profile_analyze.py @@ -1,4 +1,9 @@ import pstats +import argparse -p = pstats.Stats("data/profile/profile_results.prof") -p.strip_dirs().sort_stats("cumulative").print_stats(20) # Show top 20 functions +parser = argparse.ArgumentParser() +parser.add_argument("name", help="name of the profile file") +args = parser.parse_args() +name = args.name +p = pstats.Stats(f"data/profile/{name}") +p.strip_dirs().sort_stats("cumulative").print_stats(20) # Show top 20 functions \ No newline at end of file From 864adf5ff1fae542f3228503844108ed54c0d218 Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Tue, 18 Mar 2025 12:45:10 -0600 Subject: [PATCH 04/26] remove profiling from step and reset --- .../gym_environments/base_overcooked_env.py | 23 ------------------- 1 file changed, 23 deletions(-) diff --git a/oai_agents/gym_environments/base_overcooked_env.py b/oai_agents/gym_environments/base_overcooked_env.py index 6c35882..5108c8b 100644 --- a/oai_agents/gym_environments/base_overcooked_env.py +++ b/oai_agents/gym_environments/base_overcooked_env.py @@ -234,13 +234,6 @@ def get_teammate_from_idx(self, idx): return self.teammates[id] def step(self, action): - import cProfile - import pstats - import time - profiler = cProfile.Profile() - profiler.enable() - - if len(self.teammates) == 0: raise ValueError('set_teammates must be set called before starting game.') @@ -285,24 +278,12 @@ def step(self, action): ratio = self.final_sparse_r_ratio reward = self.learner.calculate_reward(p_idx=self.p_idx, env_info=info, ratio=ratio, num_players=self.mdp.num_players) self.step_count += 1 - - - profiler.disable() - c_time = time.strftime("%Y%m%d-%H%M%S") - profiler.dump_stats(f'data/profile/base_overcooked_env_step_{c_time}.prof') return self.get_obs(self.p_idx, done=done), reward, done, info def set_reset_p_idx(self, p_idx): self.reset_p_idx = p_idx def reset(self, p_idx=None): - import cProfile - import pstats - import time - profiler = cProfile.Profile() - profiler.enable() - - if p_idx is not None: self.p_idx = p_idx elif self.reset_p_idx is not None: @@ -332,10 +313,6 @@ def reset(self, p_idx=None): # Reset subtask counts self.completed_tasks = [np.zeros(Subtasks.NUM_SUBTASKS), np.zeros(Subtasks.NUM_SUBTASKS)] - - profiler.disable() - c_time = time.strftime("%Y%m%d-%H%M%S") - profiler.dump_stats(f'data/profile/base_overcooked_env_reset_{c_time}.prof') return self.get_obs(self.p_idx, on_reset=True) From df20f2c9e30a1fa1141548cef8101f3b411ddb84 Mon Sep 17 00:00:00 2001 From: Ava 
Abderezaei Date: Tue, 18 Mar 2025 12:48:23 -0600 Subject: [PATCH 05/26] add profiling for the learn function --- oai_agents/agents/base_agent.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/oai_agents/agents/base_agent.py b/oai_agents/agents/base_agent.py index 32d7cb8..71962d9 100644 --- a/oai_agents/agents/base_agent.py +++ b/oai_agents/agents/base_agent.py @@ -225,7 +225,17 @@ def get_distribution(self, obs: th.Tensor): return dist def learn(self, epoch_timesteps): + import cProfile + import pstats + import time + profiler = cProfile.Profile() + profiler.enable() + self.agent.learn(total_timesteps=epoch_timesteps, reset_num_timesteps=False) + + profiler.disable() + c_time = time.strftime("%Y%m%d-%H%M%S") + profiler.dump_stats(f'data/profile/learn_{c_time}.prof') self.num_timesteps = self.agent.num_timesteps def save(self, path: Path) -> None: From a459a9bd338a3a4dcc81f8b8a50f6d8a736bfbbf Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Tue, 18 Mar 2025 13:17:58 -0600 Subject: [PATCH 06/26] run step separately --- oai_agents/common/overcooked_gui.py | 1 - .../gym_environments/base_overcooked_env.py | 25 ++++++++++++------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/oai_agents/common/overcooked_gui.py b/oai_agents/common/overcooked_gui.py index 43eda13..65a4a3b 100644 --- a/oai_agents/common/overcooked_gui.py +++ b/oai_agents/common/overcooked_gui.py @@ -71,7 +71,6 @@ def __init__(self, args, layout_name=None, agent=None, teammates=None, p_idx=0, else: self.env = OvercookedGymEnv(layout_name=self.layout_name, args=args, ret_completed_subtasks=False, is_eval_env=True, horizon=horizon, learner_type='originaler', - ) self.agent = agent self.p_idx = p_idx diff --git a/oai_agents/gym_environments/base_overcooked_env.py b/oai_agents/gym_environments/base_overcooked_env.py index 5108c8b..23dac29 100644 --- a/oai_agents/gym_environments/base_overcooked_env.py +++ b/oai_agents/gym_environments/base_overcooked_env.py @@ -1,7 +1,7 @@ from oai_agents.common.state_encodings import ENCODING_SCHEMES from oai_agents.common.subtasks import Subtasks, calculate_completed_subtask, get_doable_subtasks from oai_agents.common.learner import LearnerType, Learner -from oai_agents.agents.agent_utils import CustomAgent +from oai_agents.agents.agent_utils import CustomAgent, DummyAgent from overcooked_ai_py.mdp.overcooked_mdp import OvercookedGridworld, Action, Direction from overcooked_ai_py.mdp.overcooked_env import OvercookedEnv @@ -335,15 +335,22 @@ def close(self): if __name__ == '__main__': from oai_agents.common.arguments import get_arguments - args = get_arguments() - env = OvercookedGymEnv(p1=DummyAgent(), - args=args) # make('overcooked_ai.agents:OvercookedGymEnv-v0', layout='asymmetric_advantages', encoding_fn=encode_state, args=args) - print(check_env(env)) - env.setup_visualization() - env.reset() - env.render() + + args.num_players = 2 + + env = OvercookedGymEnv(layout_name=args.layout_names[0], args=args, ret_completed_subtasks=False, + is_eval_env=True, horizon=400, learner_type='originaler') + + p_idx = 0 + teammates = [DummyAgent()] + + env.set_teammates(teammates) + env.reset(p_idx=p_idx) done = False + while not done: - obs, reward, done, info = env.step(Action.ACTION_TO_INDEX[np.random.choice(Action.ALL_ACTIONS)]) + action = np.random.randint(0, Action.NUM_ACTIONS) + action_idx = Action.ACTION_TO_INDEX[Action.STAY] + obs, reward, done, info = env.step(action_idx) env.render() From 3e6641b7721099cead17d1a38f505ecc6a938130 Mon Sep 17 00:00:00 2001 From: Ava
Abderezaei Date: Tue, 18 Mar 2025 14:00:06 -0600 Subject: [PATCH 07/26] Add default for overcooked_sim --- oai_agents/agents/hrl.py | 2 +- oai_agents/agents/rl.py | 2 +- oai_agents/common/arguments.py | 2 +- oai_agents/common/overcooked_simulation.py | 22 ++++++++++++++++++++++ scripts/run_overcooked_game.py | 4 ++-- 5 files changed, 27 insertions(+), 5 deletions(-) diff --git a/oai_agents/agents/hrl.py b/oai_agents/agents/hrl.py index a9779c4..479f64a 100644 --- a/oai_agents/agents/hrl.py +++ b/oai_agents/agents/hrl.py @@ -5,7 +5,7 @@ from oai_agents.common.arguments import get_arguments, get_args_to_save, set_args_from_load from oai_agents.common.subtasks import Subtasks # from oai_agents.gym_environments.worker_env import OvercookedSubtaskGymEnv -from oai_agents.gym_environments.manager_env import OvercookedManagerGymEnv +# from oai_agents.gym_environments.manager_env import OvercookedManagerGymEnv from overcooked_ai_py.mdp.overcooked_mdp import Action, OvercookedGridworld diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index 2d2fb6a..ba62c19 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -4,7 +4,6 @@ from oai_agents.common.state_encodings import ENCODING_SCHEMES from oai_agents.common.tags import AgentPerformance, TeamType, TeammatesCollection, KeyCheckpoints from oai_agents.agents.agent_utils import CustomAgent -from oai_agents.gym_environments.base_overcooked_env import OvercookedGymEnv from oai_agents.common.checked_model_name_handler import CheckedModelNameHandler import numpy as np @@ -192,6 +191,7 @@ def print_tc_helper(self, teammates_collection, message=None): def get_envs(self, _env, _eval_envs, deterministic, learner_type, start_timestep: int = 0): + from oai_agents.gym_environments.base_overcooked_env import OvercookedGymEnv if _env is None: env_kwargs = {'shape_rewards': True, 'full_init': False, 'stack_frames': self.use_frame_stack, 'deterministic': deterministic,'args': self.args, 'learner_type': learner_type, 'start_timestep': start_timestep} diff --git a/oai_agents/common/arguments.py b/oai_agents/common/arguments.py index 1e03433..dda7517 100644 --- a/oai_agents/common/arguments.py +++ b/oai_agents/common/arguments.py @@ -11,7 +11,7 @@ def get_arguments(additional_args=[]): :return: """ parser = argparse.ArgumentParser(description='PyTorch Soft Actor-Critic Args') - parser.add_argument('--layout-names', help='Overcooked maps to use') + parser.add_argument('--layout-names', help='Overcooked maps to use', default='default') parser.add_argument('--horizon', type=int, default=400, help='Max timesteps in a rollout') parser.add_argument('--num_stack', type=int, default=3, help='Number of frame stacks to use in training if frame stacks are being used') parser.add_argument('--encoding-fn', type=str, default='OAI_egocentric', diff --git a/oai_agents/common/overcooked_simulation.py b/oai_agents/common/overcooked_simulation.py index 0af6cea..20ba9b0 100644 --- a/oai_agents/common/overcooked_simulation.py +++ b/oai_agents/common/overcooked_simulation.py @@ -85,3 +85,25 @@ def run_simulation(self, how_many_times): trajectory = self._run_simulation() trajectories.append(trajectory) return trajectories + + + +if __name__ == '__main__': + from oai_agents.common.arguments import get_arguments + from oai_agents.agents.agent_utils import DummyAgent, CustomAgent, load_agent + from pathlib import Path + + args = get_arguments() + args.num_players = 2 + args.layout_names = ['counter_circuit'] + args.n_envs = 4 + p_idx = 0 + + path = 
'agent_models/Complex/2/SP_hd256_seed2602/last' + agent = load_agent(Path(path), args) + + # teammates = [agent] + teammates = [CustomAgent(args=args, name='tm', trajectories={args.layout_names[0]: [(1, 1), (1, 2)]})] + + simulation = OvercookedSimulation(args=args, agent=agent, teammates=teammates, layout_name=args.layout_names[0], p_idx=p_idx, horizon=400) + trajectories = simulation.run_simulation(how_many_times=4) \ No newline at end of file diff --git a/scripts/run_overcooked_game.py b/scripts/run_overcooked_game.py index 3f03a43..4ed0dda 100644 --- a/scripts/run_overcooked_game.py +++ b/scripts/run_overcooked_game.py @@ -32,8 +32,8 @@ def get_teammate_from_pop_file(tm_name, tm_score, pop_path, layout_name): # teammates = [load_agent(Path(tm_path), args) for tm_path in teammates_path[:args.num_players - 1]] # trajectories = tile locations. Top left of the layout is (0, 0), bottom right is (M, N) - # teammates = [CustomAgent(args=args, name='human', trajectories={args.layout: [(8, 1), (8, 2), (7, 2), (6, 2)]})] - teammates = [DummyAgent(action='random') for _ in range(args.num_players - 1)] + teammates = [CustomAgent(args=args, name='human', trajectories={args.layout: [(1, 1), (1, 2)]})] + # teammates = [DummyAgent(action='random') for _ in range(args.num_players - 1)] # player_path = 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best' # player = load_agent(Path(player_path), args) From f28cf44bc3cd017da6c35b3cb845b37a0c0d2107 Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Tue, 18 Mar 2025 15:26:13 -0600 Subject: [PATCH 08/26] small optimizations --- oai_agents/agents/base_agent.py | 1 + .../gym_environments/base_overcooked_env.py | 19 ++++++++++++++----- scripts/bash_scripts/profile.sh | 4 ++-- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/oai_agents/agents/base_agent.py b/oai_agents/agents/base_agent.py index 71962d9..616452d 100644 --- a/oai_agents/agents/base_agent.py +++ b/oai_agents/agents/base_agent.py @@ -320,6 +320,7 @@ def predict(self, obs, state=None, episode_start=None, deterministic=False): # Updated to include action masking self.policy.set_training_mode(False) obs, vectorized_env = self.policy.obs_to_tensor(obs) + with th.no_grad(): if 'subtask_mask' in obs and np.prod(obs['subtask_mask'].shape) == np.prod(self.policy.action_space.n): dist = self.policy.get_distribution(obs, action_masks=obs['subtask_mask']) diff --git a/oai_agents/gym_environments/base_overcooked_env.py b/oai_agents/gym_environments/base_overcooked_env.py index 23dac29..43f45e3 100644 --- a/oai_agents/gym_environments/base_overcooked_env.py +++ b/oai_agents/gym_environments/base_overcooked_env.py @@ -195,6 +195,7 @@ def action_masks(self, p_idx): return get_doable_subtasks(self.state, self.prev_subtask[p_idx], self.layout_name, self.terrain, p_idx, self.valid_counters, USEABLE_COUNTERS.get(self.layout_name, 5)).astype(bool) + def get_obs(self, c_idx, done=False, enc_fn=None, on_reset=False, goal_objects=None): enc_fn = enc_fn or self.encoding_fn obs = enc_fn(self.env.mdp, self.state, self.grid_shape, self.args.horizon, p_idx=c_idx, @@ -237,19 +238,25 @@ def step(self, action): if len(self.teammates) == 0: raise ValueError('set_teammates must be set called before starting game.') - joint_action = [None for _ in range(self.mdp.num_players)] + # joint_action = [None for _ in range(self.mdp.num_players)] + # joint_action[self.p_idx] = action + + joint_action = np.full(self.mdp.num_players, None, dtype=object) joint_action[self.p_idx] = action + with th.no_grad(): for t_idx in self.t_idxes: 
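A note on the teammate check a few lines below: the commits in this series switch between type(teammate) == CustomAgent and isinstance(teammate, CustomAgent) and eventually revert. The two are not interchangeable, because isinstance also accepts subclasses while the exact-type comparison does not; the speed difference, by contrast, is usually negligible. A small standalone sketch (the classes are stand-ins, not the project's agents):

import timeit

class CustomAgent:                      # stand-in, not the project's CustomAgent
    pass

class WrappedCustomAgent(CustomAgent):  # subclass, to show the semantic difference
    pass

agents = [CustomAgent(), WrappedCustomAgent()] * 500

# Semantics: isinstance() accepts the subclass, the exact-type check does not.
print(all(isinstance(a, CustomAgent) for a in agents))   # True
print(all(type(a) == CustomAgent for a in agents))       # False

# Speed: both checks are typically far too cheap to matter inside step().
t_type = timeit.timeit(lambda: [type(a) == CustomAgent for a in agents], number=1000)
t_inst = timeit.timeit(lambda: [isinstance(a, CustomAgent) for a in agents], number=1000)
print(f"type()==    {t_type:.3f}s   isinstance() {t_inst:.3f}s")

Whether subclasses of CustomAgent should be treated as CustomAgent here is the real question behind the back-and-forth, and the diff alone does not answer it.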
teammate = self.get_teammate_from_idx(t_idx) tm_obs = self.get_obs(c_idx=t_idx, enc_fn=teammate.encoding_fn) - if type(teammate) == CustomAgent: + # if type(teammate) == CustomAgent: + if isinstance(teammate, CustomAgent): info = {'layout_name': self.layout_name, 'u_env_idx': self.unique_env_idx} joint_action[t_idx] = teammate.predict(obs=tm_obs, deterministic=self.deterministic, info=info)[0] else: joint_action[t_idx] = teammate.predict(obs=tm_obs, deterministic=self.deterministic)[0] - joint_action = [Action.INDEX_TO_ACTION[(a.squeeze() if type(a) != int else a)] for a in joint_action] + # joint_action = [Action.INDEX_TO_ACTION[(a.squeeze() if type(a) != int else a)] for a in joint_action] + joint_action = [Action.INDEX_TO_ACTION[a.squeeze() if isinstance(a, np.ndarray) else a] for a in joint_action] self.joint_action = joint_action # If the state didn't change from the previous timestep and the agent is choosing the same action @@ -260,7 +267,8 @@ def step(self, action): joint_action = deepcopy(self.joint_action) for t_idx in self.t_idxes: tm = self.get_teammate_from_idx(t_idx) - if type(tm) != CustomAgent: + # if type(tm) != CustomAgent: + if not isinstance(tm, CustomAgent): joint_action[t_idx] = Direction.INDEX_TO_DIRECTION[self.step_count % 4] self.prev_state, self.prev_actions = deepcopy(self.state), deepcopy(joint_action) @@ -268,7 +276,8 @@ def step(self, action): self.state, reward, done, info = self.env.step(joint_action) for t_idx in self.t_idxes: # Should be right after env.step tm = self.get_teammate_from_idx(t_idx) - if type(tm) == CustomAgent: + # if type(tm) == CustomAgent: + if isinstance(tm, CustomAgent): tm.update_current_position(layout_name=self.layout_name, new_position=self.env.state.players[t_idx].position, u_env_idx=self.unique_env_idx) if self.shape_rewards and not self.is_eval_env: diff --git a/scripts/bash_scripts/profile.sh b/scripts/bash_scripts/profile.sh index 3c73f7b..00b09d4 100644 --- a/scripts/bash_scripts/profile.sh +++ b/scripts/bash_scripts/profile.sh @@ -16,8 +16,8 @@ source scripts/bash_scripts/env_config.sh EXP_DIR="Test/Profile" N_ENVS=2 WANDB_MODE="disabled" -EPOCH_TIMESTEPS=100000 -N_X_SP_TOTAL_TRAINING_TIMESTEPS=200000 +EPOCH_TIMESTEPS=10000 +N_X_SP_TOTAL_TRAINING_TIMESTEPS=10000 export CURRENT_TIME=$(date +"%Y-%m-%d_%H-%M-%S") From 3cd0a62a553ba02bf5a84cb417caae452613d32d Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Tue, 18 Mar 2025 16:16:17 -0600 Subject: [PATCH 09/26] Only use is instance --- oai_agents/gym_environments/base_overcooked_env.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/oai_agents/gym_environments/base_overcooked_env.py b/oai_agents/gym_environments/base_overcooked_env.py index 43f45e3..d8a970b 100644 --- a/oai_agents/gym_environments/base_overcooked_env.py +++ b/oai_agents/gym_environments/base_overcooked_env.py @@ -238,10 +238,7 @@ def step(self, action): if len(self.teammates) == 0: raise ValueError('set_teammates must be set called before starting game.') - # joint_action = [None for _ in range(self.mdp.num_players)] - # joint_action[self.p_idx] = action - - joint_action = np.full(self.mdp.num_players, None, dtype=object) + joint_action = [None for _ in range(self.mdp.num_players)] joint_action[self.p_idx] = action with th.no_grad(): @@ -255,8 +252,7 @@ def step(self, action): else: joint_action[t_idx] = teammate.predict(obs=tm_obs, deterministic=self.deterministic)[0] - # joint_action = [Action.INDEX_TO_ACTION[(a.squeeze() if type(a) != int else a)] for a in joint_action] - 
joint_action = [Action.INDEX_TO_ACTION[a.squeeze() if isinstance(a, np.ndarray) else a] for a in joint_action] + joint_action = [Action.INDEX_TO_ACTION[(a.squeeze() if type(a) != int else a)] for a in joint_action] self.joint_action = joint_action # If the state didn't change from the previous timestep and the agent is choosing the same action @@ -309,7 +305,7 @@ def reset(self, p_idx=None): if self.reset_info and 'start_position' in self.reset_info: self.reset_info['start_position'] = {} for id in range(len(teammates_ids)): - if type(self.teammates[id]) == CustomAgent: + if not isinstance(self.teammates[id], CustomAgent): self.teammates[id].reset() self.reset_info['start_position'][teammates_ids[id]] = self.teammates[id].get_start_position(self.layout_name, u_env_idx=self.unique_env_idx) From 4d3f76629f73a288f7ccd3e82f5b0b3292b1d925 Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Tue, 18 Mar 2025 16:25:55 -0600 Subject: [PATCH 10/26] Small bug fix --- oai_agents/gym_environments/base_overcooked_env.py | 2 +- scripts/bash_scripts/profile.sh | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/oai_agents/gym_environments/base_overcooked_env.py b/oai_agents/gym_environments/base_overcooked_env.py index d8a970b..4d1d018 100644 --- a/oai_agents/gym_environments/base_overcooked_env.py +++ b/oai_agents/gym_environments/base_overcooked_env.py @@ -305,7 +305,7 @@ def reset(self, p_idx=None): if self.reset_info and 'start_position' in self.reset_info: self.reset_info['start_position'] = {} for id in range(len(teammates_ids)): - if not isinstance(self.teammates[id], CustomAgent): + if isinstance(self.teammates[id], CustomAgent): self.teammates[id].reset() self.reset_info['start_position'][teammates_ids[id]] = self.teammates[id].get_start_position(self.layout_name, u_env_idx=self.unique_env_idx) diff --git a/scripts/bash_scripts/profile.sh b/scripts/bash_scripts/profile.sh index 00b09d4..2b29397 100644 --- a/scripts/bash_scripts/profile.sh +++ b/scripts/bash_scripts/profile.sh @@ -14,10 +14,10 @@ PRIMARY_FORCE_TRAINING=false source scripts/bash_scripts/env_config.sh # Overwrite the default values from env_config.sh here if needed: EXP_DIR="Test/Profile" -N_ENVS=2 +N_ENVS=50 WANDB_MODE="disabled" -EPOCH_TIMESTEPS=10000 -N_X_SP_TOTAL_TRAINING_TIMESTEPS=10000 +EPOCH_TIMESTEPS=75000 +N_X_SP_TOTAL_TRAINING_TIMESTEPS=75000 export CURRENT_TIME=$(date +"%Y-%m-%d_%H-%M-%S") From 7cee20b72ce368ecd3e2a780f9f2a61e6495362d Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Tue, 18 Mar 2025 16:36:47 -0600 Subject: [PATCH 11/26] revert isinstance :upside_down_face: --- .../gym_environments/base_overcooked_env.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/oai_agents/gym_environments/base_overcooked_env.py b/oai_agents/gym_environments/base_overcooked_env.py index 4d1d018..e75d8a2 100644 --- a/oai_agents/gym_environments/base_overcooked_env.py +++ b/oai_agents/gym_environments/base_overcooked_env.py @@ -245,8 +245,8 @@ def step(self, action): for t_idx in self.t_idxes: teammate = self.get_teammate_from_idx(t_idx) tm_obs = self.get_obs(c_idx=t_idx, enc_fn=teammate.encoding_fn) - # if type(teammate) == CustomAgent: - if isinstance(teammate, CustomAgent): + if type(teammate) == CustomAgent: + # if isinstance(teammate, CustomAgent): info = {'layout_name': self.layout_name, 'u_env_idx': self.unique_env_idx} joint_action[t_idx] = teammate.predict(obs=tm_obs, deterministic=self.deterministic, info=info)[0] else: @@ -263,8 +263,8 @@ def step(self, action): 
joint_action = deepcopy(self.joint_action) for t_idx in self.t_idxes: tm = self.get_teammate_from_idx(t_idx) - # if type(tm) != CustomAgent: - if not isinstance(tm, CustomAgent): + if type(tm) != CustomAgent: + # if not isinstance(tm, CustomAgent): joint_action[t_idx] = Direction.INDEX_TO_DIRECTION[self.step_count % 4] self.prev_state, self.prev_actions = deepcopy(self.state), deepcopy(joint_action) @@ -272,8 +272,8 @@ def step(self, action): self.state, reward, done, info = self.env.step(joint_action) for t_idx in self.t_idxes: # Should be right after env.step tm = self.get_teammate_from_idx(t_idx) - # if type(tm) == CustomAgent: - if isinstance(tm, CustomAgent): + if type(tm) == CustomAgent: + # if isinstance(tm, CustomAgent): tm.update_current_position(layout_name=self.layout_name, new_position=self.env.state.players[t_idx].position, u_env_idx=self.unique_env_idx) if self.shape_rewards and not self.is_eval_env: @@ -305,7 +305,8 @@ def reset(self, p_idx=None): if self.reset_info and 'start_position' in self.reset_info: self.reset_info['start_position'] = {} for id in range(len(teammates_ids)): - if isinstance(self.teammates[id], CustomAgent): + # if isinstance(self.teammates[id], CustomAgent): + if type(self.teammates[id]) == CustomAgent: self.teammates[id].reset() self.reset_info['start_position'][teammates_ids[id]] = self.teammates[id].get_start_position(self.layout_name, u_env_idx=self.unique_env_idx) From a9c8af0d3d567ce8e32d73d7eb4604677fd0c35a Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Thu, 20 Mar 2025 17:37:29 -0600 Subject: [PATCH 12/26] Perform some code cleaning --- oai_agents/agents/base_agent.py | 56 ++++-------- oai_agents/agents/rl.py | 90 ++++--------------- .../gym_environments/base_overcooked_env.py | 18 +++- scripts/bash_scripts/profile.sh | 17 ++-- scripts/train_agents.py | 3 +- scripts/utils/train_helper.py | 21 +++-- 6 files changed, 76 insertions(+), 129 deletions(-) diff --git a/oai_agents/agents/base_agent.py b/oai_agents/agents/base_agent.py index 616452d..bd72ec1 100644 --- a/oai_agents/agents/base_agent.py +++ b/oai_agents/agents/base_agent.py @@ -2,7 +2,7 @@ from oai_agents.common.arguments import get_args_to_save, set_args_from_load, get_arguments from oai_agents.common.state_encodings import ENCODING_SCHEMES from oai_agents.common.subtasks import calculate_completed_subtask, get_doable_subtasks, Subtasks -from oai_agents.common.tags import AgentPerformance, TeamType, KeyCheckpoints +from oai_agents.common.tags import AgentPerformance, TeamType, KeyCheckpoints, TeammatesCollection from oai_agents.common.checked_model_name_handler import CheckedModelNameHandler # from oai_agents.gym_environments.base_overcooked_env import USEABLE_COUNTERS @@ -201,11 +201,7 @@ def predict(self, obs, state=None, episode_start=None, deterministic=False): self.policy.set_training_mode(False) obs, vectorized_env = self.policy.obs_to_tensor(obs) with th.no_grad(): - if 'subtask_mask' in obs and np.prod(obs['subtask_mask'].shape) == np.prod(self.agent.action_space.n): - dist = self.policy.get_distribution(obs, action_masks=obs['subtask_mask']) - else: - dist = self.policy.get_distribution(obs) - + dist = self.policy.get_distribution(obs) actions = dist.get_actions(deterministic=deterministic) # Convert to numpy, and reshape to the original action shape actions = actions.cpu().numpy().reshape((-1,) + self.agent.action_space.shape) @@ -225,17 +221,14 @@ def get_distribution(self, obs: th.Tensor): return dist def learn(self, epoch_timesteps): - import cProfile - import pstats - 
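The profiler block being commented out in this hunk can also be kept out of learn() entirely by wrapping the call site. A minimal sketch, assuming Python 3.8+ (where cProfile.Profile works as a context manager); profiled is a hypothetical helper, not something in this repository, and the data/profile directory simply mirrors the paths used earlier in this series:

import cProfile
import os
import time

def profiled(fn, *args, label="learn", out_dir="data/profile", **kwargs):
    # Run fn under cProfile, dump a timestamped .prof next to the other profiles,
    # and hand back fn's return value unchanged.
    os.makedirs(out_dir, exist_ok=True)
    with cProfile.Profile() as profiler:
        result = fn(*args, **kwargs)
    stamp = time.strftime("%Y-%m-%d_%H-%M-%S")
    profiler.dump_stats(os.path.join(out_dir, f"{label}_{stamp}.prof"))
    return result

# e.g. profiled(self.agent.learn, total_timesteps=epoch_timesteps, reset_num_timesteps=False)

With this shape the caller decides when profiling is on, so commits like this one do not have to touch the training code at all.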
import time - profiler = cProfile.Profile() - profiler.enable() - + # import cProfile + # import time + # profiler = cProfile.Profile() + # profiler.enable() self.agent.learn(total_timesteps=epoch_timesteps, reset_num_timesteps=False) - - profiler.disable() - c_time = time.strftime("%Y%m%d-%H%M%S") - profiler.dump_stats(f'data/profile/learn_{c_time}.prof') + # profiler.disable() + # c_time = time.strftime("%Y-%m-%d_%H-%M-%S") + # profiler.dump_stats(f'data/profile/learn_{c_time}.prof') self.num_timesteps = self.agent.num_timesteps def save(self, path: Path) -> None: @@ -381,10 +374,6 @@ def __init__(self, name, args, seed=None): if th.cuda.is_available(): th.cuda.manual_seed_all(seed) th.backends.cudnn.deterministic = True - - self.eval_teammates_collection = {} - self.teammates_collection = {} - # For environment splits while training self.n_layouts = len(self.args.layout_names) self.splits = [] @@ -424,16 +413,13 @@ def evaluate(self, eval_agent, num_eps_per_layout_per_tm=5, visualize=False, tim selected_p_indexes = random.sample(range(self.args.num_players), min(3, self.args.num_players)) for _, env in enumerate(self.eval_envs): + rew_per_layout_per_teamtype[env.layout_name] = { - teamtype: [] for teamtype in self.eval_teammates_collection[env.layout_name] + teamtype: [] for teamtype in env.teammates_collection[TeammatesCollection.EVAL][env.layout_name] } rew_per_layout[env.layout_name] = 0 - - teamtypes_population = self.eval_teammates_collection[env.layout_name] - - for teamtype in teamtypes_population: - teammates = teamtypes_population[teamtype][np.random.randint(len(teamtypes_population[teamtype]))] - env.set_teammates(teammates) + for teamtype in env.teammates_collection[TeammatesCollection.EVAL][env.layout_name]: + env.set_teammates(teamtype=teamtype) for p_idx in selected_p_indexes: env.set_reset_p_idx(p_idx) @@ -458,21 +444,9 @@ def evaluate(self, eval_agent, num_eps_per_layout_per_tm=5, visualize=False, tim return np.mean(tot_mean_reward), rew_per_layout, rew_per_layout_per_teamtype - def set_new_teammates(self, curriculum): + def set_new_teammates(self): for i in range(self.args.n_envs): - layout_name = self.env.env_method('get_layout_name', indices=i)[0] - population_teamtypes = self.teammates_collection[layout_name] - - teammates = curriculum.select_teammates_for_layout(population_teamtypes=population_teamtypes, - layout=layout_name) - - assert len(teammates) == self.args.teammates_len - assert type(teammates) == list - - for teammate in teammates: - assert type(teammate) in [SB3Wrapper, CustomAgent] - - self.env.env_method('set_teammates', teammates, indices=i) + self.env.env_method('set_teammates', indices=i) def get_agents(self) -> List[OAIAgent]: diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index ba62c19..402ccf9 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -16,7 +16,9 @@ import os from typing import Optional -VEC_ENV_CLS = DummyVecEnv # +# VEC_ENV_CLS = DummyVecEnv # +VEC_ENV_CLS = SubprocVecEnv + class RLAgentTrainer(OAITrainer): ''' Train an RL agent to play with a teammates_collection of agents.''' @@ -59,19 +61,17 @@ def __init__( self.use_policy_clone = use_policy_clone self.learner_type = learner_type - self.env, self.eval_envs = self.get_envs(env, eval_envs, deterministic, learner_type, start_timestep) + self.env, self.eval_envs = self.get_envs(_env=env, _eval_envs=eval_envs, + deterministic=deterministic, learner_type=learner_type, + start_timestep=start_timestep, teammates_collection=teammates_collection, + 
curriculum=self.curriculum) + # Episode to start training from (usually 0 unless restarted) self.start_step = start_step self.steps = self.start_step # Cumm. timestep to start training from (usually 0 unless restarted) self.start_timestep = start_timestep self.learning_agent, self.agents = self.get_learning_agent(agent) - self.teammates_collection, self.eval_teammates_collection = self.get_teammates_collection( - _tms_clctn = teammates_collection, - learning_agent = self.learning_agent, - train_types = train_types, - eval_types = eval_types - ) self.best_score, self.best_training_rew = -1, float('-inf') @classmethod @@ -120,62 +120,6 @@ def get_learning_agent(self, agent): agents = [learning_agent] return learning_agent, agents - - def get_teammates_collection(self, _tms_clctn, learning_agent, train_types=[], eval_types=[]): - ''' - Returns a dictionary of teammates_collection for training and evaluation - dict - teammates_collection = { - 'layout_name': { - 'TeamType.HIGH_FIRST': [[agent1, agent2], ...], - 'TeamType.MEDIUM_FIRST': [[agent3, agent4], ...], - 'TeamType.LOW_FIRST': [[agent5, agent6], ..], - 'TeamType.RANDOM': [[agent7, agent8], ...], - }, - } - ''' - if _tms_clctn == {}: - _tms_clctn = { - TeammatesCollection.TRAIN: { - layout_name: - {TeamType.SELF_PLAY: [[learning_agent for _ in range(self.teammates_len)]]} - for layout_name in self.args.layout_names - }, - TeammatesCollection.EVAL: { - layout_name: - {TeamType.SELF_PLAY: [[learning_agent for _ in range(self.teammates_len)]]} - for layout_name in self.args.layout_names - } - } - - else: - for layout in self.args.layout_names: - for tt in _tms_clctn[TeammatesCollection.TRAIN][layout]: - if tt == TeamType.SELF_PLAY: - _tms_clctn[TeammatesCollection.TRAIN][layout][TeamType.SELF_PLAY] = [[learning_agent for _ in range(self.teammates_len)]] - for tt in _tms_clctn[TeammatesCollection.EVAL][layout]: - if tt == TeamType.SELF_PLAY: - _tms_clctn[TeammatesCollection.EVAL][layout][TeamType.SELF_PLAY] = [[learning_agent for _ in range(self.teammates_len)]] - - train_teammates_collection = _tms_clctn[TeammatesCollection.TRAIN] - eval_teammates_collection = _tms_clctn[TeammatesCollection.EVAL] - - if train_types: - train_teammates_collection = { - layout: {team_type: train_teammates_collection[layout][team_type] for team_type in train_types} - for layout in train_teammates_collection - } - if eval_types: - eval_teammates_collection = { - layout: {team_type: eval_teammates_collection[layout][team_type] for team_type in eval_types} - for layout in eval_teammates_collection - } - - self.check_teammates_collection_structure(train_teammates_collection) - self.check_teammates_collection_structure(eval_teammates_collection) - return train_teammates_collection, eval_teammates_collection - - def print_tc_helper(self, teammates_collection, message=None): print("-------------------") if message: @@ -190,15 +134,19 @@ def print_tc_helper(self, teammates_collection, message=None): print("-------------------") - def get_envs(self, _env, _eval_envs, deterministic, learner_type, start_timestep: int = 0): + def get_envs(self, _env, _eval_envs, deterministic, learner_type, teammates_collection, curriculum, start_timestep: int = 0): from oai_agents.gym_environments.base_overcooked_env import OvercookedGymEnv if _env is None: env_kwargs = {'shape_rewards': True, 'full_init': False, 'stack_frames': self.use_frame_stack, - 'deterministic': deterministic,'args': self.args, 'learner_type': learner_type, 'start_timestep': start_timestep} + 'deterministic': 
deterministic,'args': self.args, 'learner_type': learner_type, 'start_timestep': start_timestep, + 'teammates_collection': teammates_collection, 'curriculum': curriculum + } env = make_vec_env(OvercookedGymEnv, n_envs=self.args.n_envs, seed=self.seed, vec_env_cls=VEC_ENV_CLS, env_kwargs=env_kwargs) eval_envs_kwargs = {'is_eval_env': True, 'horizon': 400, 'stack_frames': self.use_frame_stack, - 'deterministic': deterministic, 'args': self.args, 'learner_type': learner_type} + 'deterministic': deterministic, 'args': self.args, 'learner_type': learner_type, + 'teammates_collection': teammates_collection, 'curriculum': curriculum + } eval_envs = [OvercookedGymEnv(**{'env_index': i, **eval_envs_kwargs, 'unique_env_idx':self.args.n_envs+i}) for i in range(self.n_layouts)] else: env = _env @@ -282,10 +230,9 @@ def should_evaluate(self, steps): def log_details(self, experiment_name, total_train_timesteps): print("Training agent: " + self.name + ", for experiment: " + experiment_name) - self.print_tc_helper(self.teammates_collection, "Train TC") - self.print_tc_helper(self.eval_teammates_collection, "Eval TC") + self.print_tc_helper(self.eval_envs[0].teammates_collection[TeammatesCollection.EVAL], "Eval TC") + self.print_tc_helper(self.eval_envs[0].teammates_collection[TeammatesCollection.TRAIN], "Train TC") self.curriculum.print_curriculum() - print("How Long: ", self.args.how_long) print(f"Epoch timesteps: {self.epoch_timesteps}") print(f"Total training timesteps: {total_train_timesteps}") print(f"Number of environments: {self.n_envs}") @@ -345,10 +292,11 @@ def train_agents(self, total_train_timesteps, tag_for_returning_agent, resume_ck while self.learning_agent.num_timesteps < total_train_timesteps: self.curriculum.update(current_step=self.steps) - self.set_new_teammates(curriculum=self.curriculum) + self.set_new_teammates() # In each iteration the agent collects n_envs * n_steps experiences. This continues until self.learning_agent.num_timesteps > epoch_timesteps is reached. 
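One detail behind the comment above: SB3's on-policy learn() only checks its timestep budget between rollouts, and with reset_num_timesteps=False each call's budget is added on top of the current count, so every epoch advances by whole rollouts of n_envs * n_steps transitions rather than by exactly epoch_timesteps. A rough, standalone illustration (n_envs and epoch_timesteps are the values this patch sets in profile.sh; n_steps is an assumed rollout length, the real one comes from the PPO configuration):

# Illustrative numbers: n_envs and epoch_timesteps match this patch's profile.sh,
# n_steps is an assumed PPO rollout length (not part of this diff).
n_envs, n_steps = 10, 400
epoch_timesteps = 7500

rollout = n_envs * n_steps                            # 4000 transitions per rollout
rollouts_per_epoch = -(-epoch_timesteps // rollout)   # ceil(7500 / 4000) = 2
print(rollouts_per_epoch * rollout)                   # 8000 timesteps actually collected

That overshoot is worth keeping in mind when comparing the per-epoch .prof files, since the profiled work corresponds to the rounded-up number of environment steps.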
self.learning_agent.learn(self.epoch_timesteps) + self.steps += 1 if self.should_evaluate(steps=self.steps): diff --git a/oai_agents/gym_environments/base_overcooked_env.py b/oai_agents/gym_environments/base_overcooked_env.py index e75d8a2..9d6d220 100644 --- a/oai_agents/gym_environments/base_overcooked_env.py +++ b/oai_agents/gym_environments/base_overcooked_env.py @@ -2,6 +2,7 @@ from oai_agents.common.subtasks import Subtasks, calculate_completed_subtask, get_doable_subtasks from oai_agents.common.learner import LearnerType, Learner from oai_agents.agents.agent_utils import CustomAgent, DummyAgent +from oai_agents.common.tags import AgentPerformance, TeamType, TeammatesCollection from overcooked_ai_py.mdp.overcooked_mdp import OvercookedGridworld, Action, Direction from overcooked_ai_py.mdp.overcooked_env import OvercookedEnv @@ -35,7 +36,7 @@ class OvercookedGymEnv(Env): metadata = {'render.modes': ['human']} - def __init__(self, learner_type, grid_shape=None, ret_completed_subtasks=False, stack_frames=False, is_eval_env=False, + def __init__(self, learner_type, teammates_collection, curriculum, grid_shape=None, ret_completed_subtasks=False, stack_frames=False, is_eval_env=False, shape_rewards=False, enc_fn=None, full_init=True, args=None, deterministic=False, start_timestep: int = 0, **kwargs): self.is_eval_env = is_eval_env @@ -89,6 +90,8 @@ def __init__(self, learner_type, grid_shape=None, ret_completed_subtasks=False, self.reset_p_idx = None self.learner = Learner(learner_type, args.reward_magnifier) + self.teammates_collection = teammates_collection + self.curriculum = curriculum self.dynamic_reward = args.dynamic_reward self.final_sparse_r_ratio = args.final_sparse_r_ratio @@ -154,11 +157,17 @@ def get_layout_name(self): def get_joint_action(self): return self.joint_action - def set_teammates(self, teammates): - assert isinstance(teammates, list) + def set_teammates(self, teamtype=None): + if teamtype: + assert self.is_eval_env is True, "Teamtype should only be set for evaluation environments" + population_teamtypes = self.teammates_collection[TeammatesCollection.EVAL][self.layout_name] + teammates = population_teamtypes[teamtype][np.random.randint(len(population_teamtypes[teamtype]))] + else: + population_teamtypes = self.teammates_collection[TeammatesCollection.TRAIN][self.layout_name] + teammates = self.curriculum.select_teammates_for_layout(population_teamtypes=population_teamtypes, layout=self.layout_name) + self.teammates = teammates self.reset_info['start_position'] = {} - for t_idx in self.t_idxes: tm = self.get_teammate_from_idx(t_idx) if tm.get_start_position(self.layout_name, u_env_idx=self.unique_env_idx) is not None: @@ -245,6 +254,7 @@ def step(self, action): for t_idx in self.t_idxes: teammate = self.get_teammate_from_idx(t_idx) tm_obs = self.get_obs(c_idx=t_idx, enc_fn=teammate.encoding_fn) + if type(teammate) == CustomAgent: # if isinstance(teammate, CustomAgent): info = {'layout_name': self.layout_name, 'u_env_idx': self.unique_env_idx} diff --git a/scripts/bash_scripts/profile.sh b/scripts/bash_scripts/profile.sh index 2b29397..36eebea 100644 --- a/scripts/bash_scripts/profile.sh +++ b/scripts/bash_scripts/profile.sh @@ -1,23 +1,26 @@ #!/bin/sh -ALGO="SP" +ALGO="SPN_XSPCKP" TEAMMATES_LEN=1 HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="counter_circuit" -TOTAL_EGO_AGENTS=1 +TOTAL_EGO_AGENTS=2 -POP_FORCE_TRAINING=true +POP_FORCE_TRAINING=false ADVERSARY_FORCE_TRAINING=false -PRIMARY_FORCE_TRAINING=false +PRIMARY_FORCE_TRAINING=true source 
scripts/bash_scripts/env_config.sh # Overwrite the default values from env_config.sh here if needed: -EXP_DIR="Test/Profile" -N_ENVS=50 +EXP_DIR="Test/Profile/pop" +N_ENVS=10 WANDB_MODE="disabled" -EPOCH_TIMESTEPS=75000 +EPOCH_TIMESTEPS=7500 N_X_SP_TOTAL_TRAINING_TIMESTEPS=75000 +FCP_TOTAL_TRAINING_TIMESTEPS=75000 +CUSTOM_AGENT_CK_RATE_GENERATION=1 +# POP_TOTAL_TRAINING_TIMESTEPS=300000 export CURRENT_TIME=$(date +"%Y-%m-%d_%H-%M-%S") diff --git a/scripts/train_agents.py b/scripts/train_agents.py index 92d2a1c..52c15ff 100644 --- a/scripts/train_agents.py +++ b/scripts/train_agents.py @@ -240,7 +240,8 @@ def SPN_XSPCKP(args) -> None: TeamType.SELF_PLAY_STATIC_ADV, ] primary_eval_types = { - 'generate': [TeamType.SELF_PLAY_HIGH, + 'generate': [ + TeamType.SELF_PLAY_HIGH, TeamType.SELF_PLAY_LOW, TeamType.SELF_PLAY_DYNAMIC_ADV, TeamType.SELF_PLAY_STATIC_ADV, diff --git a/scripts/utils/train_helper.py b/scripts/utils/train_helper.py index cf65044..15695aa 100644 --- a/scripts/utils/train_helper.py +++ b/scripts/utils/train_helper.py @@ -123,6 +123,15 @@ def gen_ADV_train_N_X_SP(args, population, curriculum, unseen_teammates_len, n_x init_agent = load_agents(args, name=heatmap_source.name, tag=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL, force_training=False)[0] + # init_agent = RLAgentTrainer.generate_randomly_initialized_agent( # need a cleaner way to do this + # args=args, + # name=name, + # learner_type=args.primary_learner_type, + # hidden_dim=args.N_X_SP_h_dim, + # seed=args.N_X_SP_seed, + # n_envs=args.n_envs + #) + teammates_collection = generate_TC(args=args, population=population, agent=init_agent, @@ -132,6 +141,7 @@ def gen_ADV_train_N_X_SP(args, population, curriculum, unseen_teammates_len, n_x unseen_teammates_len=unseen_teammates_len, use_entire_population_for_train_types_teammates=True) + adversaries = generate_adversaries_based_on_heatmap(args=args, heatmap_source=heatmap_source, current_adversaries={}, teammates_collection=teammates_collection, train_types=curriculum.train_types) total_train_timesteps = args.n_x_sp_total_training_timesteps // args.custom_agent_ck_rate_generation @@ -157,7 +167,6 @@ def gen_ADV_train_N_X_SP(args, population, curriculum, unseen_teammates_len, n_x adversaries=adversaries) init_agent.name = name args.ck_list_offset = (args.num_of_ckpoints - 1) + ((args.num_of_ckpoints - 1) * round // (args.custom_agent_ck_rate_generation)) - n_x_sp_types_trainer = RLAgentTrainer(name=name, args=args, agent=init_agent, @@ -170,9 +179,10 @@ def gen_ADV_train_N_X_SP(args, population, curriculum, unseen_teammates_len, n_x learner_type=args.primary_learner_type, checkpoint_rate= ck_rate, ) - - n_x_sp_types_trainer.train_agents(total_train_timesteps = total_train_timesteps*(round + 1) + args.pop_total_training_timesteps, - tag_for_returning_agent=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL) + train_time = total_train_timesteps * (round + 1) + # train_time = total_train_timesteps*(round + 1) + args.pop_total_training_timesteps + n_x_sp_types_trainer.train_agents(total_train_timesteps=train_time, + tag_for_returning_agent=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL) init_agent = n_x_sp_types_trainer.agents[0] new_adversaries = generate_adversaries_based_on_heatmap(args=args, heatmap_source=init_agent, current_adversaries=adversaries, teammates_collection=teammates_collection, train_types=curriculum.train_types) adversaries = {key: adversaries.get(key, []) + new_adversaries.get(key, []) for key in set(adversaries) | set(new_adversaries)} @@ -285,7 +295,8 @@ def N_X_SP(args, 
population, curriculum, unseen_teammates_len, n_x_sp_eval_types learner_type=args.primary_learner_type, hidden_dim=args.N_X_SP_h_dim, seed=args.N_X_SP_seed, - n_envs=args.n_envs + n_envs=args.n_envs, + ) teammates_collection = generate_TC( From a6ada3370233cc58f83b75ae03004682209baac7 Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Fri, 21 Mar 2025 11:34:32 -0600 Subject: [PATCH 13/26] Some cleanups --- oai_agents/common/arguments.py | 3 ++- {scripts => sandbox}/generate_agents_for_eval.py | 0 {scripts => sandbox}/table_creator.py | 0 {scripts => sandbox}/training_chart.py | 0 scripts/bash_scripts/profile.sh | 2 +- scripts/{ => eval_scripts}/avg_perf_chart.py | 0 .../avg_perf_chart_multi_seed.py | 0 .../{ => eval_scripts}/avg_perf_chart_unified.py | 0 scripts/{ => eval_scripts}/evaluate_agents.py | 0 scripts/{ => eval_scripts}/evaluate_agents_v2.py | 0 scripts/train_agents.py | 16 ++++++++-------- 11 files changed, 11 insertions(+), 10 deletions(-) rename {scripts => sandbox}/generate_agents_for_eval.py (100%) rename {scripts => sandbox}/table_creator.py (100%) rename {scripts => sandbox}/training_chart.py (100%) rename scripts/{ => eval_scripts}/avg_perf_chart.py (100%) rename scripts/{ => eval_scripts}/avg_perf_chart_multi_seed.py (100%) rename scripts/{ => eval_scripts}/avg_perf_chart_unified.py (100%) rename scripts/{ => eval_scripts}/evaluate_agents.py (100%) rename scripts/{ => eval_scripts}/evaluate_agents_v2.py (100%) diff --git a/oai_agents/common/arguments.py b/oai_agents/common/arguments.py index dda7517..786ed15 100644 --- a/oai_agents/common/arguments.py +++ b/oai_agents/common/arguments.py @@ -105,7 +105,8 @@ def get_arguments(additional_args=[]): args = parser.parse_args() args.base_dir = Path(args.base_dir) - args.device = th.device('cuda' if th.cuda.is_available() else 'cpu') + # args.device = th.device('cuda' if th.cuda.is_available() else 'cpu') + args.device = th.device('cpu') args.layout_names = args.layout_names.split(',') return args diff --git a/scripts/generate_agents_for_eval.py b/sandbox/generate_agents_for_eval.py similarity index 100% rename from scripts/generate_agents_for_eval.py rename to sandbox/generate_agents_for_eval.py diff --git a/scripts/table_creator.py b/sandbox/table_creator.py similarity index 100% rename from scripts/table_creator.py rename to sandbox/table_creator.py diff --git a/scripts/training_chart.py b/sandbox/training_chart.py similarity index 100% rename from scripts/training_chart.py rename to sandbox/training_chart.py diff --git a/scripts/bash_scripts/profile.sh b/scripts/bash_scripts/profile.sh index 36eebea..ac772ce 100644 --- a/scripts/bash_scripts/profile.sh +++ b/scripts/bash_scripts/profile.sh @@ -24,7 +24,7 @@ CUSTOM_AGENT_CK_RATE_GENERATION=1 export CURRENT_TIME=$(date +"%Y-%m-%d_%H-%M-%S") -CUDA_VISIBLE_DEVICES=1 python -m cProfile -o data/profile/profile_results_all_${CURRENT_TIME}.prof scripts/train_agents.py \ +python -m cProfile -o data/profile/profile_results_all_${CURRENT_TIME}.prof scripts/train_agents.py \ --layout-names ${LAYOUT_NAMES} \ --algo-name ${ALGO} \ --exp-dir ${EXP_DIR} \ diff --git a/scripts/avg_perf_chart.py b/scripts/eval_scripts/avg_perf_chart.py similarity index 100% rename from scripts/avg_perf_chart.py rename to scripts/eval_scripts/avg_perf_chart.py diff --git a/scripts/avg_perf_chart_multi_seed.py b/scripts/eval_scripts/avg_perf_chart_multi_seed.py similarity index 100% rename from scripts/avg_perf_chart_multi_seed.py rename to scripts/eval_scripts/avg_perf_chart_multi_seed.py diff --git 
a/scripts/avg_perf_chart_unified.py b/scripts/eval_scripts/avg_perf_chart_unified.py similarity index 100% rename from scripts/avg_perf_chart_unified.py rename to scripts/eval_scripts/avg_perf_chart_unified.py diff --git a/scripts/evaluate_agents.py b/scripts/eval_scripts/evaluate_agents.py similarity index 100% rename from scripts/evaluate_agents.py rename to scripts/eval_scripts/evaluate_agents.py diff --git a/scripts/evaluate_agents_v2.py b/scripts/eval_scripts/evaluate_agents_v2.py similarity index 100% rename from scripts/evaluate_agents_v2.py rename to scripts/eval_scripts/evaluate_agents_v2.py diff --git a/scripts/train_agents.py b/scripts/train_agents.py index 52c15ff..40b0fba 100644 --- a/scripts/train_agents.py +++ b/scripts/train_agents.py @@ -278,15 +278,15 @@ def SPN_XSPCKP(args) -> None: elif args.algo_name == 'FCP_traditional': FCP_traditional(args=args) - elif args.algo_name == 'FCP_mhri': - FCP_mhri(args=args) + # elif args.algo_name == 'FCP_mhri': + # FCP_mhri(args=args) - elif args.algo_name == 'SPN_1ADV': - SPN_1ADV(args=args) + # elif args.algo_name == 'SPN_1ADV': + # SPN_1ADV(args=args) - elif args.algo_name == 'N_1_FCP': - N_1_FCP(args=args) + # elif args.algo_name == 'N_1_FCP': + # N_1_FCP(args=args) - elif args.algo_name == 'SPN_1ADV_XSPCKP': - SPN_1ADV_XSPCKP(args=args) + # elif args.algo_name == 'SPN_1ADV_XSPCKP': + # SPN_1ADV_XSPCKP(args=args) From 9bd10051ca10d77c0080334725c5639071f1c248 Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Fri, 21 Mar 2025 16:28:53 -0600 Subject: [PATCH 14/26] Add scripts to run the best ego --- oai_agents/agents/rl.py | 2 +- oai_agents/common/arguments.py | 8 +++ sandbox/profile_analyze.py | 9 +++ .../best_baseline_experiment/c1_best_ego.sh | 67 +++++++++++++++++++ .../best_baseline_experiment/c2_best_ego.sh | 67 +++++++++++++++++++ .../best_baseline_experiment/c3_best_ego.sh | 67 +++++++++++++++++++ .../best_baseline_experiment/c4_best_ego.sh | 67 +++++++++++++++++++ scripts/bash_scripts/env_config.sh | 2 +- scripts/train_agents.py | 35 ++++++++++ scripts/utils/__init__.py | 2 +- scripts/utils/train_helper.py | 66 +++++++++++++++++- 11 files changed, 388 insertions(+), 4 deletions(-) create mode 100644 sandbox/profile_analyze.py create mode 100644 scripts/bash_scripts/best_baseline_experiment/c1_best_ego.sh create mode 100644 scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh create mode 100644 scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh create mode 100644 scripts/bash_scripts/best_baseline_experiment/c4_best_ego.sh diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index 402ccf9..b8e6631 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -130,7 +130,7 @@ def print_tc_helper(self, teammates_collection, message=None): teammates_c = teammates_collection[layout_name][tag] for teammates in teammates_c: for agent in teammates: - print(f'\t{agent.name}, score for layout {layout_name} is: {agent.layout_scores[layout_name]}, start_pos: {agent.get_start_position(layout_name, 0)}, len: {len(teammates)}') + print(f'\t{agent.name}, score for layout {layout_name} is:, start_pos: {agent.get_start_position(layout_name, 0)}, len: {len(teammates)}') print("-------------------") diff --git a/oai_agents/common/arguments.py b/oai_agents/common/arguments.py index 786ed15..c991dea 100644 --- a/oai_agents/common/arguments.py +++ b/oai_agents/common/arguments.py @@ -100,6 +100,11 @@ def get_arguments(additional_args=[]): parser.add_argument("--total-ego-agents", type=int, default=4) 
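The three comma-separated checkpoint-path arguments added just below are parsed as plain strings and split on ',' further down in this file, so leaving a flag unset yields the literal list ['default']. A sketch of an alternative that lets argparse do the split, shown for comparison only (the patch itself keeps the split-after-parse approach; the example paths are made up, shaped like the ones the best_ego scripts pass in):

import argparse

def comma_list(value):
    # "a,b,c" -> ["a", "b", "c"], dropping empty pieces
    return [piece for piece in value.split(",") if piece]

parser = argparse.ArgumentParser()
parser.add_argument("--low-perfs", type=comma_list, default=[])
parser.add_argument("--med-perfs", type=comma_list, default=[])
parser.add_argument("--high-perfs", type=comma_list, default=[])

args = parser.parse_args(["--low-perfs", "c1_v1/SP_s1010/ck_0,c1_v2/SP_s1010/ck_0"])
print(args.low_perfs)    # ['c1_v1/SP_s1010/ck_0', 'c1_v2/SP_s1010/ck_0']

With type=comma_list an unset flag stays an empty list, so downstream code can distinguish "not provided" from a real checkpoint path without special-casing the 'default' sentinel.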
parser.add_argument("--ck-list-offset", type=int, default=0) + parser.add_argument('--low-perfs', help='shitty code to run ult baseline exp', default='default') + parser.add_argument('--med-perfs', help='shitty code to run ult baseline exp', default='default') + parser.add_argument('--high-perfs', help='shitty code to run ult baseline exp', default='default') + + for parser_arg, parser_kwargs in additional_args: parser.add_argument(parser_arg, **parser_kwargs) @@ -108,6 +113,9 @@ def get_arguments(additional_args=[]): # args.device = th.device('cuda' if th.cuda.is_available() else 'cpu') args.device = th.device('cpu') args.layout_names = args.layout_names.split(',') + args.low_perfs = args.low_perfs.split(',') + args.med_perfs = args.med_perfs.split(',') + args.high_perfs = args.high_perfs.split(',') return args diff --git a/sandbox/profile_analyze.py b/sandbox/profile_analyze.py new file mode 100644 index 0000000..d7136f9 --- /dev/null +++ b/sandbox/profile_analyze.py @@ -0,0 +1,9 @@ +import pstats +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument("name", help="name of the profile file") +args = parser.parse_args() +name = args.name +p = pstats.Stats(f"data/profile/{name}") +p.strip_dirs().sort_stats("cumulative").print_stats(20) # Show top 20 functions \ No newline at end of file diff --git a/scripts/bash_scripts/best_baseline_experiment/c1_best_ego.sh b/scripts/bash_scripts/best_baseline_experiment/c1_best_ego.sh new file mode 100644 index 0000000..9ac5db5 --- /dev/null +++ b/scripts/bash_scripts/best_baseline_experiment/c1_best_ego.sh @@ -0,0 +1,67 @@ +#!/bin/sh + +ALGO="best_EGO" +TEAMMATES_LEN=1 +HOW_LONG=20 +NUM_OF_CKPOINTS=40 +LAYOUT_NAMES="c1" +EXP_DIR=${LAYOUT_NAMES} +TOTAL_EGO_AGENTS=1 +QUICK_TEST=false + +L0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_0" +L1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_0" +L2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_0" +L3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_0" + +M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_1_rew_252.0" +M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_1_rew_284.0" +M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_234.0" +M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_246.0" + +H0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/best" +H1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/best" +H2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/best" +H3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/best" + +L="${L0},${L1},${L2},${L3}" +M="${M0},${M1},${M2},${M3}" +H="${H0},${H1},${H2},${H3}" + +WANDB_MODE="online" +POP_FORCE_TRAINING=false +ADVERSARY_FORCE_TRAINING=false +PRIMARY_FORCE_TRAINING=false + +source scripts/bash_scripts/env_config.sh + +python scripts/train_agents.py \ + --layout-names ${LAYOUT_NAMES} \ + --algo-name ${ALGO} \ + --exp-dir ${EXP_DIR} \ + --num-of-ckpoints ${NUM_OF_CKPOINTS} \ + --teammates-len ${TEAMMATES_LEN} \ + --num-players ${NUM_PLAYERS} \ + --custom-agent-ck-rate-generation ${CUSTOM_AGENT_CK_RATE_GENERATION} \ + --num-steps-in-traj-for-dyn-adv ${NUM_STEPS_IN_TRAJ_FOR_DYN_ADV} \ + --num-static-advs-per-heatmap ${NUM_STATIC_ADVS_PER_HEATMAP} \ + --num-dynamic-advs-per-heatmap ${NUM_DYNAMIC_ADVS_PER_HEATMAP} \ + --use-val-func-for-heatmap-gen ${USE_VAL_FUNC_FOR_HEATMAP_GEN} \ + --prioritized-sampling ${PRIORITIZED_SAMPLING} \ + --n-envs ${N_ENVS} \ + --epoch-timesteps ${EPOCH_TIMESTEPS} \ + --pop-total-training-timesteps ${POP_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-sp-total-training-timesteps ${N_X_SP_TOTAL_TRAINING_TIMESTEPS} \ + 
--fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ + --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ + --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --wandb-mode ${WANDB_MODE} \ + --pop-force-training ${POP_FORCE_TRAINING} \ + --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ + --primary-force-training ${PRIMARY_FORCE_TRAINING} \ + --how-long ${HOW_LONG} \ + --exp-name-prefix "${EXP_NAME_PREFIX}" \ + --low-perfs ${L} \ + --med-perfs ${M} \ + --high-perfs ${H} \ \ No newline at end of file diff --git a/scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh b/scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh new file mode 100644 index 0000000..e93a38b --- /dev/null +++ b/scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh @@ -0,0 +1,67 @@ +#!/bin/sh + +ALGO="best_EGO" +TEAMMATES_LEN=1 +HOW_LONG=20 +NUM_OF_CKPOINTS=40 +LAYOUT_NAMES="c2" +EXP_DIR=${LAYOUT_NAMES} +TOTAL_EGO_AGENTS=1 +QUICK_TEST=false + +L0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_0" +L1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_0" +L2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_0" +L3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_0" + +# M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_1_rew_252.0" +# M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_1_rew_284.0" +# M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_234.0" +# M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_246.0" + +H0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/best" +H1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/best" +H2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/best" +H3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/best" + +L="${L0},${L1},${L2},${L3}" +M="${M0},${M1},${M2},${M3}" +H="${H0},${H1},${H2},${H3}" + +WANDB_MODE="online" +POP_FORCE_TRAINING=false +ADVERSARY_FORCE_TRAINING=false +PRIMARY_FORCE_TRAINING=false + +source scripts/bash_scripts/env_config.sh + +python scripts/train_agents.py \ + --layout-names ${LAYOUT_NAMES} \ + --algo-name ${ALGO} \ + --exp-dir ${EXP_DIR} \ + --num-of-ckpoints ${NUM_OF_CKPOINTS} \ + --teammates-len ${TEAMMATES_LEN} \ + --num-players ${NUM_PLAYERS} \ + --custom-agent-ck-rate-generation ${CUSTOM_AGENT_CK_RATE_GENERATION} \ + --num-steps-in-traj-for-dyn-adv ${NUM_STEPS_IN_TRAJ_FOR_DYN_ADV} \ + --num-static-advs-per-heatmap ${NUM_STATIC_ADVS_PER_HEATMAP} \ + --num-dynamic-advs-per-heatmap ${NUM_DYNAMIC_ADVS_PER_HEATMAP} \ + --use-val-func-for-heatmap-gen ${USE_VAL_FUNC_FOR_HEATMAP_GEN} \ + --prioritized-sampling ${PRIORITIZED_SAMPLING} \ + --n-envs ${N_ENVS} \ + --epoch-timesteps ${EPOCH_TIMESTEPS} \ + --pop-total-training-timesteps ${POP_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-sp-total-training-timesteps ${N_X_SP_TOTAL_TRAINING_TIMESTEPS} \ + --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ + --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ + --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --wandb-mode ${WANDB_MODE} \ + --pop-force-training ${POP_FORCE_TRAINING} \ + --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ + --primary-force-training ${PRIMARY_FORCE_TRAINING} \ + --how-long ${HOW_LONG} \ + --exp-name-prefix "${EXP_NAME_PREFIX}" \ + --low-perfs ${L} \ + --med-perfs ${M} \ + --high-perfs ${H} \ \ No newline at end of file diff --git a/scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh 
b/scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh new file mode 100644 index 0000000..fccd096 --- /dev/null +++ b/scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh @@ -0,0 +1,67 @@ +#!/bin/sh + +ALGO="best_EGO" +TEAMMATES_LEN=1 +HOW_LONG=20 +NUM_OF_CKPOINTS=40 +LAYOUT_NAMES="c3" +EXP_DIR=${LAYOUT_NAMES} +TOTAL_EGO_AGENTS=1 +QUICK_TEST=false + +L0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_0" +L1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_0" +L2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_0" +L3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_0" + +# M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_1_rew_252.0" +# M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_1_rew_284.0" +# M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_234.0" +# M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_246.0" + +H0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/best" +H1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/best" +H2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/best" +H3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/best" + +L="${L0},${L1},${L2},${L3}" +M="${M0},${M1},${M2},${M3}" +H="${H0},${H1},${H2},${H3}" + +WANDB_MODE="online" +POP_FORCE_TRAINING=false +ADVERSARY_FORCE_TRAINING=false +PRIMARY_FORCE_TRAINING=false + +source scripts/bash_scripts/env_config.sh + +python scripts/train_agents.py \ + --layout-names ${LAYOUT_NAMES} \ + --algo-name ${ALGO} \ + --exp-dir ${EXP_DIR} \ + --num-of-ckpoints ${NUM_OF_CKPOINTS} \ + --teammates-len ${TEAMMATES_LEN} \ + --num-players ${NUM_PLAYERS} \ + --custom-agent-ck-rate-generation ${CUSTOM_AGENT_CK_RATE_GENERATION} \ + --num-steps-in-traj-for-dyn-adv ${NUM_STEPS_IN_TRAJ_FOR_DYN_ADV} \ + --num-static-advs-per-heatmap ${NUM_STATIC_ADVS_PER_HEATMAP} \ + --num-dynamic-advs-per-heatmap ${NUM_DYNAMIC_ADVS_PER_HEATMAP} \ + --use-val-func-for-heatmap-gen ${USE_VAL_FUNC_FOR_HEATMAP_GEN} \ + --prioritized-sampling ${PRIORITIZED_SAMPLING} \ + --n-envs ${N_ENVS} \ + --epoch-timesteps ${EPOCH_TIMESTEPS} \ + --pop-total-training-timesteps ${POP_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-sp-total-training-timesteps ${N_X_SP_TOTAL_TRAINING_TIMESTEPS} \ + --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ + --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ + --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --wandb-mode ${WANDB_MODE} \ + --pop-force-training ${POP_FORCE_TRAINING} \ + --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ + --primary-force-training ${PRIMARY_FORCE_TRAINING} \ + --how-long ${HOW_LONG} \ + --exp-name-prefix "${EXP_NAME_PREFIX}" \ + --low-perfs ${L} \ + --med-perfs ${M} \ + --high-perfs ${H} \ \ No newline at end of file diff --git a/scripts/bash_scripts/best_baseline_experiment/c4_best_ego.sh b/scripts/bash_scripts/best_baseline_experiment/c4_best_ego.sh new file mode 100644 index 0000000..6ab42f2 --- /dev/null +++ b/scripts/bash_scripts/best_baseline_experiment/c4_best_ego.sh @@ -0,0 +1,67 @@ +#!/bin/sh + +ALGO="best_EGO" +TEAMMATES_LEN=1 +HOW_LONG=20 +NUM_OF_CKPOINTS=40 +LAYOUT_NAMES="c4" +EXP_DIR=${LAYOUT_NAMES} +TOTAL_EGO_AGENTS=1 +QUICK_TEST=false + +L0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_0" +L1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_0" +L2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_0" +L3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_0" + +M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_2_rew_192.0" 
+M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_2_rew_118.0" +M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_54.0" +M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_104.0" + +H0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/best" +H1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/best" +H2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/best" +H3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/best" + +L="${L0},${L1},${L2},${L3}" +M="${M0},${M1},${M2},${M3}" +H="${H0},${H1},${H2},${H3}" + +WANDB_MODE="online" +POP_FORCE_TRAINING=false +ADVERSARY_FORCE_TRAINING=false +PRIMARY_FORCE_TRAINING=false + +source scripts/bash_scripts/env_config.sh + +python scripts/train_agents.py \ + --layout-names ${LAYOUT_NAMES} \ + --algo-name ${ALGO} \ + --exp-dir ${EXP_DIR} \ + --num-of-ckpoints ${NUM_OF_CKPOINTS} \ + --teammates-len ${TEAMMATES_LEN} \ + --num-players ${NUM_PLAYERS} \ + --custom-agent-ck-rate-generation ${CUSTOM_AGENT_CK_RATE_GENERATION} \ + --num-steps-in-traj-for-dyn-adv ${NUM_STEPS_IN_TRAJ_FOR_DYN_ADV} \ + --num-static-advs-per-heatmap ${NUM_STATIC_ADVS_PER_HEATMAP} \ + --num-dynamic-advs-per-heatmap ${NUM_DYNAMIC_ADVS_PER_HEATMAP} \ + --use-val-func-for-heatmap-gen ${USE_VAL_FUNC_FOR_HEATMAP_GEN} \ + --prioritized-sampling ${PRIORITIZED_SAMPLING} \ + --n-envs ${N_ENVS} \ + --epoch-timesteps ${EPOCH_TIMESTEPS} \ + --pop-total-training-timesteps ${POP_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-sp-total-training-timesteps ${N_X_SP_TOTAL_TRAINING_TIMESTEPS} \ + --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ + --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ + --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --wandb-mode ${WANDB_MODE} \ + --pop-force-training ${POP_FORCE_TRAINING} \ + --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ + --primary-force-training ${PRIMARY_FORCE_TRAINING} \ + --how-long ${HOW_LONG} \ + --exp-name-prefix "${EXP_NAME_PREFIX}" \ + --low-perfs ${L} \ + --med-perfs ${M} \ + --high-perfs ${H} \ \ No newline at end of file diff --git a/scripts/bash_scripts/env_config.sh b/scripts/bash_scripts/env_config.sh index 5ee4ff1..3e4951b 100644 --- a/scripts/bash_scripts/env_config.sh +++ b/scripts/bash_scripts/env_config.sh @@ -14,7 +14,7 @@ fi if [ "$QUICK_TEST" = false ]; then WANDB_MODE="online" - N_ENVS=210 + N_ENVS=50 EPOCH_TIMESTEPS=100000 POP_TOTAL_TRAINING_TIMESTEPS=$(echo "$HOW_LONG * 5000000" | bc) N_X_SP_TOTAL_TRAINING_TIMESTEPS=$(echo "$HOW_LONG * 5000000" | bc) diff --git a/scripts/train_agents.py b/scripts/train_agents.py index 40b0fba..74c68a5 100644 --- a/scripts/train_agents.py +++ b/scripts/train_agents.py @@ -11,6 +11,7 @@ get_FCP_agent_w_pop, get_N_X_FCP_agents, get_N_X_SP_agents, + get_best_EGO_agents, ) def SP(args): @@ -266,6 +267,37 @@ def SPN_XSPCKP(args) -> None: ) +def best_EGO(args) -> None: + '''only for 2 players''' + primary_train_types = [ + TeamType.SELF_PLAY_HIGH, + TeamType.SELF_PLAY_MEDIUM, + TeamType.SELF_PLAY_LOW, + ] + primary_eval_types = { + 'generate': [ + TeamType.SELF_PLAY_HIGH, + TeamType.SELF_PLAY_LOW, + ], + 'load': [] + } + if args.prioritized_sampling: + curriculum = Curriculum(train_types=primary_train_types, + eval_types=primary_eval_types, + is_random=False, + prioritized_sampling=True, + priority_scaling=2.0) + else: + curriculum = Curriculum(train_types=primary_train_types, is_random=True) + + get_best_EGO_agents( + args, + curriculum=curriculum, + primary_eval_types=primary_eval_types, + 
primary_train_types=curriculum.train_types, + ) + + if __name__ == '__main__': args = get_arguments() @@ -277,6 +309,9 @@ def SPN_XSPCKP(args) -> None: elif args.algo_name == 'FCP_traditional': FCP_traditional(args=args) + + elif args.algo_name == 'best_EGO': + best_EGO(args=args) # elif args.algo_name == 'FCP_mhri': # FCP_mhri(args=args) diff --git a/scripts/utils/__init__.py b/scripts/utils/__init__.py index e5e0d57..e97d434 100644 --- a/scripts/utils/__init__.py +++ b/scripts/utils/__init__.py @@ -1,4 +1,4 @@ -from .train_helper import get_SP_agents, get_FCP_agent_w_pop, get_N_X_FCP_agents, get_N_X_SP_agents +from .train_helper import get_SP_agents, get_FCP_agent_w_pop, get_N_X_FCP_agents, get_N_X_SP_agents, get_best_EGO_agents from .eval_helper import get_eval_types_to_load from .eval_utils import * diff --git a/scripts/utils/train_helper.py b/scripts/utils/train_helper.py index 15695aa..0925423 100644 --- a/scripts/utils/train_helper.py +++ b/scripts/utils/train_helper.py @@ -6,7 +6,7 @@ from oai_agents.common.heatmap import generate_adversaries_based_on_heatmap from oai_agents.agents.agent_utils import CustomAgent from .common import load_agents, generate_name -from oai_agents.common.tags import Prefix, KeyCheckpoints +from oai_agents.common.tags import Prefix, KeyCheckpoints, TeammatesCollection def get_SP_agents(args, train_types, eval_types, curriculum, tag_for_returning_agent): @@ -527,3 +527,67 @@ def get_N_X_FCP_agents( tag_for_returning_agent=tag ) return fcp_trainer.get_agents()[0], teammates_collection + + + +def get_best_EGO_agents(args, primary_train_types, primary_eval_types, curriculum): + '''Code purposed for a very specific experiment, assumes n_players = 2''' + from pathlib import Path + + eval_collection = { + layout_name: {ttype: [] for ttype in primary_eval_types['generate']} for layout_name in args.layout_names + } + train_collection = { + layout_name: {ttype: [] for ttype in primary_train_types} for layout_name in args.layout_names + } + + all_perfs = args.low_perfs + args.med_perfs + args.high_perfs + for agent_address in all_perfs: + + path_tag = agent_address.split('/') + path = '/'.join(path_tag[:-1]) + tag = path_tag[-1] + agents, _, _ = RLAgentTrainer.load_agents(args=args, tag=tag, path=Path('agent_models/'+path)) + agent = agents[0] + + + for layout_name in args.layout_names: + if agent_address in args.low_perfs: + ttype = TeamType.SELF_PLAY_LOW + elif agent_address in args.med_perfs: + ttype = TeamType.SELF_PLAY_MEDIUM + elif agent_address in args.high_perfs: + ttype = TeamType.SELF_PLAY_HIGH + + if ttype in train_collection[layout_name]: + train_collection[layout_name][ttype].append([agent]) + + if ttype in eval_collection[layout_name]: + eval_collection[layout_name][ttype] = [[agent]] + + teammates_collection = { + TeammatesCollection.TRAIN: train_collection, + TeammatesCollection.EVAL: eval_collection + } + + best_ego_trainer = RLAgentTrainer( + name=f'best_{args.layout_names[0]}', + args=args, + agent=None, + teammates_collection=teammates_collection, + epoch_timesteps=args.epoch_timesteps, + n_envs=args.n_envs, + + seed=args.N_X_SP_seed, + hidden_dim=args.N_X_SP_h_dim, + curriculum=curriculum, + + learner_type=args.primary_learner_type, + checkpoint_rate=args.n_x_sp_total_training_timesteps // args.num_of_ckpoints, + ) + + best_ego_trainer.train_agents( + total_train_timesteps=args.n_x_fcp_total_training_timesteps, + tag_for_returning_agent=tag + ) + From 46f866f1206ac6cdbbe00abb47d8831e397bb6be Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: 
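To make the collection built by get_best_EGO_agents above easier to read, here is an illustrative mock of its shape. Placeholder strings stand in for the loaded agent objects and for the TeamType / TeammatesCollection constants used in the real code.

    train_collection = {
        'c1': {
            'SELF_PLAY_HIGH':   [['H0'], ['H1'], ['H2'], ['H3']],
            'SELF_PLAY_MEDIUM': [['M0'], ['M1'], ['M2'], ['M3']],
            'SELF_PLAY_LOW':    [['L0'], ['L1'], ['L2'], ['L3']],
        }
    }
    eval_collection = {
        'c1': {
            # as written above, eval keeps only the last loaded agent per type
            'SELF_PLAY_HIGH': [['H3']],
            'SELF_PLAY_LOW':  [['L3']],
        }
    }
    teammates_collection = {'TRAIN': train_collection, 'EVAL': eval_collection}
    print(len(teammates_collection['TRAIN']['c1']['SELF_PLAY_HIGH']))  # 4 one-teammate groups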
Fri, 21 Mar 2025 16:47:34 -0600 Subject: [PATCH 15/26] update exp_dir --- scripts/bash_scripts/best_baseline_experiment/c1_best_ego.sh | 2 +- scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh | 2 +- scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh | 2 +- scripts/bash_scripts/best_baseline_experiment/c4_best_ego.sh | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/bash_scripts/best_baseline_experiment/c1_best_ego.sh b/scripts/bash_scripts/best_baseline_experiment/c1_best_ego.sh index 9ac5db5..bff1e48 100644 --- a/scripts/bash_scripts/best_baseline_experiment/c1_best_ego.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c1_best_ego.sh @@ -5,7 +5,7 @@ TEAMMATES_LEN=1 HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c1" -EXP_DIR=${LAYOUT_NAMES} +EXP_DIR="${LAYOUT_NAMES}_best_EGO" TOTAL_EGO_AGENTS=1 QUICK_TEST=false diff --git a/scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh b/scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh index e93a38b..096d928 100644 --- a/scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh @@ -5,7 +5,7 @@ TEAMMATES_LEN=1 HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c2" -EXP_DIR=${LAYOUT_NAMES} +EXP_DIR="${LAYOUT_NAMES}_best_EGO" TOTAL_EGO_AGENTS=1 QUICK_TEST=false diff --git a/scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh b/scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh index fccd096..00eaf83 100644 --- a/scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh @@ -5,7 +5,7 @@ TEAMMATES_LEN=1 HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c3" -EXP_DIR=${LAYOUT_NAMES} +EXP_DIR="${LAYOUT_NAMES}_best_EGO" TOTAL_EGO_AGENTS=1 QUICK_TEST=false diff --git a/scripts/bash_scripts/best_baseline_experiment/c4_best_ego.sh b/scripts/bash_scripts/best_baseline_experiment/c4_best_ego.sh index 6ab42f2..e7706bd 100644 --- a/scripts/bash_scripts/best_baseline_experiment/c4_best_ego.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c4_best_ego.sh @@ -5,7 +5,7 @@ TEAMMATES_LEN=1 HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c4" -EXP_DIR=${LAYOUT_NAMES} +EXP_DIR="${LAYOUT_NAMES}_best_EGO" TOTAL_EGO_AGENTS=1 QUICK_TEST=false From df1096c5bec9d89b74a2b0731f024f9f8c2522eb Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Mon, 24 Mar 2025 11:03:08 -0600 Subject: [PATCH 16/26] Correct training time --- scripts/utils/train_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/utils/train_helper.py b/scripts/utils/train_helper.py index 0925423..6a26478 100644 --- a/scripts/utils/train_helper.py +++ b/scripts/utils/train_helper.py @@ -587,7 +587,7 @@ def get_best_EGO_agents(args, primary_train_types, primary_eval_types, curriculu ) best_ego_trainer.train_agents( - total_train_timesteps=args.n_x_fcp_total_training_timesteps, + total_train_timesteps=args.n_x_sp_total_training_timesteps, tag_for_returning_agent=tag ) From e9a8104f2210264b17498ee698d43b53a3b38cb5 Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Mon, 24 Mar 2025 18:30:48 -0600 Subject: [PATCH 17/26] Add best cap experiments --- oai_agents/common/curriculum.py | 1 + oai_agents/common/overcooked_gui.py | 24 ++++--- oai_agents/common/overcooked_simulation.py | 15 ++++- .../best_baseline_experiment/c1_best_CAP.sh | 67 +++++++++++++++++++ .../best_baseline_experiment/c2_best_CAP.sh | 67 +++++++++++++++++++ .../best_baseline_experiment/c2_best_ego.sh | 8 +-- 
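A quick sanity check of the training-time fix above, using the values these scripts set (HOW_LONG=20, NUM_OF_CKPOINTS=40) and the HOW_LONG * 5000000 rule from env_config.sh, so that checkpoint_rate and total_train_timesteps come from the same N_X_SP budget:

    how_long = 20
    num_of_ckpoints = 40
    n_x_sp_total_training_timesteps = how_long * 5_000_000       # 100_000_000 per env_config.sh
    checkpoint_rate = n_x_sp_total_training_timesteps // num_of_ckpoints
    print(checkpoint_rate)                                        # 2_500_000 steps per checkpoint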
.../best_baseline_experiment/c3_best_CAP.sh | 67 +++++++++++++++++++ .../best_baseline_experiment/c3_best_ego.sh | 8 +-- .../best_baseline_experiment/c4_best_CAP.sh | 67 +++++++++++++++++++ scripts/bash_scripts/test_run.sh | 53 +++++++++++++++ scripts/run_overcooked_game.py | 19 ++++-- scripts/train_agents.py | 8 ++- scripts/utils/train_helper.py | 30 +++++++-- 13 files changed, 401 insertions(+), 33 deletions(-) create mode 100644 scripts/bash_scripts/best_baseline_experiment/c1_best_CAP.sh create mode 100644 scripts/bash_scripts/best_baseline_experiment/c2_best_CAP.sh create mode 100644 scripts/bash_scripts/best_baseline_experiment/c3_best_CAP.sh create mode 100644 scripts/bash_scripts/best_baseline_experiment/c4_best_CAP.sh create mode 100644 scripts/bash_scripts/test_run.sh diff --git a/oai_agents/common/curriculum.py b/oai_agents/common/curriculum.py index 2dc95c0..d01d038 100644 --- a/oai_agents/common/curriculum.py +++ b/oai_agents/common/curriculum.py @@ -103,6 +103,7 @@ def select_teammates_for_layout(self, population_teamtypes, layout): population = [population_teamtypes[t] for t in population_teamtypes.keys()] teammates_per_type = population[np.random.randint(len(population))] teammates = teammates_per_type[np.random.randint(len(teammates_per_type))] + elif self.prioritized_sampling: teammates = self.select_teammates_prioritized_sampling(population_teamtypes, layout) else: diff --git a/oai_agents/common/overcooked_gui.py b/oai_agents/common/overcooked_gui.py index 65a4a3b..62ae4e8 100644 --- a/oai_agents/common/overcooked_gui.py +++ b/oai_agents/common/overcooked_gui.py @@ -64,17 +64,23 @@ def __init__(self, args, layout_name=None, agent=None, teammates=None, p_idx=0, self.args = args self.layout_name = layout_name or 'asymmetric_advantages' - self.use_subtask_env = False - if self.use_subtask_env: - kwargs = {'single_subtask_id': 10, 'args': args, 'is_eval_env': True} - self.env = OvercookedSubtaskGymEnv(**p_kwargs, **kwargs) - else: - self.env = OvercookedGymEnv(layout_name=self.layout_name, args=args, ret_completed_subtasks=False, + teammates_collection = { + 'eval': { + args.layout: { + 'run_type': [teammates] + } + } + } + + self.env = OvercookedGymEnv(layout_name=self.layout_name, args=args, ret_completed_subtasks=False, is_eval_env=True, horizon=horizon, learner_type='originaler', + teammates_collection=teammates_collection, curriculum=None, ) self.agent = agent self.p_idx = p_idx - self.env.set_teammates(teammates) + + self.env.set_teammates('run_type') + self.env.reset(p_idx=self.p_idx) if self.agent != 'human': self.agent.set_encoding_params(self.p_idx, self.args.horizon, env=self.env, is_haha=isinstance(self.agent, HierarchicalRL), tune_subtasks=False) @@ -205,9 +211,6 @@ def step_env(self, agent_action): completed_task = calculate_completed_subtask(prev_obj, curr_obj, tile_in_front) # print('----', completed_task) - collision = self.env.mdp.prev_step_was_collision - if collision: - self.num_collisions += 1 # Log data curr_reward = sum(info['sparse_r_by_agent']) @@ -231,7 +234,6 @@ def step_env(self, agent_action): # TEAMMATE and POP(TODO): uncommment it and replace teammate_name by teammate_names # "agent": self.teammate_name, "p_idx": self.p_idx, - "collision": collision, "num_collisions": self.num_collisions } trans_str = json.dumps(transition) diff --git a/oai_agents/common/overcooked_simulation.py b/oai_agents/common/overcooked_simulation.py index 20ba9b0..41e64a9 100644 --- a/oai_agents/common/overcooked_simulation.py +++ 
b/oai_agents/common/overcooked_simulation.py @@ -13,16 +13,27 @@ def __init__(self, args, agent, teammates, layout_name, p_idx, horizon=400): self.args = args self.layout_name = layout_name + teammates_collection = { + 'eval': { + layout_name: { + 'run_type': [teammates] + } + } + } + self.env = OvercookedGymEnv(args=args, layout_name=self.layout_name, ret_completed_subtasks=False, is_eval_env=True, horizon=horizon, - learner_type='originaler') + learner_type='originaler', + teammates_collection=teammates_collection, + curriculum=None + ) self.agent = agent self.p_idx = p_idx - self.env.set_teammates(teammates) + self.env.set_teammates('run_type') self.env.reset(p_idx=self.p_idx) assert self.agent is not 'human' diff --git a/scripts/bash_scripts/best_baseline_experiment/c1_best_CAP.sh b/scripts/bash_scripts/best_baseline_experiment/c1_best_CAP.sh new file mode 100644 index 0000000..700668a --- /dev/null +++ b/scripts/bash_scripts/best_baseline_experiment/c1_best_CAP.sh @@ -0,0 +1,67 @@ +#!/bin/sh + +ALGO="best_EGO_with_CAP" +TEAMMATES_LEN=1 +HOW_LONG=20 +NUM_OF_CKPOINTS=40 +LAYOUT_NAMES="c1" +EXP_DIR="${LAYOUT_NAMES}_best_EGO_with_CAP" +TOTAL_EGO_AGENTS=1 +QUICK_TEST=false + +L0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_0" +L1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_0" +L2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_0" +L3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_0" + +M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_1_rew_252.0" +M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_1_rew_284.0" +M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_234.0" +M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_246.0" + +H0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/best" +H1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/best" +H2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/best" +H3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/best" + +L="${L0},${L1},${L2},${L3}" +M="${M0},${M1},${M2},${M3}" +H="${H0},${H1},${H2},${H3}" + +WANDB_MODE="online" +POP_FORCE_TRAINING=false +ADVERSARY_FORCE_TRAINING=false +PRIMARY_FORCE_TRAINING=false + +source scripts/bash_scripts/env_config.sh + +python scripts/train_agents.py \ + --layout-names ${LAYOUT_NAMES} \ + --algo-name ${ALGO} \ + --exp-dir ${EXP_DIR} \ + --num-of-ckpoints ${NUM_OF_CKPOINTS} \ + --teammates-len ${TEAMMATES_LEN} \ + --num-players ${NUM_PLAYERS} \ + --custom-agent-ck-rate-generation ${CUSTOM_AGENT_CK_RATE_GENERATION} \ + --num-steps-in-traj-for-dyn-adv ${NUM_STEPS_IN_TRAJ_FOR_DYN_ADV} \ + --num-static-advs-per-heatmap ${NUM_STATIC_ADVS_PER_HEATMAP} \ + --num-dynamic-advs-per-heatmap ${NUM_DYNAMIC_ADVS_PER_HEATMAP} \ + --use-val-func-for-heatmap-gen ${USE_VAL_FUNC_FOR_HEATMAP_GEN} \ + --prioritized-sampling ${PRIORITIZED_SAMPLING} \ + --n-envs ${N_ENVS} \ + --epoch-timesteps ${EPOCH_TIMESTEPS} \ + --pop-total-training-timesteps ${POP_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-sp-total-training-timesteps ${N_X_SP_TOTAL_TRAINING_TIMESTEPS} \ + --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ + --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ + --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --wandb-mode ${WANDB_MODE} \ + --pop-force-training ${POP_FORCE_TRAINING} \ + --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ + --primary-force-training ${PRIMARY_FORCE_TRAINING} \ + --how-long ${HOW_LONG} \ + --exp-name-prefix "${EXP_NAME_PREFIX}" \ + --low-perfs ${L} \ + --med-perfs ${M} \ + --high-perfs 
${H} \ \ No newline at end of file diff --git a/scripts/bash_scripts/best_baseline_experiment/c2_best_CAP.sh b/scripts/bash_scripts/best_baseline_experiment/c2_best_CAP.sh new file mode 100644 index 0000000..1d67a84 --- /dev/null +++ b/scripts/bash_scripts/best_baseline_experiment/c2_best_CAP.sh @@ -0,0 +1,67 @@ +#!/bin/sh + +ALGO="best_EGO_with_CAP" +TEAMMATES_LEN=1 +HOW_LONG=20 +NUM_OF_CKPOINTS=40 +LAYOUT_NAMES="c2" +EXP_DIR="${LAYOUT_NAMES}_best_EGO_with_CAP" +TOTAL_EGO_AGENTS=1 +QUICK_TEST=false + +L0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_0" +L1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_0" +L2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_0" +L3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_0" + +M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_1_rew_252.0" +M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_1_rew_284.0" +M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_234.0" +M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_246.0" + +H0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/best" +H1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/best" +H2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/best" +H3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/best" + +L="${L0},${L1},${L2},${L3}" +M="${M0},${M1},${M2},${M3}" +H="${H0},${H1},${H2},${H3}" + +WANDB_MODE="online" +POP_FORCE_TRAINING=false +ADVERSARY_FORCE_TRAINING=false +PRIMARY_FORCE_TRAINING=false + +source scripts/bash_scripts/env_config.sh + +python scripts/train_agents.py \ + --layout-names ${LAYOUT_NAMES} \ + --algo-name ${ALGO} \ + --exp-dir ${EXP_DIR} \ + --num-of-ckpoints ${NUM_OF_CKPOINTS} \ + --teammates-len ${TEAMMATES_LEN} \ + --num-players ${NUM_PLAYERS} \ + --custom-agent-ck-rate-generation ${CUSTOM_AGENT_CK_RATE_GENERATION} \ + --num-steps-in-traj-for-dyn-adv ${NUM_STEPS_IN_TRAJ_FOR_DYN_ADV} \ + --num-static-advs-per-heatmap ${NUM_STATIC_ADVS_PER_HEATMAP} \ + --num-dynamic-advs-per-heatmap ${NUM_DYNAMIC_ADVS_PER_HEATMAP} \ + --use-val-func-for-heatmap-gen ${USE_VAL_FUNC_FOR_HEATMAP_GEN} \ + --prioritized-sampling ${PRIORITIZED_SAMPLING} \ + --n-envs ${N_ENVS} \ + --epoch-timesteps ${EPOCH_TIMESTEPS} \ + --pop-total-training-timesteps ${POP_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-sp-total-training-timesteps ${N_X_SP_TOTAL_TRAINING_TIMESTEPS} \ + --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ + --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ + --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --wandb-mode ${WANDB_MODE} \ + --pop-force-training ${POP_FORCE_TRAINING} \ + --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ + --primary-force-training ${PRIMARY_FORCE_TRAINING} \ + --how-long ${HOW_LONG} \ + --exp-name-prefix "${EXP_NAME_PREFIX}" \ + --low-perfs ${L} \ + --med-perfs ${M} \ + --high-perfs ${H} \ \ No newline at end of file diff --git a/scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh b/scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh index 096d928..dfa0c31 100644 --- a/scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh @@ -14,10 +14,10 @@ L1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_0" L2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_0" L3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_0" -# M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_1_rew_252.0" -# M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_1_rew_284.0" -# 
M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_234.0" -# M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_246.0" +M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_1_rew_246.0" +M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_1_rew_256.0" +M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_178.0" +M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_186.0" H0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/best" H1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/best" diff --git a/scripts/bash_scripts/best_baseline_experiment/c3_best_CAP.sh b/scripts/bash_scripts/best_baseline_experiment/c3_best_CAP.sh new file mode 100644 index 0000000..0773e27 --- /dev/null +++ b/scripts/bash_scripts/best_baseline_experiment/c3_best_CAP.sh @@ -0,0 +1,67 @@ +#!/bin/sh + +ALGO="best_EGO_with_CAP" +TEAMMATES_LEN=1 +HOW_LONG=20 +NUM_OF_CKPOINTS=40 +LAYOUT_NAMES="c3" +EXP_DIR="${LAYOUT_NAMES}_best_EGO_with_CAP" +TOTAL_EGO_AGENTS=1 +QUICK_TEST=false + +L0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_0" +L1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_0" +L2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_0" +L3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_0" + +M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_1_rew_252.0" +M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_1_rew_284.0" +M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_234.0" +M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_246.0" + +H0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/best" +H1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/best" +H2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/best" +H3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/best" + +L="${L0},${L1},${L2},${L3}" +M="${M0},${M1},${M2},${M3}" +H="${H0},${H1},${H2},${H3}" + +WANDB_MODE="online" +POP_FORCE_TRAINING=false +ADVERSARY_FORCE_TRAINING=false +PRIMARY_FORCE_TRAINING=false + +source scripts/bash_scripts/env_config.sh + +python scripts/train_agents.py \ + --layout-names ${LAYOUT_NAMES} \ + --algo-name ${ALGO} \ + --exp-dir ${EXP_DIR} \ + --num-of-ckpoints ${NUM_OF_CKPOINTS} \ + --teammates-len ${TEAMMATES_LEN} \ + --num-players ${NUM_PLAYERS} \ + --custom-agent-ck-rate-generation ${CUSTOM_AGENT_CK_RATE_GENERATION} \ + --num-steps-in-traj-for-dyn-adv ${NUM_STEPS_IN_TRAJ_FOR_DYN_ADV} \ + --num-static-advs-per-heatmap ${NUM_STATIC_ADVS_PER_HEATMAP} \ + --num-dynamic-advs-per-heatmap ${NUM_DYNAMIC_ADVS_PER_HEATMAP} \ + --use-val-func-for-heatmap-gen ${USE_VAL_FUNC_FOR_HEATMAP_GEN} \ + --prioritized-sampling ${PRIORITIZED_SAMPLING} \ + --n-envs ${N_ENVS} \ + --epoch-timesteps ${EPOCH_TIMESTEPS} \ + --pop-total-training-timesteps ${POP_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-sp-total-training-timesteps ${N_X_SP_TOTAL_TRAINING_TIMESTEPS} \ + --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ + --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ + --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --wandb-mode ${WANDB_MODE} \ + --pop-force-training ${POP_FORCE_TRAINING} \ + --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ + --primary-force-training ${PRIMARY_FORCE_TRAINING} \ + --how-long ${HOW_LONG} \ + --exp-name-prefix "${EXP_NAME_PREFIX}" \ + --low-perfs ${L} \ + --med-perfs ${M} \ + --high-perfs ${H} \ \ No newline at end of file diff --git a/scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh b/scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh index 00eaf83..8ce7625 100644 --- 
a/scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh @@ -14,10 +14,10 @@ L1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_0" L2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_0" L3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_0" -# M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_1_rew_252.0" -# M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_1_rew_284.0" -# M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_234.0" -# M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_246.0" +M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_1_rew_16.0" +M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_1_rew_14.0" +M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_108.0" +M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_90.0" H0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/best" H1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/best" diff --git a/scripts/bash_scripts/best_baseline_experiment/c4_best_CAP.sh b/scripts/bash_scripts/best_baseline_experiment/c4_best_CAP.sh new file mode 100644 index 0000000..7614f97 --- /dev/null +++ b/scripts/bash_scripts/best_baseline_experiment/c4_best_CAP.sh @@ -0,0 +1,67 @@ +#!/bin/sh + +ALGO="best_EGO_with_CAP" +TEAMMATES_LEN=1 +HOW_LONG=20 +NUM_OF_CKPOINTS=40 +LAYOUT_NAMES="c4" +EXP_DIR="${LAYOUT_NAMES}_best_EGO_with_CAP" +TOTAL_EGO_AGENTS=1 +QUICK_TEST=false + +L0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_0" +L1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_0" +L2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_0" +L3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_0" + +M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_1_rew_252.0" +M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_1_rew_284.0" +M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_234.0" +M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_246.0" + +H0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/best" +H1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/best" +H2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/best" +H3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/best" + +L="${L0},${L1},${L2},${L3}" +M="${M0},${M1},${M2},${M3}" +H="${H0},${H1},${H2},${H3}" + +WANDB_MODE="online" +POP_FORCE_TRAINING=false +ADVERSARY_FORCE_TRAINING=false +PRIMARY_FORCE_TRAINING=false + +source scripts/bash_scripts/env_config.sh + +python scripts/train_agents.py \ + --layout-names ${LAYOUT_NAMES} \ + --algo-name ${ALGO} \ + --exp-dir ${EXP_DIR} \ + --num-of-ckpoints ${NUM_OF_CKPOINTS} \ + --teammates-len ${TEAMMATES_LEN} \ + --num-players ${NUM_PLAYERS} \ + --custom-agent-ck-rate-generation ${CUSTOM_AGENT_CK_RATE_GENERATION} \ + --num-steps-in-traj-for-dyn-adv ${NUM_STEPS_IN_TRAJ_FOR_DYN_ADV} \ + --num-static-advs-per-heatmap ${NUM_STATIC_ADVS_PER_HEATMAP} \ + --num-dynamic-advs-per-heatmap ${NUM_DYNAMIC_ADVS_PER_HEATMAP} \ + --use-val-func-for-heatmap-gen ${USE_VAL_FUNC_FOR_HEATMAP_GEN} \ + --prioritized-sampling ${PRIORITIZED_SAMPLING} \ + --n-envs ${N_ENVS} \ + --epoch-timesteps ${EPOCH_TIMESTEPS} \ + --pop-total-training-timesteps ${POP_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-sp-total-training-timesteps ${N_X_SP_TOTAL_TRAINING_TIMESTEPS} \ + --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ + --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ + --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --wandb-mode ${WANDB_MODE} \ + --pop-force-training ${POP_FORCE_TRAINING} \ + 
--adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ + --primary-force-training ${PRIMARY_FORCE_TRAINING} \ + --how-long ${HOW_LONG} \ + --exp-name-prefix "${EXP_NAME_PREFIX}" \ + --low-perfs ${L} \ + --med-perfs ${M} \ + --high-perfs ${H} \ \ No newline at end of file diff --git a/scripts/bash_scripts/test_run.sh b/scripts/bash_scripts/test_run.sh new file mode 100644 index 0000000..29b9a30 --- /dev/null +++ b/scripts/bash_scripts/test_run.sh @@ -0,0 +1,53 @@ +#!/bin/sh + +ALGO="SPN_XSPCKP" +TEAMMATES_LEN=1 +NUM_PLAYERS=$((TEAMMATES_LEN + 1)) +NUM_OF_CKPOINTS=10 +LAYOUT_NAMES="counter_circuit" +EXP_DIR="$NUM_PLAYERS" # When quick_test=True this will be overwritten to "Test/$EXP_DIR" +TOTAL_EGO_AGENTS=4 +QUICK_TEST=true +HOW_LONG=1 + +POP_FORCE_TRAINING=false +ADVERSARY_FORCE_TRAINING=false +PRIMARY_FORCE_TRAINING=false +# EXP_NAME_PREFIX="test_" + +source scripts/bash_scripts/env_config.sh +# Overwrite the default values from env_config here if needed +N_ENVS=5 +WANDB_MODE="disabled" +EPOCH_TIMESTEPS=3500 +N_X_SP_TOTAL_TRAINING_TIMESTEPS=10000 +FCP_TOTAL_TRAINING_TIMESTEPS=75000 + + +python scripts/train_agents.py \ + --layout-names ${LAYOUT_NAMES} \ + --algo-name ${ALGO} \ + --exp-dir ${EXP_DIR} \ + --num-of-ckpoints ${NUM_OF_CKPOINTS} \ + --teammates-len ${TEAMMATES_LEN} \ + --num-players ${NUM_PLAYERS} \ + --custom-agent-ck-rate-generation ${CUSTOM_AGENT_CK_RATE_GENERATION} \ + --num-steps-in-traj-for-dyn-adv ${NUM_STEPS_IN_TRAJ_FOR_DYN_ADV} \ + --num-static-advs-per-heatmap ${NUM_STATIC_ADVS_PER_HEATMAP} \ + --num-dynamic-advs-per-heatmap ${NUM_DYNAMIC_ADVS_PER_HEATMAP} \ + --use-val-func-for-heatmap-gen ${USE_VAL_FUNC_FOR_HEATMAP_GEN} \ + --prioritized-sampling ${PRIORITIZED_SAMPLING} \ + --n-envs ${N_ENVS} \ + --epoch-timesteps ${EPOCH_TIMESTEPS} \ + --pop-total-training-timesteps ${POP_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-sp-total-training-timesteps ${N_X_SP_TOTAL_TRAINING_TIMESTEPS} \ + --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ + --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ + --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --wandb-mode ${WANDB_MODE} \ + --pop-force-training ${POP_FORCE_TRAINING} \ + --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ + --primary-force-training ${PRIMARY_FORCE_TRAINING} \ + --how-long ${HOW_LONG} \ + --exp-name-prefix "${EXP_NAME_PREFIX}" \ \ No newline at end of file diff --git a/scripts/run_overcooked_game.py b/scripts/run_overcooked_game.py index 4ed0dda..bb03624 100644 --- a/scripts/run_overcooked_game.py +++ b/scripts/run_overcooked_game.py @@ -17,29 +17,36 @@ def get_teammate_from_pop_file(tm_name, tm_score, pop_path, layout_name): args = get_arguments() args.num_players = 2 - args.layout = f'counter_circuit' + args.layout = f'c1' args.p_idx = 0 args.layout_names = [args.layout] args.n_envs = 1 - # teammates_path = [ + teammates_path = [ + # 'agent_models/c1_v4/SP_s1010_h256_tr[SP]_ran/best' + # 'agent_models/c1_best_EGO/best_c1/best' + + 'agent_models/c4_v4/SP_s1010_h256_tr[SP]_ran/best' + # 'agent_models/c4_best_EGO/best_c4/best' + # 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best', # green # 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best', # orange # 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best', # 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best', # 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best', - # ] - # teammates = [load_agent(Path(tm_path), args) for tm_path in teammates_path[:args.num_players - 1]] + ] + 
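For reference, the eval-only wrapper that the updated OvercookedGUI and OvercookedSimulation now expect can be built for any teammate list as below (a sketch mirroring the structure in the diffs above; 'run_type' is simply the key later passed to set_teammates).

    def wrap_teammates_for_eval(layout_name, teammates):
        # one teammate group under an arbitrary key, matching the GUI/simulation diffs
        return {'eval': {layout_name: {'run_type': [teammates]}}}

    collection = wrap_teammates_for_eval('c1', ['teammate_placeholder'])
    print(collection['eval']['c1']['run_type'])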
teammates = [load_agent(Path(tm_path), args) for tm_path in teammates_path[:args.num_players - 1]] # trajectories = tile locations. Top left of the layout is (0, 0), bottom right is (M, N) - teammates = [CustomAgent(args=args, name='human', trajectories={args.layout: [(1, 1), (1, 2)]})] + teammates = [CustomAgent(args=args, name='human', trajectories={args.layout: [(2, 1), (3, 1)]})] # teammates = [DummyAgent(action='random') for _ in range(args.num_players - 1)] # player_path = 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best' + # player_path = 'agent_models/c4_best_EGO/best_c4/best' # player = load_agent(Path(player_path), args) # player = teammates[0] player = 'human' # blue - dc = OvercookedGUI(args, agent=player, teammates=teammates, layout_name=args.layout, p_idx=args.p_idx, fps=10, + dc = OvercookedGUI(args, agent=player, teammates=teammates, layout_name=args.layout, p_idx=args.p_idx, fps=50, horizon=400, gif_name=args.layout) dc.on_execute() diff --git a/scripts/train_agents.py b/scripts/train_agents.py index 74c68a5..3b4b52e 100644 --- a/scripts/train_agents.py +++ b/scripts/train_agents.py @@ -267,7 +267,7 @@ def SPN_XSPCKP(args) -> None: ) -def best_EGO(args) -> None: +def best_EGO(args, add_adv=False) -> None: '''only for 2 players''' primary_train_types = [ TeamType.SELF_PLAY_HIGH, @@ -295,6 +295,7 @@ def best_EGO(args) -> None: curriculum=curriculum, primary_eval_types=primary_eval_types, primary_train_types=curriculum.train_types, + add_adv=add_adv ) @@ -311,7 +312,10 @@ def best_EGO(args) -> None: FCP_traditional(args=args) elif args.algo_name == 'best_EGO': - best_EGO(args=args) + best_EGO(args=args, add_adv=False) + + elif args.algo_name == 'best_EGO_with_CAP': + best_EGO(args=args, add_adv=True) # elif args.algo_name == 'FCP_mhri': # FCP_mhri(args=args) diff --git a/scripts/utils/train_helper.py b/scripts/utils/train_helper.py index 6a26478..78c1220 100644 --- a/scripts/utils/train_helper.py +++ b/scripts/utils/train_helper.py @@ -530,7 +530,7 @@ def get_N_X_FCP_agents( -def get_best_EGO_agents(args, primary_train_types, primary_eval_types, curriculum): +def get_best_EGO_agents(args, primary_train_types, primary_eval_types, curriculum, add_adv=False): '''Code purposed for a very specific experiment, assumes n_players = 2''' from pathlib import Path @@ -565,13 +565,32 @@ def get_best_EGO_agents(args, primary_train_types, primary_eval_types, curriculu if ttype in eval_collection[layout_name]: eval_collection[layout_name][ttype] = [[agent]] + name = f'best_{args.layout_names[0]}' + + if add_adv: + random_pos = { + 'c1': [(1, 1), (2, 1), (3, 1), (4, 1), (1, 2), (2, 2), (3, 2), (4, 2)], + 'c2': [(2, 1), (4, 1), (6, 1), (1, 2), (7, 2), (2, 3), (4, 3), (6, 3)], + 'c3': [(2, 1), (4, 1), (6, 1), (1, 2), (7, 2), (2, 3), (4, 3), (6, 3)], + 'c4': [(3, 1), (5, 1), (7, 1), (1, 2), (9, 2), (3, 3), (5, 3), (7, 3)], + } + + custom_agents = [] + for adv_idx in range(len(random_pos[args.layout_names[0]])): + start_position = {layout: [random_pos[layout][adv_idx]] for layout in args.layout_names} + custom_agents.append([CustomAgent(args=args, name=f'SA{adv_idx}', trajectories=start_position)]) + + train_collection[args.layout_names[0]][TeamType.SELF_PLAY_STATIC_ADV] = custom_agents + eval_collection[args.layout_names[0]][TeamType.SELF_PLAY_STATIC_ADV] = custom_agents + name = f'best_{args.layout_names[0]}_adv' + teammates_collection = { TeammatesCollection.TRAIN: train_collection, TeammatesCollection.EVAL: eval_collection } best_ego_trainer = RLAgentTrainer( - 
name=f'best_{args.layout_names[0]}', + name=name, args=args, agent=None, teammates_collection=teammates_collection, @@ -587,7 +606,10 @@ def get_best_EGO_agents(args, primary_train_types, primary_eval_types, curriculu ) best_ego_trainer.train_agents( - total_train_timesteps=args.n_x_sp_total_training_timesteps, + total_train_timesteps=args.n_x_fcp_total_training_timesteps, tag_for_returning_agent=tag ) - + + + + From 968967ecbb665db13bb599b8b8a56aca7b64a29c Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Mon, 24 Mar 2025 18:39:26 -0600 Subject: [PATCH 18/26] Fix bash scripts --- .../bash_scripts/best_baseline_experiment/c2_best_CAP.sh | 8 ++++---- .../bash_scripts/best_baseline_experiment/c3_best_CAP.sh | 8 ++++---- .../bash_scripts/best_baseline_experiment/c4_best_CAP.sh | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/scripts/bash_scripts/best_baseline_experiment/c2_best_CAP.sh b/scripts/bash_scripts/best_baseline_experiment/c2_best_CAP.sh index 1d67a84..ce43e2f 100644 --- a/scripts/bash_scripts/best_baseline_experiment/c2_best_CAP.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c2_best_CAP.sh @@ -14,10 +14,10 @@ L1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_0" L2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_0" L3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_0" -M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_1_rew_252.0" -M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_1_rew_284.0" -M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_234.0" -M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_246.0" +M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_1_rew_246.0" +M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_1_rew_256.0" +M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_178.0" +M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_186.0" H0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/best" H1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/best" diff --git a/scripts/bash_scripts/best_baseline_experiment/c3_best_CAP.sh b/scripts/bash_scripts/best_baseline_experiment/c3_best_CAP.sh index 0773e27..84b6152 100644 --- a/scripts/bash_scripts/best_baseline_experiment/c3_best_CAP.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c3_best_CAP.sh @@ -14,10 +14,10 @@ L1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_0" L2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_0" L3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_0" -M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_1_rew_252.0" -M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_1_rew_284.0" -M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_234.0" -M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_246.0" +M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_1_rew_16.0" +M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_1_rew_14.0" +M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_108.0" +M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_90.0" H0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/best" H1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/best" diff --git a/scripts/bash_scripts/best_baseline_experiment/c4_best_CAP.sh b/scripts/bash_scripts/best_baseline_experiment/c4_best_CAP.sh index 7614f97..2d35934 100644 --- a/scripts/bash_scripts/best_baseline_experiment/c4_best_CAP.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c4_best_CAP.sh @@ -14,10 +14,10 @@ L1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_0" L2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_0" L3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_0" 
-M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_1_rew_252.0" -M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_1_rew_284.0" -M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_234.0" -M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_246.0" +M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_2_rew_192.0" +M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_2_rew_118.0" +M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_54.0" +M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_104.0" H0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/best" H1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/best" From 9b17b76d072a51c85e91c9601caf7063e07d7dd0 Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Mon, 24 Mar 2025 19:36:13 -0600 Subject: [PATCH 19/26] Code to calculate resource usage --- oai_agents/common/overcooked_gui.py | 57 +++++++++++++++++++++++++++++ scripts/run_overcooked_game.py | 10 ++--- 2 files changed, 62 insertions(+), 5 deletions(-) diff --git a/oai_agents/common/overcooked_gui.py b/oai_agents/common/overcooked_gui.py index 62ae4e8..b211dd0 100644 --- a/oai_agents/common/overcooked_gui.py +++ b/oai_agents/common/overcooked_gui.py @@ -120,6 +120,22 @@ def __init__(self, args, layout_name=None, agent=None, teammates=None, p_idx=0, self.gif_name = gif_name if not os.path.exists(f'data/screenshots/{self.gif_name}'): os.makedirs(f'data/screenshots/{self.gif_name}') + + + self.resource_locations = {} + for y, row in enumerate(self.env.env.mdp.terrain_mtx): + for x, cell in enumerate(row): + if cell in ['S', 'D', 'P', 'O']: + self.resource_locations[(x, y)] = cell + + self.resource_usage = { + agent_idx: {pos: 0 for pos in self.resource_locations} + for agent_idx in range(len(self.env.state.players)) + } + + print(f"Resource locations: {self.resource_locations}") + + def start_screen(self): pygame.init() @@ -211,6 +227,17 @@ def step_env(self, agent_action): completed_task = calculate_completed_subtask(prev_obj, curr_obj, tile_in_front) # print('----', completed_task) + joint_action = self.env.get_joint_action() + for idx, player in enumerate(self.env.state.players): + # pos_in_front = facing(self.env.env.mdp.terrain_mtx, player) + + x, y = player.position[0] + player.orientation[0], player.position[1] + player.orientation[1] + pos_in_front = (x, y) + + action = joint_action[idx] + if action == Action.INTERACT: + if pos_in_front in self.resource_locations: + self.resource_usage[idx][pos_in_front] += 1 # Log data curr_reward = sum(info['sparse_r_by_agent']) @@ -301,6 +328,36 @@ def on_execute(self): self.on_cleanup() print(f'Trial finished in {self.curr_tick} steps with total reward {self.score}') + # print("Resource usage breakdown by agent and resource position:") + # for agent_idx, usage in self.resource_usage.items(): + # print(f"Agent {agent_idx}:") + # for pos, count in usage.items(): + # if count > 0: + # res_type = self.resource_locations[pos] + # print(f" {res_type} at {pos}: {count} times") + + from collections import defaultdict + + # Step 1: Gather all resource locations and types + all_resource_entries = [] + for pos, res_type in self.resource_locations.items(): + all_resource_entries.append((res_type, pos)) + + # Step 2: Sort by resource type then position + all_resource_entries.sort(key=lambda x: (x[0], x[1])) # Sort by type, then position + + # Step 3: Print header and values + print("Resource usage comparison (Agent 0 vs Agent 1):\n") + print(f"{'Type':<4} {'Position':<10} | {'Agent 0':<8} {'Agent 1':<8}") + print("-" * 36) + + for res_type, pos in 
all_resource_entries: + a0_count = self.resource_usage[0].get(pos, 0) + a1_count = self.resource_usage[1].get(pos, 0) + if a0_count > 0 or a1_count > 0: # Only show if someone used it + print(f"{res_type:<4} {str(pos):<10} | {a0_count:<8} {a1_count:<8}") + + def save_trajectory(self, data_path): df = pd.DataFrame(self.trajectory) df.to_pickle(data_path / f'{self.layout_name}.{self.trial_id}.pickle') \ No newline at end of file diff --git a/scripts/run_overcooked_game.py b/scripts/run_overcooked_game.py index bb03624..e0b9cbf 100644 --- a/scripts/run_overcooked_game.py +++ b/scripts/run_overcooked_game.py @@ -24,9 +24,9 @@ def get_teammate_from_pop_file(tm_name, tm_score, pop_path, layout_name): teammates_path = [ # 'agent_models/c1_v4/SP_s1010_h256_tr[SP]_ran/best' - # 'agent_models/c1_best_EGO/best_c1/best' + 'agent_models/c1_best_EGO/best_c1/best' - 'agent_models/c4_v4/SP_s1010_h256_tr[SP]_ran/best' + # 'agent_models/c4_v4/SP_s1010_h256_tr[SP]_ran/best' # 'agent_models/c4_best_EGO/best_c4/best' # 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best', # green @@ -38,14 +38,14 @@ def get_teammate_from_pop_file(tm_name, tm_score, pop_path, layout_name): teammates = [load_agent(Path(tm_path), args) for tm_path in teammates_path[:args.num_players - 1]] # trajectories = tile locations. Top left of the layout is (0, 0), bottom right is (M, N) - teammates = [CustomAgent(args=args, name='human', trajectories={args.layout: [(2, 1), (3, 1)]})] + # teammates = [CustomAgent(args=args, name='human', trajectories={args.layout: [(2, 1), (3, 1)]})] # teammates = [DummyAgent(action='random') for _ in range(args.num_players - 1)] # player_path = 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best' # player_path = 'agent_models/c4_best_EGO/best_c4/best' # player = load_agent(Path(player_path), args) - # player = teammates[0] - player = 'human' # blue + player = teammates[0] + # player = 'human' # blue dc = OvercookedGUI(args, agent=player, teammates=teammates, layout_name=args.layout, p_idx=args.p_idx, fps=50, horizon=400, gif_name=args.layout) From 260ee9a103c0906ae401c4f20b252717373c1aeb Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Wed, 26 Mar 2025 18:35:12 -0600 Subject: [PATCH 20/26] fix sp tc --- oai_agents/agents/hrl.py | 2 +- oai_agents/agents/mep_population_manager.py | 6 +-- oai_agents/agents/rl.py | 14 +++-- oai_agents/common/arguments.py | 8 +-- oai_agents/common/multi_setup_trainer.py | 51 ++++++++++++++----- oai_agents/common/population.py | 36 ++++++------- oai_agents/common/teammates_collection.py | 2 +- sandbox/generate_agents_for_eval.py | 6 +-- .../best_baseline_experiment/MEP_POP_c1.sh | 4 +- .../best_baseline_experiment/MEP_POP_c2.sh | 4 +- .../best_baseline_experiment/MEP_POP_c3.sh | 4 +- .../best_baseline_experiment/MEP_POP_c4.sh | 4 +- .../best_baseline_experiment/SP_c1_v1.sh | 4 +- .../best_baseline_experiment/SP_c1_v2.sh | 4 +- .../best_baseline_experiment/SP_c1_v3.sh | 4 +- .../best_baseline_experiment/SP_c1_v4.sh | 4 +- .../best_baseline_experiment/SP_c2_v1.sh | 4 +- .../best_baseline_experiment/SP_c2_v2.sh | 4 +- .../best_baseline_experiment/SP_c2_v3.sh | 4 +- .../best_baseline_experiment/SP_c2_v4.sh | 4 +- .../best_baseline_experiment/SP_c3_v1.sh | 4 +- .../best_baseline_experiment/SP_c3_v2.sh | 4 +- .../best_baseline_experiment/SP_c3_v3.sh | 4 +- .../best_baseline_experiment/SP_c3_v4.sh | 4 +- .../best_baseline_experiment/SP_c4_v1.sh | 4 +- .../best_baseline_experiment/SP_c4_v2.sh | 4 +- .../best_baseline_experiment/SP_c4_v3.sh | 4 +- .../best_baseline_experiment/SP_c4_v4.sh | 4 +- 
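The resource-usage bookkeeping added to the GUI above boils down to: the tile in front is position plus orientation, and INTERACT presses whose facing tile is a resource are counted per agent. A standalone sketch with made-up coordinates:

    from collections import defaultdict

    def facing_tile(position, orientation):
        # same arithmetic as in the GUI patch: the tile directly in front of a player
        return (position[0] + orientation[0], position[1] + orientation[1])

    resource_locations = {(3, 0): 'P', (0, 2): 'D'}   # e.g. a pot and a dish dispenser
    resource_usage = defaultdict(int)

    # pretend player 0 presses INTERACT at (3, 1) while facing "up" (0, -1)
    tile = facing_tile((3, 1), (0, -1))
    if tile in resource_locations:
        resource_usage[(0, tile)] += 1
    print(dict(resource_usage))   # {(0, (3, 0)): 1}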
.../best_baseline_experiment/c1_best_CAP.sh | 4 +- .../best_baseline_experiment/c1_best_ego.sh | 4 +- .../best_baseline_experiment/c2_best_CAP.sh | 4 +- .../best_baseline_experiment/c2_best_ego.sh | 4 +- .../best_baseline_experiment/c3_best_CAP.sh | 4 +- .../best_baseline_experiment/c3_best_ego.sh | 4 +- .../best_baseline_experiment/c4_best_CAP.sh | 4 +- .../best_baseline_experiment/c4_best_ego.sh | 4 +- scripts/bash_scripts/classic_CAP_2_player.sh | 11 ++-- scripts/bash_scripts/classic_FCP_2_player.sh | 11 ++-- scripts/bash_scripts/profile.sh | 5 +- scripts/bash_scripts/test_run.sh | 12 +++-- scripts/train_agents.py | 27 +++++----- scripts/train_agents_without_bashing.py | 4 +- scripts/utils/train_helper.py | 8 +-- 43 files changed, 182 insertions(+), 133 deletions(-) diff --git a/oai_agents/agents/hrl.py b/oai_agents/agents/hrl.py index f0ee0ef..04c76cb 100644 --- a/oai_agents/agents/hrl.py +++ b/oai_agents/agents/hrl.py @@ -1,5 +1,5 @@ from oai_agents.agents.base_agent import OAIAgent, PolicyClone -from oai_agents.agents.rl import RLAgentTrainer, VEC_ENV_CLS +from oai_agents.agents.rl import RLAgentTrainer from oai_agents.common.arguments import get_args_to_save, set_args_from_load from oai_agents.common.subtasks import Subtasks # from oai_agents.gym_environments.worker_env import OvercookedSubtaskGymEnv diff --git a/oai_agents/agents/mep_population_manager.py b/oai_agents/agents/mep_population_manager.py index bffb060..e3040b7 100644 --- a/oai_agents/agents/mep_population_manager.py +++ b/oai_agents/agents/mep_population_manager.py @@ -19,7 +19,7 @@ def __init__(self, population_size, args): self.epoch_timesteps = args.epoch_timesteps # Number of timesteps per training episode seeds, h_dims = generate_hdim_and_seed( for_evaluation=args.gen_pop_for_eval, - total_ego_agents=population_size + total_sp_agents=population_size ) self.population: List[RLAgentTrainer] = [] @@ -204,9 +204,9 @@ def train_population(self, total_timesteps: int, num_of_ckpoints: int, eval_inte set_input(args=args) - args.total_ego_agents = 4 + args.total_sp_agents = 4 - manager = MEPPopulationManager(population_size=args.total_ego_agents, args=args) + manager = MEPPopulationManager(population_size=args.total_sp_agents, args=args) manager.train_population( total_timesteps=args.pop_total_training_timesteps, num_of_ckpoints=args.num_of_ckpoints, diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index 9b6559d..f9fde02 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -9,16 +9,12 @@ import numpy as np from stable_baselines3 import PPO, DQN from stable_baselines3.common.env_util import make_vec_env -from stable_baselines3.common.vec_env import DummyVecEnv +from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv from sb3_contrib import RecurrentPPO import wandb import os from typing import Literal -# VEC_ENV_CLS = DummyVecEnv # -VEC_ENV_CLS = SubprocVecEnv - - class RLAgentTrainer(OAITrainer): ''' Train an RL agent to play with a teammates_collection of agents.''' def __init__( @@ -33,6 +29,8 @@ def __init__( ): train_types = train_types if train_types is not None else [] eval_types = eval_types if eval_types is not None else [] + + # assert teammates_collection, "Teammates collection must be provided" name = name or 'rl_agent' super(RLAgentTrainer, self).__init__(name, args, seed=seed) @@ -140,6 +138,12 @@ def print_tc_helper(self, teammates_collection, message=None): def get_envs(self, _env, _eval_envs, deterministic, learner_type, teammates_collection, curriculum, 
start_timestep: int = 0): from oai_agents.gym_environments.base_overcooked_env import OvercookedGymEnv + + if self.args.use_multipleprocesses: + VEC_ENV_CLS = SubprocVecEnv + else: + VEC_ENV_CLS = DummyVecEnv + if _env is None: env_kwargs = {'shape_rewards': True, 'full_init': False, 'stack_frames': self.use_frame_stack, 'deterministic': deterministic,'args': self.args, 'learner_type': learner_type, 'start_timestep': start_timestep, diff --git a/oai_agents/common/arguments.py b/oai_agents/common/arguments.py index ff45dad..c084fd8 100644 --- a/oai_agents/common/arguments.py +++ b/oai_agents/common/arguments.py @@ -98,7 +98,10 @@ def get_arguments(additional_args: Optional[List] = None): parser.add_argument("--custom-agent-ck-rate-generation", type=int) parser.add_argument('--gen-pop-for-eval', type=str2bool, default=False, help="Specifies whether to generate a population of agents for evaluation purposes. Currently, this functionality is limited to self-play agents, as support for other methods has not yet been implemented..)") - parser.add_argument("--total-ego-agents", type=int, default=4) + parser.add_argument('--use-cuda', type=str2bool, help="Specifies whether to use cuda for training.") + parser.add_argument('--use-multipleprocesses', type=str2bool, help="SubprocVecEnv vs DummyVecEnv") + + parser.add_argument("--total-sp-agents", type=int, default=4) parser.add_argument("--ck-list-offset", type=int, default=0) parser.add_argument('--low-perfs', help='shitty code to run ult baseline exp', default='default') @@ -112,8 +115,7 @@ def get_arguments(additional_args: Optional[List] = None): args = parser.parse_args() args.base_dir = Path(args.base_dir) - # args.device = th.device('cuda' if th.cuda.is_available() else 'cpu') - args.device = th.device('cpu') + args.device = th.device('cuda' if args.use_cuda and th.cuda.is_available() else 'cpu') args.layout_names = args.layout_names.split(',') args.low_perfs = args.low_perfs.split(',') diff --git a/oai_agents/common/multi_setup_trainer.py b/oai_agents/common/multi_setup_trainer.py index d443f07..1ea8cbb 100644 --- a/oai_agents/common/multi_setup_trainer.py +++ b/oai_agents/common/multi_setup_trainer.py @@ -1,8 +1,10 @@ import concurrent.futures +import dill + from scripts.utils.common import generate_name from oai_agents.common.tags import Prefix from oai_agents.agents.rl import RLAgentTrainer -import dill +from oai_agents.common.teammates_collection import generate_TC class MultiSetupTrainer: @@ -21,7 +23,7 @@ def __init__( self.tag_for_returning_agent = tag_for_returning_agent self.parallel = args.parallel - self.total_ego_agents = args.total_ego_agents + self.total_sp_agents = args.total_sp_agents self.for_evaluation = args.gen_pop_for_eval def get_trained_agent(self, seed, h_dim): @@ -31,10 +33,10 @@ def get_multiple_trained_agents(self): agents = [] seeds, hdims = generate_hdim_and_seed( - for_evaluation=self.for_evaluation, total_ego_agents=self.total_ego_agents) + for_evaluation=self.for_evaluation, total_sp_agents=self.total_sp_agents) inputs = [ (seeds[i], hdims[i]) - for i in range(self.total_ego_agents) + for i in range(self.total_sp_agents) ] if self.args.parallel: @@ -117,9 +119,30 @@ def get_trained_agent(self, seed, h_dim): curriculum=self.curriculum ) + # print('before generate_randomly_initialized_agent') + init_agent = RLAgentTrainer.generate_randomly_initialized_agent( # need a cleaner way to do this + args=self.args, + name=name, + learner_type=self.args.primary_learner_type, + hidden_dim=h_dim, + seed=seed, + 
n_envs=self.args.n_envs + ) + + population = {layout_name: [] for layout_name in self.args.layout_names} + teammates_collection = generate_TC(args=self.args, + population=population, + agent=init_agent, + train_types=self.train_types, + eval_types_to_generate=self.eval_types['generate'], + eval_types_to_read_from_file=self.eval_types['load'], + unseen_teammates_len=0, + use_entire_population_for_train_types_teammates=True) + + return self.get_reinforcement_agent( name=name, - teammates_collection={}, + teammates_collection=teammates_collection, curriculum=self.curriculum, h_dim=h_dim, seed=seed, @@ -128,7 +151,7 @@ def get_trained_agent(self, seed, h_dim): total_train_timesteps=self.args.pop_total_training_timesteps, ) -def generate_hdim_and_seed(for_evaluation: bool, total_ego_agents: int): +def generate_hdim_and_seed(for_evaluation: bool, total_sp_agents: int): evaluation_seeds = [3031, 4041, 5051, 3708, 3809, 3910, 4607, 5506] evaluation_hdims = [256] * len(evaluation_seeds) @@ -139,23 +162,23 @@ def generate_hdim_and_seed(for_evaluation: bool, total_ego_agents: int): training_hdims = [256] * len(training_seeds) if for_evaluation: - assert total_ego_agents <= len(evaluation_seeds), ( - f"Total ego agents ({total_ego_agents}) cannot exceed the number of evaluation seeds ({len(evaluation_seeds)}). " + assert total_sp_agents <= len(evaluation_seeds), ( + f"Total ego agents ({total_sp_agents}) cannot exceed the number of evaluation seeds ({len(evaluation_seeds)}). " "Please either increase the number of evaluation seeds in the `generate_hdim_and_seed` function or decrease " - f"`self.total_ego_agents` (currently set to {total_ego_agents}, based on `args.total_ego_agents`)." + f"`self.total_sp_agents` (currently set to {total_sp_agents}, based on `args.total_sp_agents`)." ) seeds = evaluation_seeds hdims = evaluation_hdims else: - assert total_ego_agents <= len(training_seeds), ( - f"Total ego agents ({total_ego_agents}) cannot exceed the number of training seeds ({len(training_seeds)}). " + assert total_sp_agents <= len(training_seeds), ( + f"Total ego agents ({total_sp_agents}) cannot exceed the number of training seeds ({len(training_seeds)}). " "Please either increase the number of training seeds in the `generate_hdim_and_seed` function or decrease " - f"`self.total_ego_agents` (currently set to {total_ego_agents}, based on `args.total_ego_agents`)." + f"`self.total_sp_agents` (currently set to {total_sp_agents}, based on `args.total_sp_agents`)." 
) seeds = training_seeds hdims = training_hdims - selected_seeds = seeds[:total_ego_agents] - selected_hdims = hdims[:total_ego_agents] + selected_seeds = seeds[:total_sp_agents] + selected_hdims = hdims[:total_sp_agents] return selected_seeds, selected_hdims diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py index 82f7160..25c450f 100644 --- a/oai_agents/common/population.py +++ b/oai_agents/common/population.py @@ -68,11 +68,11 @@ def train_SP_with_checkpoints(args, total_training_timesteps, ck_rate, seed, h_d def ensure_enough_SP_agents(teammates_len, train_types, eval_types, - total_ego_agents, + total_sp_agents, unseen_teammates_len=0, # only used for SPX teamtypes ): - total_population_len = len(AgentPerformance.ALL) * total_ego_agents + total_population_len = len(AgentPerformance.ALL) * total_sp_agents train_agents_len, eval_agents_len = 0, 0 @@ -93,14 +93,14 @@ def ensure_enough_SP_agents(teammates_len, eval_agents_len += unseen_teammates_len assert total_population_len >= train_agents_len + eval_agents_len, "Not enough agents to train and evaluate." \ - " Should increase total_ego_agents." \ + " Should increase total_sp_agents." \ f" Total population len: {total_population_len}," \ f" train_agents len: {train_agents_len}," \ f" eval_agents len: {eval_agents_len}, "\ - f" total_ego_agents: {total_ego_agents}." + f" total_sp_agents: {total_sp_agents}." -def generate_hdim_and_seed(for_evaluation: bool, total_ego_agents: int): +def generate_hdim_and_seed(for_evaluation: bool, total_sp_agents: int): ''' Generates lists of seeds and hidden dimensions for a given number of agents for training or evaluation. @@ -111,7 +111,7 @@ def generate_hdim_and_seed(for_evaluation: bool, total_ego_agents: int): Arguments: for_evaluation -- a boolean indicating whether to generate settings for evluation (True) or training (False). - total_ego_agents -- the number of (hidden_dim, seed) pairs to generate. + total_sp_agents -- the number of (hidden_dim, seed) pairs to generate. Returns: selected_seeds -- list of selected seeds @@ -128,25 +128,25 @@ def generate_hdim_and_seed(for_evaluation: bool, total_ego_agents: int): # Select appropriate predefined settings based on the input setting if for_evaluation: - assert total_ego_agents <= len(evaluation_seeds), ( - f"Total ego agents ({total_ego_agents}) cannot exceed the number of evaluation seeds ({len(evaluation_seeds)}). " + assert total_sp_agents <= len(evaluation_seeds), ( + f"Total ego agents ({total_sp_agents}) cannot exceed the number of evaluation seeds ({len(evaluation_seeds)}). " "Please either increase the number of evaluation seeds in the `generate_hdim_and_seed` function or decrease " - f"`self.total_ego_agents` (currently set to {total_ego_agents}, based on `args.total_ego_agents`)." + f"`self.total_sp_agents` (currently set to {total_sp_agents}, based on `args.total_sp_agents`)." ) seeds = evaluation_seeds hdims = evaluation_hdims else: - assert total_ego_agents <= len(training_seeds), ( - f"Total ego agents ({total_ego_agents}) cannot exceed the number of training seeds ({len(training_seeds)}). " + assert total_sp_agents <= len(training_seeds), ( + f"Total ego agents ({total_sp_agents}) cannot exceed the number of training seeds ({len(training_seeds)}). " "Please either increase the number of training seeds in the `generate_hdim_and_seed` function or decrease " - f"`self.total_ego_agents` (currently set to {total_ego_agents}, based on `args.total_ego_agents`)." 
+ f"`self.total_sp_agents` (currently set to {total_sp_agents}, based on `args.total_sp_agents`)." ) seeds = training_seeds hdims = training_hdims # Initialize selected lists - selected_seeds = seeds[:total_ego_agents] - selected_hdims = hdims[:total_ego_agents] + selected_seeds = seeds[:total_sp_agents] + selected_hdims = hdims[:total_sp_agents] return selected_seeds, selected_hdims @@ -175,7 +175,7 @@ def get_performance_based_population_by_layouts( total_training_timesteps, train_types, eval_types, - total_ego_agents, + total_sp_agents, unseen_teammates_len=0, force_training=False, tag=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL, @@ -198,14 +198,14 @@ def get_performance_based_population_by_layouts( unseen_teammates_len=unseen_teammates_len, train_types=train_types, eval_types=eval_types, - total_ego_agents=total_ego_agents + total_sp_agents=total_sp_agents ) seed, h_dim = generate_hdim_and_seed( - for_evaluation=args.gen_pop_for_eval, total_ego_agents=total_ego_agents) + for_evaluation=args.gen_pop_for_eval, total_sp_agents=total_sp_agents) inputs = [ (args, total_training_timesteps, ck_rate, seed[i], h_dim[i], True) - for i in range(total_ego_agents) + for i in range(total_sp_agents) ] diff --git a/oai_agents/common/teammates_collection.py b/oai_agents/common/teammates_collection.py index 1a4f823..7c64b7e 100644 --- a/oai_agents/common/teammates_collection.py +++ b/oai_agents/common/teammates_collection.py @@ -72,7 +72,7 @@ def get_teammates(agents_perftag_score:list, teamtypes:list, teammates_len:int, elif teamtype == TeamType.SELF_PLAY: assert agent is not None - all_teammates[teamtype] = [agent for _ in range(teammates_len)] + all_teammates[teamtype] = [[agent for _ in range(teammates_len)]] elif teamtype == TeamType.SELF_PLAY_HIGH: assert agent is not None diff --git a/sandbox/generate_agents_for_eval.py b/sandbox/generate_agents_for_eval.py index 01aeb47..894f095 100644 --- a/sandbox/generate_agents_for_eval.py +++ b/sandbox/generate_agents_for_eval.py @@ -32,7 +32,7 @@ def set_input(args, quick_test=False): args.epoch_timesteps = 1e5 args.pop_total_training_timesteps = 5e6 args.fcp_total_training_timesteps = 5e6 - args.total_ego_agents = 5 + args.total_sp_agents = 5 else: # Used for doing quick tests args.sb_verbose = 1 @@ -41,7 +41,7 @@ def set_input(args, quick_test=False): args.epoch_timesteps = 2 args.pop_total_training_timesteps = 3500 args.fcp_total_training_timesteps = 3500 - args.total_ego_agents = 4 + args.total_sp_agents = 4 if __name__ == "__main__": @@ -69,7 +69,7 @@ def set_input(args, quick_test=False): train_types = TeamType.ALL_TYPES_BESIDES_SP, eval_types_to_generate = [], eval_types_to_load_from_file = [], - total_ego_agents=args.total_ego_agents, + total_sp_agents=args.total_sp_agents, total_training_timesteps = args.pop_total_training_timesteps, force_training=pop_force_training, ) diff --git a/scripts/bash_scripts/best_baseline_experiment/MEP_POP_c1.sh b/scripts/bash_scripts/best_baseline_experiment/MEP_POP_c1.sh index da05e03..8a35dd5 100755 --- a/scripts/bash_scripts/best_baseline_experiment/MEP_POP_c1.sh +++ b/scripts/bash_scripts/best_baseline_experiment/MEP_POP_c1.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c1" EXP_DIR="${ALGO}_${LAYOUT_NAMES}/${NUM_PLAYERS}" -TOTAL_EGO_AGENTS=4 +TOTAL_SP_AGENTS=4 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ 
--n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/MEP_POP_c2.sh b/scripts/bash_scripts/best_baseline_experiment/MEP_POP_c2.sh index cff658c..982c2f8 100755 --- a/scripts/bash_scripts/best_baseline_experiment/MEP_POP_c2.sh +++ b/scripts/bash_scripts/best_baseline_experiment/MEP_POP_c2.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c2" EXP_DIR="${ALGO}_${LAYOUT_NAMES}/${NUM_PLAYERS}" -TOTAL_EGO_AGENTS=4 +TOTAL_SP_AGENTS=4 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/MEP_POP_c3.sh b/scripts/bash_scripts/best_baseline_experiment/MEP_POP_c3.sh index 8832d31..5ab19a9 100755 --- a/scripts/bash_scripts/best_baseline_experiment/MEP_POP_c3.sh +++ b/scripts/bash_scripts/best_baseline_experiment/MEP_POP_c3.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c3" EXP_DIR="${ALGO}_${LAYOUT_NAMES}/${NUM_PLAYERS}" -TOTAL_EGO_AGENTS=4 +TOTAL_SP_AGENTS=4 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/MEP_POP_c4.sh b/scripts/bash_scripts/best_baseline_experiment/MEP_POP_c4.sh index d7e3bb7..6940cdf 100755 --- a/scripts/bash_scripts/best_baseline_experiment/MEP_POP_c4.sh +++ b/scripts/bash_scripts/best_baseline_experiment/MEP_POP_c4.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c4" EXP_DIR="${ALGO}_${LAYOUT_NAMES}/${NUM_PLAYERS}" -TOTAL_EGO_AGENTS=4 +TOTAL_SP_AGENTS=4 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/SP_c1_v1.sh b/scripts/bash_scripts/best_baseline_experiment/SP_c1_v1.sh index 3497487..fddde4e 100755 --- a/scripts/bash_scripts/best_baseline_experiment/SP_c1_v1.sh +++ b/scripts/bash_scripts/best_baseline_experiment/SP_c1_v1.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c1_v1" 
EXP_DIR=${LAYOUT_NAMES} -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/SP_c1_v2.sh b/scripts/bash_scripts/best_baseline_experiment/SP_c1_v2.sh index c34e5f6..2333a3f 100644 --- a/scripts/bash_scripts/best_baseline_experiment/SP_c1_v2.sh +++ b/scripts/bash_scripts/best_baseline_experiment/SP_c1_v2.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c1_v2" EXP_DIR=${LAYOUT_NAMES} -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/SP_c1_v3.sh b/scripts/bash_scripts/best_baseline_experiment/SP_c1_v3.sh index 28a08aa..3b611aa 100644 --- a/scripts/bash_scripts/best_baseline_experiment/SP_c1_v3.sh +++ b/scripts/bash_scripts/best_baseline_experiment/SP_c1_v3.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c1_v3" EXP_DIR=${LAYOUT_NAMES} -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/SP_c1_v4.sh b/scripts/bash_scripts/best_baseline_experiment/SP_c1_v4.sh index 17ed213..b6f60d9 100644 --- a/scripts/bash_scripts/best_baseline_experiment/SP_c1_v4.sh +++ b/scripts/bash_scripts/best_baseline_experiment/SP_c1_v4.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c1_v4" EXP_DIR=${LAYOUT_NAMES} -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/SP_c2_v1.sh b/scripts/bash_scripts/best_baseline_experiment/SP_c2_v1.sh index eba16a9..100cd1e 
100644 --- a/scripts/bash_scripts/best_baseline_experiment/SP_c2_v1.sh +++ b/scripts/bash_scripts/best_baseline_experiment/SP_c2_v1.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c2_v1" EXP_DIR=${LAYOUT_NAMES} -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/SP_c2_v2.sh b/scripts/bash_scripts/best_baseline_experiment/SP_c2_v2.sh index b37ba98..46846a6 100644 --- a/scripts/bash_scripts/best_baseline_experiment/SP_c2_v2.sh +++ b/scripts/bash_scripts/best_baseline_experiment/SP_c2_v2.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c2_v2" EXP_DIR=${LAYOUT_NAMES} -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/SP_c2_v3.sh b/scripts/bash_scripts/best_baseline_experiment/SP_c2_v3.sh index 654c80f..f967325 100644 --- a/scripts/bash_scripts/best_baseline_experiment/SP_c2_v3.sh +++ b/scripts/bash_scripts/best_baseline_experiment/SP_c2_v3.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c2_v3" EXP_DIR=${LAYOUT_NAMES} -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/SP_c2_v4.sh b/scripts/bash_scripts/best_baseline_experiment/SP_c2_v4.sh index 021f073..44b343c 100644 --- a/scripts/bash_scripts/best_baseline_experiment/SP_c2_v4.sh +++ b/scripts/bash_scripts/best_baseline_experiment/SP_c2_v4.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c2_v4" EXP_DIR=${LAYOUT_NAMES} -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ 
--adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/SP_c3_v1.sh b/scripts/bash_scripts/best_baseline_experiment/SP_c3_v1.sh index 7c65766..d92909e 100644 --- a/scripts/bash_scripts/best_baseline_experiment/SP_c3_v1.sh +++ b/scripts/bash_scripts/best_baseline_experiment/SP_c3_v1.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c3_v1" EXP_DIR=${LAYOUT_NAMES} -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/SP_c3_v2.sh b/scripts/bash_scripts/best_baseline_experiment/SP_c3_v2.sh index 4a42883..8d05bc0 100644 --- a/scripts/bash_scripts/best_baseline_experiment/SP_c3_v2.sh +++ b/scripts/bash_scripts/best_baseline_experiment/SP_c3_v2.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c3_v2" EXP_DIR=${LAYOUT_NAMES} -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/SP_c3_v3.sh b/scripts/bash_scripts/best_baseline_experiment/SP_c3_v3.sh index ca2ca22..e7d075e 100644 --- a/scripts/bash_scripts/best_baseline_experiment/SP_c3_v3.sh +++ b/scripts/bash_scripts/best_baseline_experiment/SP_c3_v3.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c3_v3" EXP_DIR=${LAYOUT_NAMES} -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/SP_c3_v4.sh b/scripts/bash_scripts/best_baseline_experiment/SP_c3_v4.sh index c24659f..27884ba 100644 --- a/scripts/bash_scripts/best_baseline_experiment/SP_c3_v4.sh +++ b/scripts/bash_scripts/best_baseline_experiment/SP_c3_v4.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c3_v4" EXP_DIR=${LAYOUT_NAMES} -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ 
--n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/SP_c4_v1.sh b/scripts/bash_scripts/best_baseline_experiment/SP_c4_v1.sh index b1fcff9..3f67782 100644 --- a/scripts/bash_scripts/best_baseline_experiment/SP_c4_v1.sh +++ b/scripts/bash_scripts/best_baseline_experiment/SP_c4_v1.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c4_v1" EXP_DIR=${LAYOUT_NAMES} -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/SP_c4_v2.sh b/scripts/bash_scripts/best_baseline_experiment/SP_c4_v2.sh index 6133252..00935af 100644 --- a/scripts/bash_scripts/best_baseline_experiment/SP_c4_v2.sh +++ b/scripts/bash_scripts/best_baseline_experiment/SP_c4_v2.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c4_v2" EXP_DIR=${LAYOUT_NAMES} -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/SP_c4_v3.sh b/scripts/bash_scripts/best_baseline_experiment/SP_c4_v3.sh index 90f5ad1..32bae60 100644 --- a/scripts/bash_scripts/best_baseline_experiment/SP_c4_v3.sh +++ b/scripts/bash_scripts/best_baseline_experiment/SP_c4_v3.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c4_v3" EXP_DIR=${LAYOUT_NAMES} -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/SP_c4_v4.sh b/scripts/bash_scripts/best_baseline_experiment/SP_c4_v4.sh index dfe9068..f0e85ff 100644 --- a/scripts/bash_scripts/best_baseline_experiment/SP_c4_v4.sh +++ b/scripts/bash_scripts/best_baseline_experiment/SP_c4_v4.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c4_v4" EXP_DIR=${LAYOUT_NAMES} -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false 
WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/c1_best_CAP.sh b/scripts/bash_scripts/best_baseline_experiment/c1_best_CAP.sh index 700668a..b723943 100644 --- a/scripts/bash_scripts/best_baseline_experiment/c1_best_CAP.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c1_best_CAP.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c1" EXP_DIR="${LAYOUT_NAMES}_best_EGO_with_CAP" -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false L0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_0" @@ -55,7 +55,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/c1_best_ego.sh b/scripts/bash_scripts/best_baseline_experiment/c1_best_ego.sh index bff1e48..2679936 100644 --- a/scripts/bash_scripts/best_baseline_experiment/c1_best_ego.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c1_best_ego.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c1" EXP_DIR="${LAYOUT_NAMES}_best_EGO" -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false L0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_0" @@ -55,7 +55,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/c2_best_CAP.sh b/scripts/bash_scripts/best_baseline_experiment/c2_best_CAP.sh index ce43e2f..79d5d55 100644 --- a/scripts/bash_scripts/best_baseline_experiment/c2_best_CAP.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c2_best_CAP.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c2" EXP_DIR="${LAYOUT_NAMES}_best_EGO_with_CAP" -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false L0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_0" @@ -55,7 +55,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git 
a/scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh b/scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh index dfa0c31..b66f6f2 100644 --- a/scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c2" EXP_DIR="${LAYOUT_NAMES}_best_EGO" -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false L0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_0" @@ -55,7 +55,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/c3_best_CAP.sh b/scripts/bash_scripts/best_baseline_experiment/c3_best_CAP.sh index 84b6152..5bbada8 100644 --- a/scripts/bash_scripts/best_baseline_experiment/c3_best_CAP.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c3_best_CAP.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c3" EXP_DIR="${LAYOUT_NAMES}_best_EGO_with_CAP" -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false L0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_0" @@ -55,7 +55,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh b/scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh index 8ce7625..086716c 100644 --- a/scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c3" EXP_DIR="${LAYOUT_NAMES}_best_EGO" -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false L0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_0" @@ -55,7 +55,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/c4_best_CAP.sh b/scripts/bash_scripts/best_baseline_experiment/c4_best_CAP.sh index 2d35934..53b8a84 100644 --- a/scripts/bash_scripts/best_baseline_experiment/c4_best_CAP.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c4_best_CAP.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c4" EXP_DIR="${LAYOUT_NAMES}_best_EGO_with_CAP" -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false L0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_0" @@ -55,7 +55,7 @@ python scripts/train_agents.py \ 
--fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/c4_best_ego.sh b/scripts/bash_scripts/best_baseline_experiment/c4_best_ego.sh index e7706bd..4696336 100644 --- a/scripts/bash_scripts/best_baseline_experiment/c4_best_ego.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c4_best_ego.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c4" EXP_DIR="${LAYOUT_NAMES}_best_EGO" -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false L0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_0" @@ -55,7 +55,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/classic_CAP_2_player.sh b/scripts/bash_scripts/classic_CAP_2_player.sh index ff36617..d28e057 100644 --- a/scripts/bash_scripts/classic_CAP_2_player.sh +++ b/scripts/bash_scripts/classic_CAP_2_player.sh @@ -2,12 +2,15 @@ ALGO="SPN_XSPCKP" TEAMMATES_LEN=1 +NUM_PLAYERS=$((TEAMMATES_LEN + 1)) HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="counter_circuit,coordination_ring,cramped_room,asymmetric_advantages,forced_coordination" EXP_DIR="Classic/$NUM_PLAYERS" # When quick_test=True this will be overwritten to "Test/$EXP_DIR" -TOTAL_EGO_AGENTS=4 +TOTAL_SP_AGENTS=4 QUICK_TEST=false +USE_CUDA=false +USE_MULTIPLEPROCESSES=false POP_FORCE_TRAINING=false ADVERSARY_FORCE_TRAINING=false @@ -37,10 +40,12 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ --primary-force-training ${PRIMARY_FORCE_TRAINING} \ --how-long ${HOW_LONG} \ - --exp-name-prefix "${EXP_NAME_PREFIX}" \ \ No newline at end of file + --exp-name-prefix "${EXP_NAME_PREFIX}" \ + --use-cuda ${USE_CUDA} \ + --use-multipleprocesses ${USE_MULTIPLEPROCESSES} \ \ No newline at end of file diff --git a/scripts/bash_scripts/classic_FCP_2_player.sh b/scripts/bash_scripts/classic_FCP_2_player.sh index feb7125..da203b9 100644 --- a/scripts/bash_scripts/classic_FCP_2_player.sh +++ b/scripts/bash_scripts/classic_FCP_2_player.sh @@ -2,12 +2,15 @@ ALGO="FCP_traditional" TEAMMATES_LEN=1 +NUM_PLAYERS=$((TEAMMATES_LEN + 1)) HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="counter_circuit,coordination_ring,cramped_room,asymmetric_advantages,forced_coordination" EXP_DIR="Classic/$NUM_PLAYERS" # When quick_test=True this will be overwritten to "Test/$EXP_DIR" -TOTAL_EGO_AGENTS=4 +TOTAL_SP_AGENTS=4 QUICK_TEST=false +USE_CUDA=false 
+USE_MULTIPLEPROCESSES=false POP_FORCE_TRAINING=false ADVERSARY_FORCE_TRAINING=false @@ -37,10 +40,12 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ --primary-force-training ${PRIMARY_FORCE_TRAINING} \ --how-long ${HOW_LONG} \ - --exp-name-prefix "${EXP_NAME_PREFIX}" \ \ No newline at end of file + --exp-name-prefix "${EXP_NAME_PREFIX}" \ + --use-cuda ${USE_CUDA} \ + --use-multipleprocesses ${USE_MULTIPLEPROCESSES} \ \ No newline at end of file diff --git a/scripts/bash_scripts/profile.sh b/scripts/bash_scripts/profile.sh index dada864..ef328af 100644 --- a/scripts/bash_scripts/profile.sh +++ b/scripts/bash_scripts/profile.sh @@ -2,10 +2,11 @@ ALGO="SPN_XSPCKP" TEAMMATES_LEN=1 +NUM_PLAYERS=$((TEAMMATES_LEN + 1)) HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="counter_circuit" -TOTAL_EGO_AGENTS=2 +TOTAL_SP_AGENTS=2 POP_FORCE_TRAINING=false ADVERSARY_FORCE_TRAINING=false @@ -44,7 +45,7 @@ python -m cProfile -o data/profile/profile_results_all_${CURRENT_TIME}.prof scri --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/test_run.sh b/scripts/bash_scripts/test_run.sh index 29b9a30..9c565df 100644 --- a/scripts/bash_scripts/test_run.sh +++ b/scripts/bash_scripts/test_run.sh @@ -1,14 +1,16 @@ #!/bin/sh -ALGO="SPN_XSPCKP" +ALGO="SP" TEAMMATES_LEN=1 NUM_PLAYERS=$((TEAMMATES_LEN + 1)) NUM_OF_CKPOINTS=10 LAYOUT_NAMES="counter_circuit" EXP_DIR="$NUM_PLAYERS" # When quick_test=True this will be overwritten to "Test/$EXP_DIR" -TOTAL_EGO_AGENTS=4 +TOTAL_SP_AGENTS=1 QUICK_TEST=true HOW_LONG=1 +USE_CUDA=false +USE_MULTIPLEPROCESSES=false POP_FORCE_TRAINING=false ADVERSARY_FORCE_TRAINING=false @@ -44,10 +46,12 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ --primary-force-training ${PRIMARY_FORCE_TRAINING} \ --how-long ${HOW_LONG} \ - --exp-name-prefix "${EXP_NAME_PREFIX}" \ \ No newline at end of file + --exp-name-prefix "${EXP_NAME_PREFIX}" \ + --use-cuda ${USE_CUDA} \ + --use-multipleprocesses ${USE_MULTIPLEPROCESSES} \ \ No newline at end of file diff --git a/scripts/train_agents.py b/scripts/train_agents.py index 8191013..f606516 100644 --- a/scripts/train_agents.py +++ b/scripts/train_agents.py @@ -22,7 +22,7 @@ def MEP_POPULATION(args): agents_finder = SelfPlayAgentsFinder(args=args) _, _, training_infos = agents_finder.get_agents_infos() if len(training_infos)==0: - manager = 
MEPPopulationManager(population_size=args.total_ego_agents, args=args) + manager = MEPPopulationManager(population_size=args.total_sp_agents, args=args) manager.train_population( total_timesteps=args.pop_total_training_timesteps, num_of_ckpoints=args.num_of_ckpoints, @@ -287,7 +287,7 @@ def SPN_XSPCKP(args) -> None: def best_EGO(args, add_adv=False) -> None: - '''only for 2 players''' + '''for a very specifric experimetn: only for 2 players:: ignore this''' primary_train_types = [ TeamType.SELF_PLAY_HIGH, TeamType.SELF_PLAY_MEDIUM, @@ -327,21 +327,24 @@ def best_EGO(args, add_adv=False) -> None: elif args.algo_name == 'SPN_XSPCKP': SPN_XSPCKP(args=args) + elif args.algo_name == 'MEP': + MEP_POPULATION(args=args) + elif args.algo_name == 'FCP_traditional': FCP_traditional(args=args) - elif args.algo_name == 'FCP_mhri': - FCP_mhri(args=args) + # elif args.algo_name == 'best_EGO': + # best_EGO(args=args, add_adv=False) - elif args.algo_name == 'SPN_1ADV': - SPN_1ADV(args=args) + # elif args.algo_name == 'FCP_mhri': + # FCP_mhri(args=args) - elif args.algo_name == 'N_1_FCP': - N_1_FCP(args=args) + # elif args.algo_name == 'SPN_1ADV': + # SPN_1ADV(args=args) - elif args.algo_name == 'SPN_1ADV_XSPCKP': - SPN_1ADV_XSPCKP(args=args) + # elif args.algo_name == 'N_1_FCP': + # N_1_FCP(args=args) - elif args.algo_name == 'MEP': - MEP_POPULATION(args=args) + # elif args.algo_name == 'SPN_1ADV_XSPCKP': + # SPN_1ADV_XSPCKP(args=args) diff --git a/scripts/train_agents_without_bashing.py b/scripts/train_agents_without_bashing.py index c747485..2be4414 100644 --- a/scripts/train_agents_without_bashing.py +++ b/scripts/train_agents_without_bashing.py @@ -54,7 +54,7 @@ def set_input(args): args.adversary_total_training_timesteps = int(5e6 * args.how_long) args.n_x_fcp_total_training_timesteps = int(2 * args.fcp_total_training_timesteps * args.how_long) - args.total_ego_agents = 8 + args.total_sp_agents = 8 print(f"args.layout_names: {args.layout_names}") if args.layout_names == complex_2_chefs_layouts: prefix = 'Complex' @@ -84,7 +84,7 @@ def set_input(args): args.adversary_total_training_timesteps = 1500 args.fcp_total_training_timesteps = 1500 args.n_x_fcp_total_training_timesteps = 1500 * 2 - args.total_ego_agents = 2 + args.total_sp_agents = 2 args.exp_dir = f'Test/{args.num_players}' diff --git a/scripts/utils/train_helper.py b/scripts/utils/train_helper.py index 8ee8862..007a729 100644 --- a/scripts/utils/train_helper.py +++ b/scripts/utils/train_helper.py @@ -1,5 +1,6 @@ from oai_agents.agents.rl import RLAgentTrainer from oai_agents.common.tags import TeamType +from oai_agents.agents.agent_utils import CustomAgent from oai_agents.common.population import get_performance_based_population_by_layouts from oai_agents.common.teammates_collection import generate_TC, get_best_SP_agent, generate_TC_for_ADV_agent, update_TC_w_ADV_teammates, update_TC_w_dynamic_and_static_ADV_teammates from oai_agents.common.curriculum import Curriculum @@ -71,7 +72,7 @@ def get_N_X_SP_agents( train_types=n_x_sp_train_types, eval_types=n_x_sp_eval_types['generate'], unseen_teammates_len = unseen_teammates_len, - total_ego_agents=args.total_ego_agents, + total_sp_agents=args.total_sp_agents, force_training=args.pop_force_training, tag=tag ) @@ -406,7 +407,7 @@ def get_FCP_agent_w_pop( total_training_timesteps=args.pop_total_training_timesteps, train_types=fcp_train_types, eval_types=fcp_eval_types['generate'], - total_ego_agents=args.total_ego_agents, + total_sp_agents=args.total_sp_agents, 
force_training=args.pop_force_training, tag=tag ) @@ -529,8 +530,9 @@ def get_N_X_FCP_agents( + def get_best_EGO_agents(args, primary_train_types, primary_eval_types, curriculum, add_adv=False): - '''Code purposed for a very specific experiment, assumes n_players = 2''' + '''Ignore: Code purposed for a very specific experiment, assumes n_players = 2''' from pathlib import Path eval_collection = { From 82845e6a44c1c436a9a297a2746ef70ad1ecc87f Mon Sep 17 00:00:00 2001 From: ava Date: Thu, 27 Mar 2025 15:32:32 -0600 Subject: [PATCH 21/26] Making sure everything works --- oai_agents/common/overcooked_simulation.py | 13 +++++++++++- oai_agents/common/population.py | 24 ++++++++++++++++++++-- scripts/bash_scripts/test_run.sh | 8 ++++---- scripts/train_agents.py | 8 ++++---- 4 files changed, 42 insertions(+), 11 deletions(-) diff --git a/oai_agents/common/overcooked_simulation.py b/oai_agents/common/overcooked_simulation.py index 1e09fa4..3ca2561 100644 --- a/oai_agents/common/overcooked_simulation.py +++ b/oai_agents/common/overcooked_simulation.py @@ -10,12 +10,23 @@ def __init__(self, args, agent, teammates, layout_name, p_idx, horizon=400): self.args = args self.layout_name = layout_name + teammates_collection = { + 'eval': { + self.layout_name: { + 'run_type': [teammates] + } + } + } + self.env = OvercookedGymEnv(args=args, layout_name=self.layout_name, ret_completed_subtasks=False, is_eval_env=True, horizon=horizon, - learner_type='originaler') + learner_type='originaler', + teammates_collection=teammates_collection, + curriculum=None, + ) self.agent = agent self.p_idx = p_idx diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py index 25c450f..1b7c992 100644 --- a/oai_agents/common/population.py +++ b/oai_agents/common/population.py @@ -4,7 +4,7 @@ from oai_agents.agents.rl import RLAgentTrainer from oai_agents.common.tags import AgentPerformance, KeyCheckpoints, TeamType - +from oai_agents.common.teammates_collection import generate_TC from .curriculum import Curriculum @@ -32,12 +32,32 @@ def train_SP_with_checkpoints(args, total_training_timesteps, ck_rate, seed, h_d n_envs = training_info["n_envs"] print(f"Restarting training from step: {start_step} (timestep: {start_timestep})") + + init_agent = RLAgentTrainer.generate_randomly_initialized_agent( # need a cleaner way to do this + args=args, + name=name, + learner_type=args.primary_learner_type, + hidden_dim=h_dim, + seed=seed, + n_envs=args.n_envs + ) + + population = {layout_name: [] for layout_name in args.layout_names} + + teammates_collection = generate_TC(args=args, + population=population, + agent=init_agent, + train_types=[TeamType.SELF_PLAY], + eval_types_to_generate=[TeamType.SELF_PLAY], + eval_types_to_read_from_file=[], + unseen_teammates_len=0, + use_entire_population_for_train_types_teammates=True) rlat = RLAgentTrainer( name=name, args=args, agent=agent_ckpt, - teammates_collection={}, # automatically creates SP type + teammates_collection=teammates_collection, # automatically creates SP type epoch_timesteps=args.epoch_timesteps, n_envs=n_envs, hidden_dim=h_dim, diff --git a/scripts/bash_scripts/test_run.sh b/scripts/bash_scripts/test_run.sh index 9c565df..13c38d5 100644 --- a/scripts/bash_scripts/test_run.sh +++ b/scripts/bash_scripts/test_run.sh @@ -1,12 +1,12 @@ #!/bin/sh -ALGO="SP" +ALGO="FCP_traditional" TEAMMATES_LEN=1 NUM_PLAYERS=$((TEAMMATES_LEN + 1)) NUM_OF_CKPOINTS=10 LAYOUT_NAMES="counter_circuit" EXP_DIR="$NUM_PLAYERS" # When quick_test=True this will be overwritten to "Test/$EXP_DIR" 
-TOTAL_SP_AGENTS=1 +TOTAL_SP_AGENTS=2 QUICK_TEST=true HOW_LONG=1 USE_CUDA=false @@ -21,9 +21,9 @@ source scripts/bash_scripts/env_config.sh # Overwrite the default values from env_config here if needed N_ENVS=5 WANDB_MODE="disabled" -EPOCH_TIMESTEPS=3500 +EPOCH_TIMESTEPS=2500 N_X_SP_TOTAL_TRAINING_TIMESTEPS=10000 -FCP_TOTAL_TRAINING_TIMESTEPS=75000 +FCP_TOTAL_TRAINING_TIMESTEPS=10000 python scripts/train_agents.py \ diff --git a/scripts/train_agents.py b/scripts/train_agents.py index f606516..53d48b9 100644 --- a/scripts/train_agents.py +++ b/scripts/train_agents.py @@ -256,15 +256,15 @@ def SPN_XSPCKP(args) -> None: TeamType.SELF_PLAY_HIGH, TeamType.SELF_PLAY_MEDIUM, TeamType.SELF_PLAY_LOW, - # TeamType.SELF_PLAY_DYNAMIC_ADV, # TODO: read from command line arg - # TeamType.SELF_PLAY_STATIC_ADV, + TeamType.SELF_PLAY_DYNAMIC_ADV, # TODO: read from command line arg + TeamType.SELF_PLAY_STATIC_ADV, ] primary_eval_types = { 'generate': [ TeamType.SELF_PLAY_HIGH, TeamType.SELF_PLAY_LOW, - # TeamType.SELF_PLAY_DYNAMIC_ADV, - # TeamType.SELF_PLAY_STATIC_ADV, + TeamType.SELF_PLAY_DYNAMIC_ADV, + TeamType.SELF_PLAY_STATIC_ADV, ], 'load': [] } From ec23330d3f366e76fb5c2bc9e2acad1b0003da26 Mon Sep 17 00:00:00 2001 From: ava Date: Thu, 27 Mar 2025 15:37:18 -0600 Subject: [PATCH 22/26] Lint fix --- oai_agents/agents/base_agent.py | 10 ++++------ oai_agents/agents/rl.py | 8 ++++---- oai_agents/common/arguments.py | 6 +++--- oai_agents/common/multi_setup_trainer.py | 2 +- oai_agents/common/overcooked_gui.py | 9 ++++----- oai_agents/common/overcooked_simulation.py | 4 ++-- oai_agents/common/population.py | 8 ++++---- .../gym_environments/base_overcooked_env.py | 18 +++++++----------- sandbox/profile_analyze.py | 2 +- scripts/profile_analyze.py | 2 +- scripts/run_overcooked_game.py | 6 +++--- scripts/train_agents.py | 6 +++--- scripts/utils/train_helper.py | 12 ++++++------ 13 files changed, 43 insertions(+), 50 deletions(-) diff --git a/oai_agents/agents/base_agent.py b/oai_agents/agents/base_agent.py index d4b4600..859bda1 100644 --- a/oai_agents/agents/base_agent.py +++ b/oai_agents/agents/base_agent.py @@ -1,10 +1,8 @@ -from oai_agents.agents.agent_utils import load_agent, CustomAgent +from oai_agents.agents.agent_utils import load_agent from oai_agents.common.arguments import get_args_to_save, set_args_from_load from oai_agents.common.state_encodings import ENCODING_SCHEMES -from oai_agents.common.subtasks import calculate_completed_subtask, get_doable_subtasks, Subtasks -from oai_agents.common.tags import AgentPerformance, TeamType, KeyCheckpoints, TeammatesCollection from oai_agents.common.subtasks import get_doable_subtasks, Subtasks -from oai_agents.common.tags import AgentPerformance, KeyCheckpoints +from oai_agents.common.tags import AgentPerformance, KeyCheckpoints, TeammatesCollection from oai_agents.common.checked_model_name_handler import CheckedModelNameHandler # from oai_agents.gym_environments.base_overcooked_env import USEABLE_COUNTERS @@ -327,7 +325,7 @@ def predict(self, obs, state=None, episode_start=None, deterministic=False): # Updated to include action masking self.policy.set_training_mode(False) obs, vectorized_env = self.policy.obs_to_tensor(obs) - + with th.no_grad(): if 'subtask_mask' in obs and np.prod(obs['subtask_mask'].shape) == np.prod(self.policy.action_space.n): dist = self.policy.get_distribution(obs, action_masks=obs['subtask_mask']) @@ -428,7 +426,7 @@ def evaluate(self, eval_agent, num_eps_per_layout_per_tm=5, visualize=False, tim selected_p_indexes = 
random.sample(range(self.args.num_players), min(3, self.args.num_players)) for _, env in enumerate(self.eval_envs): - + rew_per_layout_per_teamtype[env.layout_name] = { teamtype: [] for teamtype in env.teammates_collection[TeammatesCollection.EVAL][env.layout_name] } diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index f9fde02..ae98f16 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -2,7 +2,7 @@ from oai_agents.agents.base_agent import SB3Wrapper, SB3LSTMWrapper, OAITrainer, OAIAgent from oai_agents.common.networks import OAISinglePlayerFeatureExtractor from oai_agents.common.state_encodings import ENCODING_SCHEMES -from oai_agents.common.tags import AgentPerformance, TeamType, TeammatesCollection, KeyCheckpoints +from oai_agents.common.tags import AgentPerformance, TeammatesCollection, KeyCheckpoints from oai_agents.agents.agent_utils import CustomAgent from oai_agents.common.checked_model_name_handler import CheckedModelNameHandler @@ -29,7 +29,7 @@ def __init__( ): train_types = train_types if train_types is not None else [] eval_types = eval_types if eval_types is not None else [] - + # assert teammates_collection, "Teammates collection must be provided" name = name or 'rl_agent' @@ -146,13 +146,13 @@ def get_envs(self, _env, _eval_envs, deterministic, learner_type, teammates_coll if _env is None: env_kwargs = {'shape_rewards': True, 'full_init': False, 'stack_frames': self.use_frame_stack, - 'deterministic': deterministic,'args': self.args, 'learner_type': learner_type, 'start_timestep': start_timestep, + 'deterministic': deterministic,'args': self.args, 'learner_type': learner_type, 'start_timestep': start_timestep, 'teammates_collection': teammates_collection, 'curriculum': curriculum } env = make_vec_env(OvercookedGymEnv, n_envs=self.args.n_envs, seed=self.seed, vec_env_cls=VEC_ENV_CLS, env_kwargs=env_kwargs) eval_envs_kwargs = {'is_eval_env': True, 'horizon': 400, 'stack_frames': self.use_frame_stack, - 'deterministic': deterministic, 'args': self.args, 'learner_type': learner_type, + 'deterministic': deterministic, 'args': self.args, 'learner_type': learner_type, 'teammates_collection': teammates_collection, 'curriculum': curriculum } eval_envs = [OvercookedGymEnv(**{'env_index': i, **eval_envs_kwargs, 'unique_env_idx':self.args.n_envs+i}) for i in range(self.n_layouts)] diff --git a/oai_agents/common/arguments.py b/oai_agents/common/arguments.py index c084fd8..f58b7f4 100644 --- a/oai_agents/common/arguments.py +++ b/oai_agents/common/arguments.py @@ -114,14 +114,14 @@ def get_arguments(additional_args: Optional[List] = None): args = parser.parse_args() args.base_dir = Path(args.base_dir) - + args.device = th.device('cuda' if args.use_cuda and th.cuda.is_available() else 'cpu') - + args.layout_names = args.layout_names.split(',') args.low_perfs = args.low_perfs.split(',') args.med_perfs = args.med_perfs.split(',') args.high_perfs = args.high_perfs.split(',') - + if isinstance(args.layout_names, str): args.layout_names = args.layout_names.split(',') diff --git a/oai_agents/common/multi_setup_trainer.py b/oai_agents/common/multi_setup_trainer.py index 1ea8cbb..8648090 100644 --- a/oai_agents/common/multi_setup_trainer.py +++ b/oai_agents/common/multi_setup_trainer.py @@ -127,7 +127,7 @@ def get_trained_agent(self, seed, h_dim): hidden_dim=h_dim, seed=seed, n_envs=self.args.n_envs - ) + ) population = {layout_name: [] for layout_name in self.args.layout_names} teammates_collection = generate_TC(args=self.args, diff --git 
a/oai_agents/common/overcooked_gui.py b/oai_agents/common/overcooked_gui.py index 44bd551..3ec289b 100644 --- a/oai_agents/common/overcooked_gui.py +++ b/oai_agents/common/overcooked_gui.py @@ -32,7 +32,6 @@ # from oai_agents.agents import Manager from oai_agents.common.subtasks import facing from oai_agents.gym_environments.base_overcooked_env import OvercookedGymEnv -from oai_agents.gym_environments.worker_env import OvercookedSubtaskGymEnv from overcooked_ai_py.mdp.overcooked_mdp import Direction, Action # from overcooked_ai_py.planning.planners import MediumLevelPlanner from overcooked_ai_py.visualization.state_visualizer import StateVisualizer, roboto_path @@ -64,9 +63,9 @@ def __init__(self, args, layout_name=None, agent=None, teammates=None, p_idx=0, ) self.agent = agent self.p_idx = p_idx - + self.env.set_teammates('run_type') - + self.env.reset(p_idx=self.p_idx) if self.agent != 'human': self.agent.set_encoding_params(self.p_idx, self.args.horizon, env=self.env, is_haha=isinstance(self.agent, HierarchicalRL), tune_subtasks=False) @@ -106,7 +105,7 @@ def __init__(self, args, layout_name=None, agent=None, teammates=None, p_idx=0, self.gif_name = gif_name if not os.path.exists(f'data/screenshots/{self.gif_name}'): os.makedirs(f'data/screenshots/{self.gif_name}') - + self.resource_locations = {} for y, row in enumerate(self.env.env.mdp.terrain_mtx): @@ -115,7 +114,7 @@ def __init__(self, args, layout_name=None, agent=None, teammates=None, p_idx=0, self.resource_locations[(x, y)] = cell self.resource_usage = { - agent_idx: {pos: 0 for pos in self.resource_locations} + agent_idx: dict.fromkeys(self.resource_locations, 0) for agent_idx in range(len(self.env.state.players)) } diff --git a/oai_agents/common/overcooked_simulation.py b/oai_agents/common/overcooked_simulation.py index 3ca2561..9a81989 100644 --- a/oai_agents/common/overcooked_simulation.py +++ b/oai_agents/common/overcooked_simulation.py @@ -98,7 +98,7 @@ def run_simulation(self, how_many_times): if __name__ == '__main__': from oai_agents.common.arguments import get_arguments - from oai_agents.agents.agent_utils import DummyAgent, CustomAgent, load_agent + from oai_agents.agents.agent_utils import CustomAgent, load_agent from pathlib import Path args = get_arguments() @@ -114,4 +114,4 @@ def run_simulation(self, how_many_times): teammates = [CustomAgent(args=args, name='tm', trajectories={args.layout_names[0]: [(1, 1), (1, 2)]})] simulation = OvercookedSimulation(args=args, agent=agent, teammates=teammates, layout_name=args.layout_names[0], p_idx=p_idx, horizon=400) - trajectories = simulation.run_simulation(how_many_times=4) \ No newline at end of file + trajectories = simulation.run_simulation(how_many_times=4) diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py index 1b7c992..3ca1e37 100644 --- a/oai_agents/common/population.py +++ b/oai_agents/common/population.py @@ -32,7 +32,7 @@ def train_SP_with_checkpoints(args, total_training_timesteps, ck_rate, seed, h_d n_envs = training_info["n_envs"] print(f"Restarting training from step: {start_step} (timestep: {start_timestep})") - + init_agent = RLAgentTrainer.generate_randomly_initialized_agent( # need a cleaner way to do this args=args, name=name, @@ -40,10 +40,10 @@ def train_SP_with_checkpoints(args, total_training_timesteps, ck_rate, seed, h_d hidden_dim=h_dim, seed=seed, n_envs=args.n_envs - ) - + ) + population = {layout_name: [] for layout_name in args.layout_names} - + teammates_collection = generate_TC(args=args, population=population, 
agent=init_agent, diff --git a/oai_agents/gym_environments/base_overcooked_env.py b/oai_agents/gym_environments/base_overcooked_env.py index d4ec65b..17c3158 100644 --- a/oai_agents/gym_environments/base_overcooked_env.py +++ b/oai_agents/gym_environments/base_overcooked_env.py @@ -1,11 +1,8 @@ from oai_agents.common.state_encodings import ENCODING_SCHEMES -from oai_agents.common.subtasks import Subtasks, calculate_completed_subtask, get_doable_subtasks -from oai_agents.common.learner import LearnerType, Learner -from oai_agents.agents.agent_utils import CustomAgent, DummyAgent -from oai_agents.common.tags import AgentPerformance, TeamType, TeammatesCollection from oai_agents.common.subtasks import Subtasks, get_doable_subtasks from oai_agents.common.learner import Learner -from oai_agents.agents.agent_utils import CustomAgent +from oai_agents.agents.agent_utils import CustomAgent, DummyAgent +from oai_agents.common.tags import TeammatesCollection from overcooked_ai_py.mdp.overcooked_mdp import OvercookedGridworld, Action, Direction from overcooked_ai_py.mdp.overcooked_env import OvercookedEnv @@ -18,7 +15,6 @@ import numpy as np import pygame from pygame.locals import HWSURFACE, DOUBLEBUF, RESIZABLE -from stable_baselines3.common.env_checker import check_env from stable_baselines3.common.vec_env.stacked_observations import StackedObservations import torch as th import random @@ -164,7 +160,7 @@ def set_teammates(self, teamtype=None): assert self.is_eval_env is True, "Teamtype should only be set for evaluation environments" population_teamtypes = self.teammates_collection[TeammatesCollection.EVAL][self.layout_name] teammates = population_teamtypes[teamtype][np.random.randint(len(population_teamtypes[teamtype]))] - else: + else: population_teamtypes = self.teammates_collection[TeammatesCollection.TRAIN][self.layout_name] teammates = self.curriculum.select_teammates_for_layout(population_teamtypes=population_teamtypes, layout=self.layout_name) @@ -370,14 +366,14 @@ def set_bonus_getter(self, bonus_getter): env = OvercookedGymEnv(layout_name=args.layout_names[0], args=args, ret_completed_subtasks=False, is_eval_env=True, horizon=400, learner_type='originaler') - - p_idx = 0 + + p_idx = 0 teammates = [DummyAgent()] - + env.set_teammates(teammates) env.reset(p_idx=p_idx) done = False - + while not done: action = np.random.randint(0, Action.NUM_ACTIONS) action_idx = Action.ACTION_TO_INDEX[Action.STAY] diff --git a/sandbox/profile_analyze.py b/sandbox/profile_analyze.py index d7136f9..d416cfc 100644 --- a/sandbox/profile_analyze.py +++ b/sandbox/profile_analyze.py @@ -6,4 +6,4 @@ args = parser.parse_args() name = args.name p = pstats.Stats(f"data/profile/{name}") -p.strip_dirs().sort_stats("cumulative").print_stats(20) # Show top 20 functions \ No newline at end of file +p.strip_dirs().sort_stats("cumulative").print_stats(20) # Show top 20 functions diff --git a/scripts/profile_analyze.py b/scripts/profile_analyze.py index d7136f9..d416cfc 100644 --- a/scripts/profile_analyze.py +++ b/scripts/profile_analyze.py @@ -6,4 +6,4 @@ args = parser.parse_args() name = args.name p = pstats.Stats(f"data/profile/{name}") -p.strip_dirs().sort_stats("cumulative").print_stats(20) # Show top 20 functions \ No newline at end of file +p.strip_dirs().sort_stats("cumulative").print_stats(20) # Show top 20 functions diff --git a/scripts/run_overcooked_game.py b/scripts/run_overcooked_game.py index 5e18dc9..7d127af 100644 --- a/scripts/run_overcooked_game.py +++ b/scripts/run_overcooked_game.py @@ -1,6 +1,6 @@ from 
pathlib import Path -from oai_agents.agents.agent_utils import DummyAgent, load_agent +from oai_agents.agents.agent_utils import load_agent from oai_agents.agents.rl import RLAgentTrainer from oai_agents.common.arguments import get_arguments from oai_agents.common.overcooked_gui import OvercookedGUI @@ -17,7 +17,7 @@ def get_teammate_from_pop_file(tm_name, tm_score, pop_path, layout_name): args = get_arguments() args.num_players = 2 - args.layout = f'c1' + args.layout = 'c1' args.p_idx = 0 args.layout_names = [args.layout] args.n_envs = 1 @@ -29,7 +29,7 @@ def get_teammate_from_pop_file(tm_name, tm_score, pop_path, layout_name): # 'agent_models/c4_v4/SP_s1010_h256_tr[SP]_ran/best' # 'agent_models/c4_best_EGO/best_c4/best' - # 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best', # green + # 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best', # green # 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best', # orange # 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best', # 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best', diff --git a/scripts/train_agents.py b/scripts/train_agents.py index 53d48b9..fb6c2dc 100644 --- a/scripts/train_agents.py +++ b/scripts/train_agents.py @@ -301,9 +301,9 @@ def best_EGO(args, add_adv=False) -> None: 'load': [] } if args.prioritized_sampling: - curriculum = Curriculum(train_types=primary_train_types, - eval_types=primary_eval_types, - is_random=False, + curriculum = Curriculum(train_types=primary_train_types, + eval_types=primary_eval_types, + is_random=False, prioritized_sampling=True, priority_scaling=2.0) else: diff --git a/scripts/utils/train_helper.py b/scripts/utils/train_helper.py index 007a729..2d6cda3 100644 --- a/scripts/utils/train_helper.py +++ b/scripts/utils/train_helper.py @@ -130,7 +130,7 @@ def gen_ADV_train_N_X_SP(args, population, curriculum, unseen_teammates_len, n_x # hidden_dim=args.N_X_SP_h_dim, # seed=args.N_X_SP_seed, # n_envs=args.n_envs - #) + #) teammates_collection = generate_TC(args=args, population=population, @@ -296,7 +296,7 @@ def N_X_SP(args, population, curriculum, unseen_teammates_len, n_x_sp_eval_types hidden_dim=args.N_X_SP_h_dim, seed=args.N_X_SP_seed, n_envs=args.n_envs, - + ) teammates_collection = generate_TC( @@ -559,13 +559,13 @@ def get_best_EGO_agents(args, primary_train_types, primary_eval_types, curriculu ttype = TeamType.SELF_PLAY_MEDIUM elif agent_address in args.high_perfs: ttype = TeamType.SELF_PLAY_HIGH - + if ttype in train_collection[layout_name]: train_collection[layout_name][ttype].append([agent]) if ttype in eval_collection[layout_name]: eval_collection[layout_name][ttype] = [[agent]] - + name = f'best_{args.layout_names[0]}' if add_adv: @@ -589,7 +589,7 @@ def get_best_EGO_agents(args, primary_train_types, primary_eval_types, curriculu TeammatesCollection.TRAIN: train_collection, TeammatesCollection.EVAL: eval_collection } - + best_ego_trainer = RLAgentTrainer( name=name, args=args, @@ -601,7 +601,7 @@ def get_best_EGO_agents(args, primary_train_types, primary_eval_types, curriculu seed=args.N_X_SP_seed, hidden_dim=args.N_X_SP_h_dim, curriculum=curriculum, - + learner_type=args.primary_learner_type, checkpoint_rate=args.n_x_sp_total_training_timesteps // args.num_of_ckpoints, ) From 215a570eba814958f2897d4d57b2aac6fadf27f4 Mon Sep 17 00:00:00 2001 From: ava Date: Thu, 27 Mar 2025 15:56:17 -0600 Subject: [PATCH 23/26] Finalize PR --- oai_agents/agents/base_agent.py | 14 +++++----- oai_agents/agents/rl.py | 8 +++--- oai_agents/common/arguments.py | 7 ++--- oai_agents/common/multi_setup_trainer.py | 6 ++--- 
oai_agents/common/overcooked_gui.py | 26 ++----------------- oai_agents/common/overcooked_simulation.py | 10 ++----- oai_agents/common/population.py | 2 +- .../gym_environments/base_overcooked_env.py | 7 ++--- 8 files changed, 26 insertions(+), 54 deletions(-) diff --git a/oai_agents/agents/base_agent.py b/oai_agents/agents/base_agent.py index 859bda1..c207bfe 100644 --- a/oai_agents/agents/base_agent.py +++ b/oai_agents/agents/base_agent.py @@ -233,14 +233,7 @@ def get_distribution(self, obs: th.Tensor): return dist def learn(self, epoch_timesteps): - # import cProfile - # import time - # profiler = cProfile.Profile() - # profiler.enable() self.agent.learn(total_timesteps=epoch_timesteps, reset_num_timesteps=False) - # profiler.disable() - # c_time = time.strftime("%Y-%m-%d_%H-%M-%S") - # profiler.dump_stats(f'data/profile/learn_{c_time}.prof') self.num_timesteps = self.agent.num_timesteps def save(self, path: Path) -> None: @@ -457,6 +450,13 @@ def evaluate(self, eval_agent, num_eps_per_layout_per_tm=5, visualize=False, tim return np.mean(tot_mean_reward), rew_per_layout, rew_per_layout_per_teamtype def set_new_teammates(self): + """ + The logic for selecting teammates has been moved to `base_overcooked_env` to support + running environments with the SubProcEnv flag enabled. + `teammates_collection` and `curriculum` are now managed within the environment. + The `set_teammates` method in `base_overcooked_env` selects an appropriate teammate + based on the current curriculum settings. + """ for i in range(self.args.n_envs): self.env.env_method('set_teammates', indices=i) diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index ae98f16..add4d17 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -5,6 +5,8 @@ from oai_agents.common.tags import AgentPerformance, TeammatesCollection, KeyCheckpoints from oai_agents.agents.agent_utils import CustomAgent from oai_agents.common.checked_model_name_handler import CheckedModelNameHandler +from oai_agents.gym_environments.base_overcooked_env import OvercookedGymEnv + import numpy as np from stable_baselines3 import PPO, DQN @@ -30,8 +32,6 @@ def __init__( train_types = train_types if train_types is not None else [] eval_types = eval_types if eval_types is not None else [] - # assert teammates_collection, "Teammates collection must be provided" - name = name or 'rl_agent' super(RLAgentTrainer, self).__init__(name, args, seed=seed) @@ -63,6 +63,8 @@ def __init__( self.use_policy_clone = use_policy_clone self.learner_type = learner_type + + # teammates_collection and curriculum are passed to the environment instead. 
self.env, self.eval_envs = self.get_envs(_env=env, _eval_envs=eval_envs, deterministic=deterministic, learner_type=learner_type, start_timestep=start_timestep, teammates_collection=teammates_collection, @@ -137,8 +139,6 @@ def print_tc_helper(self, teammates_collection, message=None): def get_envs(self, _env, _eval_envs, deterministic, learner_type, teammates_collection, curriculum, start_timestep: int = 0): - from oai_agents.gym_environments.base_overcooked_env import OvercookedGymEnv - if self.args.use_multipleprocesses: VEC_ENV_CLS = SubprocVecEnv else: diff --git a/oai_agents/common/arguments.py b/oai_agents/common/arguments.py index f58b7f4..310b288 100644 --- a/oai_agents/common/arguments.py +++ b/oai_agents/common/arguments.py @@ -104,9 +104,10 @@ def get_arguments(additional_args: Optional[List] = None): parser.add_argument("--total-sp-agents", type=int, default=4) parser.add_argument("--ck-list-offset", type=int, default=0) - parser.add_argument('--low-perfs', help='shitty code to run ult baseline exp', default='default') - parser.add_argument('--med-perfs', help='shitty code to run ult baseline exp', default='default') - parser.add_argument('--high-perfs', help='shitty code to run ult baseline exp', default='default') + # The next three args are only to run the ultimate baseline exp, I will clean it later + parser.add_argument('--low-perfs', help='code to run ult baseline exp', default='default') + parser.add_argument('--med-perfs', help='code to run ult baseline exp', default='default') + parser.add_argument('--high-perfs', help='code to run ult baseline exp', default='default') for parser_arg, parser_kwargs in additional_args: diff --git a/oai_agents/common/multi_setup_trainer.py b/oai_agents/common/multi_setup_trainer.py index 8648090..44290fd 100644 --- a/oai_agents/common/multi_setup_trainer.py +++ b/oai_agents/common/multi_setup_trainer.py @@ -119,8 +119,7 @@ def get_trained_agent(self, seed, h_dim): curriculum=self.curriculum ) - # print('before generate_randomly_initialized_agent') - init_agent = RLAgentTrainer.generate_randomly_initialized_agent( # need a cleaner way to do this + init_agent = RLAgentTrainer.generate_randomly_initialized_agent( args=self.args, name=name, learner_type=self.args.primary_learner_type, @@ -128,7 +127,6 @@ def get_trained_agent(self, seed, h_dim): seed=seed, n_envs=self.args.n_envs ) - population = {layout_name: [] for layout_name in self.args.layout_names} teammates_collection = generate_TC(args=self.args, population=population, @@ -138,7 +136,7 @@ def get_trained_agent(self, seed, h_dim): eval_types_to_read_from_file=self.eval_types['load'], unseen_teammates_len=0, use_entire_population_for_train_types_teammates=True) - + # we can't no longer pass empty teammates_collection to the RlAgentTrainer, so for SP we should do this ^ return self.get_reinforcement_agent( name=name, diff --git a/oai_agents/common/overcooked_gui.py b/oai_agents/common/overcooked_gui.py index 3ec289b..09bb99c 100644 --- a/oai_agents/common/overcooked_gui.py +++ b/oai_agents/common/overcooked_gui.py @@ -48,14 +48,7 @@ def __init__(self, args, layout_name=None, agent=None, teammates=None, p_idx=0, self._display_surf = None self.args = args self.layout_name = layout_name or 'asymmetric_advantages' - - teammates_collection = { - 'eval': { - args.layout: { - 'run_type': [teammates] - } - } - } + teammates_collection = {'eval': {self.layout_name: {'run_type': [teammates]}}} self.env = OvercookedGymEnv(layout_name=self.layout_name, args=args, ret_completed_subtasks=False, 
is_eval_env=True, horizon=horizon, learner_type='originaler', @@ -64,7 +57,7 @@ def __init__(self, args, layout_name=None, agent=None, teammates=None, p_idx=0, self.agent = agent self.p_idx = p_idx - self.env.set_teammates('run_type') + self.env.set_teammates(teamtype='run_type') self.env.reset(p_idx=self.p_idx) if self.agent != 'human': @@ -107,21 +100,6 @@ def __init__(self, args, layout_name=None, agent=None, teammates=None, p_idx=0, os.makedirs(f'data/screenshots/{self.gif_name}') - self.resource_locations = {} - for y, row in enumerate(self.env.env.mdp.terrain_mtx): - for x, cell in enumerate(row): - if cell in ['S', 'D', 'P', 'O']: - self.resource_locations[(x, y)] = cell - - self.resource_usage = { - agent_idx: dict.fromkeys(self.resource_locations, 0) - for agent_idx in range(len(self.env.state.players)) - } - - print(f"Resource locations: {self.resource_locations}") - - - def start_screen(self): pygame.init() surface = StateVisualizer(tile_size=self.tile_size).render_state(self.env.state, diff --git a/oai_agents/common/overcooked_simulation.py b/oai_agents/common/overcooked_simulation.py index 9a81989..e825844 100644 --- a/oai_agents/common/overcooked_simulation.py +++ b/oai_agents/common/overcooked_simulation.py @@ -10,13 +10,7 @@ def __init__(self, args, agent, teammates, layout_name, p_idx, horizon=400): self.args = args self.layout_name = layout_name - teammates_collection = { - 'eval': { - self.layout_name: { - 'run_type': [teammates] - } - } - } + teammates_collection = {'eval': {self.layout_name: {'run_type': [teammates]}}} self.env = OvercookedGymEnv(args=args, layout_name=self.layout_name, @@ -30,7 +24,7 @@ def __init__(self, args, agent, teammates, layout_name, p_idx, horizon=400): self.agent = agent self.p_idx = p_idx - self.env.set_teammates('run_type') + self.env.set_teammates(teamtype='run_type') self.env.reset(p_idx=self.p_idx) assert self.agent != 'human' diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py index 3ca1e37..0d6c577 100644 --- a/oai_agents/common/population.py +++ b/oai_agents/common/population.py @@ -33,7 +33,7 @@ def train_SP_with_checkpoints(args, total_training_timesteps, ck_rate, seed, h_d print(f"Restarting training from step: {start_step} (timestep: {start_timestep})") - init_agent = RLAgentTrainer.generate_randomly_initialized_agent( # need a cleaner way to do this + init_agent = RLAgentTrainer.generate_randomly_initialized_agent( args=args, name=name, learner_type=args.primary_learner_type, diff --git a/oai_agents/gym_environments/base_overcooked_env.py b/oai_agents/gym_environments/base_overcooked_env.py index 17c3158..7e877df 100644 --- a/oai_agents/gym_environments/base_overcooked_env.py +++ b/oai_agents/gym_environments/base_overcooked_env.py @@ -156,6 +156,10 @@ def get_joint_action(self): return self.joint_action def set_teammates(self, teamtype=None): + ''' + When teamtype is None, teammate is set according to the curriculum + When teamtype is not None, teammate is set according to the teamtype which is only used for evaluation purposes + ''' if teamtype: assert self.is_eval_env is True, "Teamtype should only be set for evaluation environments" population_teamtypes = self.teammates_collection[TeammatesCollection.EVAL][self.layout_name] @@ -253,7 +257,6 @@ def step(self, action): tm_obs = self.get_obs(c_idx=t_idx, enc_fn=teammate.encoding_fn) if type(teammate) == CustomAgent: - # if isinstance(teammate, CustomAgent): info = {'layout_name': self.layout_name, 'u_env_idx': self.unique_env_idx} joint_action[t_idx] 
= teammate.predict(obs=tm_obs, deterministic=self.deterministic, info=info)[0] else: @@ -279,7 +282,6 @@ def step(self, action): for t_idx in self.t_idxes: # Should be right after env.step tm = self.get_teammate_from_idx(t_idx) if type(tm) == CustomAgent: - # if isinstance(tm, CustomAgent): tm.update_current_position(layout_name=self.layout_name, new_position=self.env.state.players[t_idx].position, u_env_idx=self.unique_env_idx) if self.shape_rewards and not self.is_eval_env: @@ -311,7 +313,6 @@ def reset(self, p_idx=None): if self.reset_info and 'start_position' in self.reset_info: self.reset_info['start_position'] = {} for id in range(len(teammates_ids)): - # if isinstance(self.teammates[id], CustomAgent): if type(self.teammates[id]) == CustomAgent: self.teammates[id].reset() self.reset_info['start_position'][teammates_ids[id]] = self.teammates[id].get_start_position(self.layout_name, u_env_idx=self.unique_env_idx) From cc4ec560b4bbe9058acffde6e376a1e5e8e18585 Mon Sep 17 00:00:00 2001 From: ava Date: Thu, 27 Mar 2025 15:59:44 -0600 Subject: [PATCH 24/26] Fix default n_envs --- scripts/bash_scripts/env_config.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/bash_scripts/env_config.sh b/scripts/bash_scripts/env_config.sh index 3e4951b..5ee4ff1 100755 --- a/scripts/bash_scripts/env_config.sh +++ b/scripts/bash_scripts/env_config.sh @@ -14,7 +14,7 @@ fi if [ "$QUICK_TEST" = false ]; then WANDB_MODE="online" - N_ENVS=50 + N_ENVS=210 EPOCH_TIMESTEPS=100000 POP_TOTAL_TRAINING_TIMESTEPS=$(echo "$HOW_LONG * 5000000" | bc) N_X_SP_TOTAL_TRAINING_TIMESTEPS=$(echo "$HOW_LONG * 5000000" | bc) From 65e7967c2dd84faeda499cc94dc8046ea0b75d11 Mon Sep 17 00:00:00 2001 From: ava Date: Fri, 28 Mar 2025 14:59:04 -0600 Subject: [PATCH 25/26] Small cleanups --- sandbox/visualize_heatmap.py | 76 ++++++++++++---------------------- scripts/run_overcooked_game.py | 19 ++++----- 2 files changed, 34 insertions(+), 61 deletions(-) diff --git a/sandbox/visualize_heatmap.py b/sandbox/visualize_heatmap.py index f0dd8b3..886913d 100644 --- a/sandbox/visualize_heatmap.py +++ b/sandbox/visualize_heatmap.py @@ -9,11 +9,11 @@ from oai_agents.common.overcooked_simulation import OvercookedSimulation -def extract_layout_features(grid): - """ - Extracts layout features such as counters, pots, onions, and player starting positions. - Returns a dictionary with their coordinates and the grid shape. 
- """ +def extract_layout_features(args): + from overcooked_ai_py.mdp.overcooked_mdp import OvercookedGridworld + mdp = OvercookedGridworld.from_layout_name(args.layout) + grid = mdp.terrain_mtx + layout_features = { "P": [], "O": [], @@ -23,11 +23,11 @@ def extract_layout_features(grid): } feature_positions = set() # Store all feature coordinates for masking - grid_lines = [line.strip() for line in grid.strip().split("\n")] - grid_height = len(grid_lines) - grid_width = max(len(line) for line in grid_lines) # Accounts for irregular widths + # grid_lines = [line.strip() for line in grid.strip().split("\n")] + grid_height = len(grid) + grid_width = max(len(line) for line in grid) # Accounts for irregular widths - for y, row in enumerate(grid_lines): + for y, row in enumerate(grid): for x, char in enumerate(row): if char == "P": layout_features["P"].append((x, y)) @@ -113,64 +113,40 @@ def plot_heatmap(tiles_v, layout_features, feature_positions, title=''): if __name__ == "__main__": args = get_arguments() args.num_players = 2 - args.layout = 'storage_room' - - # grid_layout = """XXXPPXXX - # X 2 X - # D XXXX S - # X 1 X - # XXXOOXXX""" - - grid_layout = """XPXXXXXXXXPX - S XODX S - X 12 X - X XDOX X - XXXXXXXXXXXX""" - - # grid_layout = """XODSXXXXSDXX - # X X - # S PP XX X - # D PP OX 1 X - # O PP DX 2 X - # X SX X - # XSDOXXXXOPXX""" - - # grid_layout = """XXXPPXXX - # X 2 4 X - # S XXXX5S - # X 1 3 X - # XXDOODXX""" + args.layout = 'c4' args.p_idx = 0 args.n_envs = 200 args.layout_names = [args.layout] - # path = 'agent_models/Complex/2/FCP_s1010_h256_tr[AMX]_ran/last' - path = 'agent_models/Complex/2/SP_hd256_seed2602/last' - # path = 'agent_models/Complex/2/N-1-SP_s1010_h256_tr[SPH_SPM_SPL_SPSA]_ran_originaler_attack0/last' - # path = 'agent_models/Complex/2/N-1-SP_s1010_h256_tr[SPH_SPM_SPL_SPSA]_ran_originaler_attack1/last' - # path = 'agent_models/Complex/2/N-1-SP_s1010_h256_tr[SPH_SPM_SPL_SPSA]_ran_originaler_attack2/last' + path = 'agent_models/c4_best_EGO_with_CAP/best_c4_adv/best' + # path = 'agent_models/c4_best_EGO/best_c4/best' agent = load_agent(Path(path), args) title = f'{args.layout}_{path.split("/")[-2]}' - high_perf_teammates = [agent for _ in range(args.num_players - 1)] - low_perf_teammates = [DummyAgent(action='random') for _ in range(args.num_players - 1)] - # Define the environment grid layout (modify this based on the actual layout) + high_perf_paths = [ + 'agent_models/c4_v4/SP_s1010_h256_tr[SP]_ran/best', + 'agent_models/c4_v3/SP_s1010_h256_tr[SP]_ran/best', + 'agent_models/c4_v2/SP_s1010_h256_tr[SP]_ran/best', + 'agent_models/c4_v1/SP_s1010_h256_tr[SP]_ran/best', + ] + high_perf_teammates = [[load_agent(Path(tm_path), args)] for tm_path in high_perf_paths[:args.num_players - 1]] + # high_perf_teammates = [agent for _ in range(args.num_players - 1)] + # low_perf_teammates = [DummyAgent(action='random') for _ in range(args.num_players - 1)] + low_perf_teammates = [] - # Extract layout features, feature positions, and shape dynamically - layout_features, feature_positions, shape = extract_layout_features(grid_layout) - # Initialize heatmap matrices dynamically based on extracted shape - final_tiles_v = np.zeros(shape) + layout_features, feature_positions, shape = extract_layout_features(args) + final_tiles_v = np.zeros(shape) for p_idx in range(args.num_players): - for teammates in [low_perf_teammates, high_perf_teammates]: + for teammates in high_perf_teammates: simulation = OvercookedSimulation(args=args, agent=agent, teammates=teammates, layout_name=args.layout, 
p_idx=p_idx, horizon=400) trajectories = simulation.run_simulation(how_many_times=args.num_eval_for_heatmap_gen) tile = get_tile_map(args=args, shape=shape, agent=agent, p_idx=p_idx, trajectories=trajectories, interact_actions_only=False) - final_tiles_v += tile['V'] + final_tiles_v += tile['P'] # final_tiles_v = not_used_function_get_tile_v_using_all_states(args=args, agent=agent, layout=args.layout, shape=shape) diff --git a/scripts/run_overcooked_game.py b/scripts/run_overcooked_game.py index 7d127af..12825a2 100644 --- a/scripts/run_overcooked_game.py +++ b/scripts/run_overcooked_game.py @@ -17,32 +17,29 @@ def get_teammate_from_pop_file(tm_name, tm_score, pop_path, layout_name): args = get_arguments() args.num_players = 2 - args.layout = 'c1' + args.layout = 'c4' args.p_idx = 0 args.layout_names = [args.layout] args.n_envs = 1 teammates_path = [ # 'agent_models/c1_v4/SP_s1010_h256_tr[SP]_ran/best' - 'agent_models/c1_best_EGO/best_c1/best' - + # 'agent_models/c1_best_EGO/best_c1/best' # 'agent_models/c4_v4/SP_s1010_h256_tr[SP]_ran/best' # 'agent_models/c4_best_EGO/best_c4/best' - - # 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best', # green - # 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best', # orange - # 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best', - # 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best', - # 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best', + # 'agent_models/c4_v4/SP_s1010_h256_tr[SP]_ran/best', + # 'agent_models/c4_v3/SP_s1010_h256_tr[SP]_ran/best', + 'agent_models/c4_best_EGO_with_CAP/best_c4_adv/best' ] + teammates = [load_agent(Path(tm_path), args) for tm_path in teammates_path[:args.num_players - 1]] # trajectories = tile locations. Top left of the layout is (0, 0), bottom right is (M, N) # teammates = [CustomAgent(args=args, name='human', trajectories={args.layout: [(2, 1), (3, 1)]})] # teammates = [DummyAgent(action='random') for _ in range(args.num_players - 1)] - # player_path = 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best' - # player_path = 'agent_models/c4_best_EGO/best_c4/best' + # player_path = 'agent_models/c4_best_EGO_with_CAP/best_c4_adv/best' + # # player_path = 'agent_models/c4_best_EGO/best_c4/best' # player = load_agent(Path(player_path), args) player = teammates[0] # player = 'human' # blue From 5a4055e31de2b5b56a7f70a07c8023830bac0782 Mon Sep 17 00:00:00 2001 From: ava Date: Fri, 28 Mar 2025 15:00:22 -0600 Subject: [PATCH 26/26] ruff fix --- sandbox/visualize_heatmap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sandbox/visualize_heatmap.py b/sandbox/visualize_heatmap.py index 886913d..321610b 100644 --- a/sandbox/visualize_heatmap.py +++ b/sandbox/visualize_heatmap.py @@ -4,7 +4,7 @@ import seaborn as sns from oai_agents.common.heatmap import get_tile_map -from oai_agents.agents.agent_utils import DummyAgent, load_agent +from oai_agents.agents.agent_utils import load_agent from oai_agents.common.arguments import get_arguments from oai_agents.common.overcooked_simulation import OvercookedSimulation
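
Usage sketch (illustration only, not part of the patches above). After PATCH 23, teammate selection for evaluation runs lives inside the environment: callers such as OvercookedGUI and OvercookedSimulation build a small teammates_collection and then call set_teammates(teamtype='run_type'). The snippet below is a minimal standalone version of that wiring, assuming the oai_agents package is importable and that OvercookedGymEnv accepts a teammates_collection keyword argument (the RLAgentTrainer.get_envs call in PATCH 23 implies this, but the constructor signature is not shown in the diffs); the 'eval' key, the 'run_type' team type, and the 'asymmetric_advantages' layout name are taken directly from the patches.

# Minimal sketch of the post-refactor evaluation wiring; the teammates_collection
# constructor kwarg is an assumption (see note above), everything else mirrors the diffs.
from oai_agents.agents.agent_utils import DummyAgent
from oai_agents.common.arguments import get_arguments
from oai_agents.gym_environments.base_overcooked_env import OvercookedGymEnv

args = get_arguments()
args.num_players = 2
args.layout_names = ['asymmetric_advantages']  # any layout known to the repo
layout = args.layout_names[0]

# One random-acting teammate per non-ego player, as in run_overcooked_game.py.
teammates = [DummyAgent(action='random') for _ in range(args.num_players - 1)]

# Eval envs read teammates from the 'eval' branch, keyed by layout and then team type.
teammates_collection = {'eval': {layout: {'run_type': [teammates]}}}

env = OvercookedGymEnv(layout_name=layout, args=args, ret_completed_subtasks=False,
                       is_eval_env=True, horizon=400, learner_type='originaler',
                       teammates_collection=teammates_collection)  # kwarg assumed

env.set_teammates(teamtype='run_type')  # explicit teamtype: evaluation path
obs = env.reset(p_idx=0)                # teamtype=None would instead follow the curriculum

Keeping this lookup inside the environment is what allows the trainer to run with SubprocVecEnv: each worker picks its own teammates through env_method('set_teammates') rather than having the trainer push a teammates list into every subprocess each episode.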