From f1cbfcc83a71a522a3c592d0673a2a90afd8e4ea Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Tue, 18 Mar 2025 12:27:20 -0600 Subject: [PATCH 01/26] Add more profiling --- .../gym_environments/base_overcooked_env.py | 23 +++++++++++++++++++ scripts/bash_scripts/profile.sh | 8 ++++--- scripts/profile_analyze.py | 4 ++++ 3 files changed, 32 insertions(+), 3 deletions(-) create mode 100644 scripts/profile_analyze.py diff --git a/oai_agents/gym_environments/base_overcooked_env.py b/oai_agents/gym_environments/base_overcooked_env.py index 5108c8b..6c35882 100644 --- a/oai_agents/gym_environments/base_overcooked_env.py +++ b/oai_agents/gym_environments/base_overcooked_env.py @@ -234,6 +234,13 @@ def get_teammate_from_idx(self, idx): return self.teammates[id] def step(self, action): + import cProfile + import pstats + import time + profiler = cProfile.Profile() + profiler.enable() + + if len(self.teammates) == 0: raise ValueError('set_teammates must be set called before starting game.') @@ -278,12 +285,24 @@ def step(self, action): ratio = self.final_sparse_r_ratio reward = self.learner.calculate_reward(p_idx=self.p_idx, env_info=info, ratio=ratio, num_players=self.mdp.num_players) self.step_count += 1 + + + profiler.disable() + c_time = time.strftime("%Y%m%d-%H%M%S") + profiler.dump_stats(f'data/profile/base_overcooked_env_step_{c_time}.prof') return self.get_obs(self.p_idx, done=done), reward, done, info def set_reset_p_idx(self, p_idx): self.reset_p_idx = p_idx def reset(self, p_idx=None): + import cProfile + import pstats + import time + profiler = cProfile.Profile() + profiler.enable() + + if p_idx is not None: self.p_idx = p_idx elif self.reset_p_idx is not None: @@ -313,6 +332,10 @@ def reset(self, p_idx=None): # Reset subtask counts self.completed_tasks = [np.zeros(Subtasks.NUM_SUBTASKS), np.zeros(Subtasks.NUM_SUBTASKS)] + + profiler.disable() + c_time = time.strftime("%Y%m%d-%H%M%S") + profiler.dump_stats(f'data/profile/base_overcooked_env_reset_{c_time}.prof') return self.get_obs(self.p_idx, on_reset=True) diff --git a/scripts/bash_scripts/profile.sh b/scripts/bash_scripts/profile.sh index 116d85c..91e98d5 100644 --- a/scripts/bash_scripts/profile.sh +++ b/scripts/bash_scripts/profile.sh @@ -6,20 +6,22 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="counter_circuit" TOTAL_EGO_AGENTS=1 -EXP_DIR="Test/Profile" -POP_FORCE_TRAINING=false +POP_FORCE_TRAINING=true ADVERSARY_FORCE_TRAINING=false PRIMARY_FORCE_TRAINING=false source scripts/bash_scripts/env_config.sh # Overwrite the default values from env_config.sh here if needed: +EXP_DIR="Test/Profile" N_ENVS=2 WANDB_MODE="disabled" EPOCH_TIMESTEPS=100000 N_X_SP_TOTAL_TRAINING_TIMESTEPS=200000 -python -m cProfile -o profile_results.prof scripts/train_agents.py \ +export CURRENT_TIME=$(date +"%Y-%m-%d_%H-%M-%S") + +python -m cProfile -o data/profile/profile_results_all_${CURRENT_TIME}.prof scripts/train_agents.py \ --layout-names ${LAYOUT_NAMES} \ --algo-name ${ALGO} \ --exp-dir ${EXP_DIR} \ diff --git a/scripts/profile_analyze.py b/scripts/profile_analyze.py new file mode 100644 index 0000000..0a33862 --- /dev/null +++ b/scripts/profile_analyze.py @@ -0,0 +1,4 @@ +import pstats + +p = pstats.Stats("data/profile/profile_results.prof") +p.strip_dirs().sort_stats("cumulative").print_stats(20) # Show top 20 functions From 5d3016a07ec35e9f4d83f2deed78e251c06988fa Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Tue, 18 Mar 2025 12:29:15 -0600 Subject: [PATCH 02/26] mask cuda --- scripts/bash_scripts/profile.sh | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/scripts/bash_scripts/profile.sh b/scripts/bash_scripts/profile.sh index 91e98d5..3c73f7b 100644 --- a/scripts/bash_scripts/profile.sh +++ b/scripts/bash_scripts/profile.sh @@ -21,7 +21,7 @@ N_X_SP_TOTAL_TRAINING_TIMESTEPS=200000 export CURRENT_TIME=$(date +"%Y-%m-%d_%H-%M-%S") -python -m cProfile -o data/profile/profile_results_all_${CURRENT_TIME}.prof scripts/train_agents.py \ +CUDA_VISIBLE_DEVICES=1 python -m cProfile -o data/profile/profile_results_all_${CURRENT_TIME}.prof scripts/train_agents.py \ --layout-names ${LAYOUT_NAMES} \ --algo-name ${ALGO} \ --exp-dir ${EXP_DIR} \ From 6b6297f550e17e863541c4165ff8655542d23a86 Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Tue, 18 Mar 2025 12:36:34 -0600 Subject: [PATCH 03/26] add args for profile_analyze --- scripts/profile_analyze.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/profile_analyze.py b/scripts/profile_analyze.py index 0a33862..d7136f9 100644 --- a/scripts/profile_analyze.py +++ b/scripts/profile_analyze.py @@ -1,4 +1,9 @@ import pstats +import argparse -p = pstats.Stats("data/profile/profile_results.prof") -p.strip_dirs().sort_stats("cumulative").print_stats(20) # Show top 20 functions +parser = argparse.ArgumentParser() +parser.add_argument("name", help="name of the profile file") +args = parser.parse_args() +name = args.name +p = pstats.Stats(f"data/profile/{name}") +p.strip_dirs().sort_stats("cumulative").print_stats(20) # Show top 20 functions \ No newline at end of file From 864adf5ff1fae542f3228503844108ed54c0d218 Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Tue, 18 Mar 2025 12:45:10 -0600 Subject: [PATCH 04/26] remove profiling from step and reset --- .../gym_environments/base_overcooked_env.py | 23 ------------------- 1 file changed, 23 deletions(-) diff --git a/oai_agents/gym_environments/base_overcooked_env.py b/oai_agents/gym_environments/base_overcooked_env.py index 6c35882..5108c8b 100644 --- a/oai_agents/gym_environments/base_overcooked_env.py +++ b/oai_agents/gym_environments/base_overcooked_env.py @@ -234,13 +234,6 @@ def get_teammate_from_idx(self, idx): return self.teammates[id] def step(self, action): - import cProfile - import pstats - import time - profiler = cProfile.Profile() - profiler.enable() - - if len(self.teammates) == 0: raise ValueError('set_teammates must be set called before starting game.') @@ -285,24 +278,12 @@ def step(self, action): ratio = self.final_sparse_r_ratio reward = self.learner.calculate_reward(p_idx=self.p_idx, env_info=info, ratio=ratio, num_players=self.mdp.num_players) self.step_count += 1 - - - profiler.disable() - c_time = time.strftime("%Y%m%d-%H%M%S") - profiler.dump_stats(f'data/profile/base_overcooked_env_step_{c_time}.prof') return self.get_obs(self.p_idx, done=done), reward, done, info def set_reset_p_idx(self, p_idx): self.reset_p_idx = p_idx def reset(self, p_idx=None): - import cProfile - import pstats - import time - profiler = cProfile.Profile() - profiler.enable() - - if p_idx is not None: self.p_idx = p_idx elif self.reset_p_idx is not None: @@ -332,10 +313,6 @@ def reset(self, p_idx=None): # Reset subtask counts self.completed_tasks = [np.zeros(Subtasks.NUM_SUBTASKS), np.zeros(Subtasks.NUM_SUBTASKS)] - - profiler.disable() - c_time = time.strftime("%Y%m%d-%H%M%S") - profiler.dump_stats(f'data/profile/base_overcooked_env_reset_{c_time}.prof') return self.get_obs(self.p_idx, on_reset=True) From df20f2c9e30a1fa1141548cef8101f3b411ddb84 Mon Sep 17 00:00:00 2001 From: Ava 
Abderezaei Date: Tue, 18 Mar 2025 12:48:23 -0600 Subject: [PATCH 05/26] add profiling for the learn function --- oai_agents/agents/base_agent.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/oai_agents/agents/base_agent.py b/oai_agents/agents/base_agent.py index 32d7cb8..71962d9 100644 --- a/oai_agents/agents/base_agent.py +++ b/oai_agents/agents/base_agent.py @@ -225,7 +225,17 @@ def get_distribution(self, obs: th.Tensor): return dist def learn(self, epoch_timesteps): + import cProfile + import pstats + import time + profiler = cProfile.Profile() + profiler.enable() + self.agent.learn(total_timesteps=epoch_timesteps, reset_num_timesteps=False) + + profiler.disable() + c_time = time.strftime("%Y%m%d-%H%M%S") + profiler.dump_stats(f'data/profile/learn_{c_time}.prof') self.num_timesteps = self.agent.num_timesteps def save(self, path: Path) -> None: From a459a9bd338a3a4dcc81f8b8a50f6d8a736bfbbf Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Tue, 18 Mar 2025 13:17:58 -0600 Subject: [PATCH 06/26] run step separately --- oai_agents/common/overcooked_gui.py | 1 - .../gym_environments/base_overcooked_env.py | 25 ++++++++++++------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/oai_agents/common/overcooked_gui.py b/oai_agents/common/overcooked_gui.py index 43eda13..65a4a3b 100644 --- a/oai_agents/common/overcooked_gui.py +++ b/oai_agents/common/overcooked_gui.py @@ -71,7 +71,6 @@ def __init__(self, args, layout_name=None, agent=None, teammates=None, p_idx=0, else: self.env = OvercookedGymEnv(layout_name=self.layout_name, args=args, ret_completed_subtasks=False, is_eval_env=True, horizon=horizon, learner_type='originaler', - ) self.agent = agent self.p_idx = p_idx diff --git a/oai_agents/gym_environments/base_overcooked_env.py b/oai_agents/gym_environments/base_overcooked_env.py index 5108c8b..23dac29 100644 --- a/oai_agents/gym_environments/base_overcooked_env.py +++ b/oai_agents/gym_environments/base_overcooked_env.py @@ -1,7 +1,7 @@ from oai_agents.common.state_encodings import ENCODING_SCHEMES from oai_agents.common.subtasks import Subtasks, calculate_completed_subtask, get_doable_subtasks from oai_agents.common.learner import LearnerType, Learner -from oai_agents.agents.agent_utils import CustomAgent +from oai_agents.agents.agent_utils import CustomAgent, DummyAgent from overcooked_ai_py.mdp.overcooked_mdp import OvercookedGridworld, Action, Direction from overcooked_ai_py.mdp.overcooked_env import OvercookedEnv @@ -335,15 +335,22 @@ def close(self): if __name__ == '__main__': from oai_agents.common.arguments import get_arguments - args = get_arguments() - env = OvercookedGymEnv(p1=DummyAgent(), - args=args) # make('overcooked_ai.agents:OvercookedGymEnv-v0', layout='asymmetric_advantages', encoding_fn=encode_state, args=args) - print(check_env(env)) - env.setup_visualization() - env.reset() - env.render() + + args.num_players = 2 + + env = OvercookedGymEnv(layout_name=args.layout_names[0], args=args, ret_completed_subtasks=False, + is_eval_env=True, horizon=400, learner_type='originaler') + + p_idx = 0 + teammates = [DummyAgent()] + + env.set_teammates(teammates) + env.reset(p_idx=p_idx) done = False + while not done: - obs, reward, done, info = env.step(Action.ACTION_TO_INDEX[np.random.choice(Action.ALL_ACTIONS)]) + action = np.random.randint(0, Action.NUM_ACTIONS) + action_idx = Action.ACTION_TO_INDEX[Action.STAY] + obs, reward, done, info = env.step(action_idx) env.render() From 3e6641b7721099cead17d1a38f505ecc6a938130 Mon Sep 17 00:00:00 2001 From: Ava
Abderezaei Date: Tue, 18 Mar 2025 14:00:06 -0600 Subject: [PATCH 07/26] Add default for overcooked_sim --- oai_agents/agents/hrl.py | 2 +- oai_agents/agents/rl.py | 2 +- oai_agents/common/arguments.py | 2 +- oai_agents/common/overcooked_simulation.py | 22 ++++++++++++++++++++++ scripts/run_overcooked_game.py | 4 ++-- 5 files changed, 27 insertions(+), 5 deletions(-) diff --git a/oai_agents/agents/hrl.py b/oai_agents/agents/hrl.py index a9779c4..479f64a 100644 --- a/oai_agents/agents/hrl.py +++ b/oai_agents/agents/hrl.py @@ -5,7 +5,7 @@ from oai_agents.common.arguments import get_arguments, get_args_to_save, set_args_from_load from oai_agents.common.subtasks import Subtasks # from oai_agents.gym_environments.worker_env import OvercookedSubtaskGymEnv -from oai_agents.gym_environments.manager_env import OvercookedManagerGymEnv +# from oai_agents.gym_environments.manager_env import OvercookedManagerGymEnv from overcooked_ai_py.mdp.overcooked_mdp import Action, OvercookedGridworld diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index 2d2fb6a..ba62c19 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -4,7 +4,6 @@ from oai_agents.common.state_encodings import ENCODING_SCHEMES from oai_agents.common.tags import AgentPerformance, TeamType, TeammatesCollection, KeyCheckpoints from oai_agents.agents.agent_utils import CustomAgent -from oai_agents.gym_environments.base_overcooked_env import OvercookedGymEnv from oai_agents.common.checked_model_name_handler import CheckedModelNameHandler import numpy as np @@ -192,6 +191,7 @@ def print_tc_helper(self, teammates_collection, message=None): def get_envs(self, _env, _eval_envs, deterministic, learner_type, start_timestep: int = 0): + from oai_agents.gym_environments.base_overcooked_env import OvercookedGymEnv if _env is None: env_kwargs = {'shape_rewards': True, 'full_init': False, 'stack_frames': self.use_frame_stack, 'deterministic': deterministic,'args': self.args, 'learner_type': learner_type, 'start_timestep': start_timestep} diff --git a/oai_agents/common/arguments.py b/oai_agents/common/arguments.py index 1e03433..dda7517 100644 --- a/oai_agents/common/arguments.py +++ b/oai_agents/common/arguments.py @@ -11,7 +11,7 @@ def get_arguments(additional_args=[]): :return: """ parser = argparse.ArgumentParser(description='PyTorch Soft Actor-Critic Args') - parser.add_argument('--layout-names', help='Overcooked maps to use') + parser.add_argument('--layout-names', help='Overcooked maps to use', default='default') parser.add_argument('--horizon', type=int, default=400, help='Max timesteps in a rollout') parser.add_argument('--num_stack', type=int, default=3, help='Number of frame stacks to use in training if frame stacks are being used') parser.add_argument('--encoding-fn', type=str, default='OAI_egocentric', diff --git a/oai_agents/common/overcooked_simulation.py b/oai_agents/common/overcooked_simulation.py index 0af6cea..20ba9b0 100644 --- a/oai_agents/common/overcooked_simulation.py +++ b/oai_agents/common/overcooked_simulation.py @@ -85,3 +85,25 @@ def run_simulation(self, how_many_times): trajectory = self._run_simulation() trajectories.append(trajectory) return trajectories + + + +if __name__ == '__main__': + from oai_agents.common.arguments import get_arguments + from oai_agents.agents.agent_utils import DummyAgent, CustomAgent, load_agent + from pathlib import Path + + args = get_arguments() + args.num_players = 2 + args.layout_names = ['counter_circuit'] + args.n_envs = 4 + p_idx = 0 + + path = 
'agent_models/Complex/2/SP_hd256_seed2602/last' + agent = load_agent(Path(path), args) + + # teammates = [agent] + teammates = [CustomAgent(args=args, name='tm', trajectories={args.layout_names[0]: [(1, 1), (1, 2)]})] + + simulation = OvercookedSimulation(args=args, agent=agent, teammates=teammates, layout_name=args.layout_names[0], p_idx=p_idx, horizon=400) + trajectories = simulation.run_simulation(how_many_times=4) \ No newline at end of file diff --git a/scripts/run_overcooked_game.py b/scripts/run_overcooked_game.py index 3f03a43..4ed0dda 100644 --- a/scripts/run_overcooked_game.py +++ b/scripts/run_overcooked_game.py @@ -32,8 +32,8 @@ def get_teammate_from_pop_file(tm_name, tm_score, pop_path, layout_name): # teammates = [load_agent(Path(tm_path), args) for tm_path in teammates_path[:args.num_players - 1]] # trajectories = tile locations. Top left of the layout is (0, 0), bottom right is (M, N) - # teammates = [CustomAgent(args=args, name='human', trajectories={args.layout: [(8, 1), (8, 2), (7, 2), (6, 2)]})] - teammates = [DummyAgent(action='random') for _ in range(args.num_players - 1)] + teammates = [CustomAgent(args=args, name='human', trajectories={args.layout: [(1, 1), (1, 2)]})] + # teammates = [DummyAgent(action='random') for _ in range(args.num_players - 1)] # player_path = 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best' # player = load_agent(Path(player_path), args) From f28cf44bc3cd017da6c35b3cb845b37a0c0d2107 Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Tue, 18 Mar 2025 15:26:13 -0600 Subject: [PATCH 08/26] small optimizations --- oai_agents/agents/base_agent.py | 1 + .../gym_environments/base_overcooked_env.py | 19 ++++++++++++++----- scripts/bash_scripts/profile.sh | 4 ++-- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/oai_agents/agents/base_agent.py b/oai_agents/agents/base_agent.py index 71962d9..616452d 100644 --- a/oai_agents/agents/base_agent.py +++ b/oai_agents/agents/base_agent.py @@ -320,6 +320,7 @@ def predict(self, obs, state=None, episode_start=None, deterministic=False): # Updated to include action masking self.policy.set_training_mode(False) obs, vectorized_env = self.policy.obs_to_tensor(obs) + with th.no_grad(): if 'subtask_mask' in obs and np.prod(obs['subtask_mask'].shape) == np.prod(self.policy.action_space.n): dist = self.policy.get_distribution(obs, action_masks=obs['subtask_mask']) diff --git a/oai_agents/gym_environments/base_overcooked_env.py b/oai_agents/gym_environments/base_overcooked_env.py index 23dac29..43f45e3 100644 --- a/oai_agents/gym_environments/base_overcooked_env.py +++ b/oai_agents/gym_environments/base_overcooked_env.py @@ -195,6 +195,7 @@ def action_masks(self, p_idx): return get_doable_subtasks(self.state, self.prev_subtask[p_idx], self.layout_name, self.terrain, p_idx, self.valid_counters, USEABLE_COUNTERS.get(self.layout_name, 5)).astype(bool) + def get_obs(self, c_idx, done=False, enc_fn=None, on_reset=False, goal_objects=None): enc_fn = enc_fn or self.encoding_fn obs = enc_fn(self.env.mdp, self.state, self.grid_shape, self.args.horizon, p_idx=c_idx, @@ -237,19 +238,25 @@ def step(self, action): if len(self.teammates) == 0: raise ValueError('set_teammates must be set called before starting game.') - joint_action = [None for _ in range(self.mdp.num_players)] + # joint_action = [None for _ in range(self.mdp.num_players)] + # joint_action[self.p_idx] = action + + joint_action = np.full(self.mdp.num_players, None, dtype=object) joint_action[self.p_idx] = action + with th.no_grad(): for t_idx in self.t_idxes: 
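A note on the teammate check a few lines below: the commits in this series switch between type(teammate) == CustomAgent and isinstance(teammate, CustomAgent) and eventually revert. The two are not interchangeable, because isinstance also accepts subclasses while the exact-type comparison does not; the speed difference, by contrast, is usually negligible. A small standalone sketch (the classes are stand-ins, not the project's agents):

import timeit

class CustomAgent:                      # stand-in, not the project's CustomAgent
    pass

class WrappedCustomAgent(CustomAgent):  # subclass, to show the semantic difference
    pass

agents = [CustomAgent(), WrappedCustomAgent()] * 500

# Semantics: isinstance() accepts the subclass, the exact-type check does not.
print(all(isinstance(a, CustomAgent) for a in agents))   # True
print(all(type(a) == CustomAgent for a in agents))       # False

# Speed: both checks are typically far too cheap to matter inside step().
t_type = timeit.timeit(lambda: [type(a) == CustomAgent for a in agents], number=1000)
t_inst = timeit.timeit(lambda: [isinstance(a, CustomAgent) for a in agents], number=1000)
print(f"type()==    {t_type:.3f}s   isinstance() {t_inst:.3f}s")

Whether subclasses of CustomAgent should be treated as CustomAgent here is the real question behind the back-and-forth, and the diff alone does not answer it.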
teammate = self.get_teammate_from_idx(t_idx) tm_obs = self.get_obs(c_idx=t_idx, enc_fn=teammate.encoding_fn) - if type(teammate) == CustomAgent: + # if type(teammate) == CustomAgent: + if isinstance(teammate, CustomAgent): info = {'layout_name': self.layout_name, 'u_env_idx': self.unique_env_idx} joint_action[t_idx] = teammate.predict(obs=tm_obs, deterministic=self.deterministic, info=info)[0] else: joint_action[t_idx] = teammate.predict(obs=tm_obs, deterministic=self.deterministic)[0] - joint_action = [Action.INDEX_TO_ACTION[(a.squeeze() if type(a) != int else a)] for a in joint_action] + # joint_action = [Action.INDEX_TO_ACTION[(a.squeeze() if type(a) != int else a)] for a in joint_action] + joint_action = [Action.INDEX_TO_ACTION[a.squeeze() if isinstance(a, np.ndarray) else a] for a in joint_action] self.joint_action = joint_action # If the state didn't change from the previous timestep and the agent is choosing the same action @@ -260,7 +267,8 @@ def step(self, action): joint_action = deepcopy(self.joint_action) for t_idx in self.t_idxes: tm = self.get_teammate_from_idx(t_idx) - if type(tm) != CustomAgent: + # if type(tm) != CustomAgent: + if not isinstance(tm, CustomAgent): joint_action[t_idx] = Direction.INDEX_TO_DIRECTION[self.step_count % 4] self.prev_state, self.prev_actions = deepcopy(self.state), deepcopy(joint_action) @@ -268,7 +276,8 @@ def step(self, action): self.state, reward, done, info = self.env.step(joint_action) for t_idx in self.t_idxes: # Should be right after env.step tm = self.get_teammate_from_idx(t_idx) - if type(tm) == CustomAgent: + # if type(tm) == CustomAgent: + if isinstance(tm, CustomAgent): tm.update_current_position(layout_name=self.layout_name, new_position=self.env.state.players[t_idx].position, u_env_idx=self.unique_env_idx) if self.shape_rewards and not self.is_eval_env: diff --git a/scripts/bash_scripts/profile.sh b/scripts/bash_scripts/profile.sh index 3c73f7b..00b09d4 100644 --- a/scripts/bash_scripts/profile.sh +++ b/scripts/bash_scripts/profile.sh @@ -16,8 +16,8 @@ source scripts/bash_scripts/env_config.sh EXP_DIR="Test/Profile" N_ENVS=2 WANDB_MODE="disabled" -EPOCH_TIMESTEPS=100000 -N_X_SP_TOTAL_TRAINING_TIMESTEPS=200000 +EPOCH_TIMESTEPS=10000 +N_X_SP_TOTAL_TRAINING_TIMESTEPS=10000 export CURRENT_TIME=$(date +"%Y-%m-%d_%H-%M-%S") From 3cd0a62a553ba02bf5a84cb417caae452613d32d Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Tue, 18 Mar 2025 16:16:17 -0600 Subject: [PATCH 09/26] Only use is instance --- oai_agents/gym_environments/base_overcooked_env.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/oai_agents/gym_environments/base_overcooked_env.py b/oai_agents/gym_environments/base_overcooked_env.py index 43f45e3..d8a970b 100644 --- a/oai_agents/gym_environments/base_overcooked_env.py +++ b/oai_agents/gym_environments/base_overcooked_env.py @@ -238,10 +238,7 @@ def step(self, action): if len(self.teammates) == 0: raise ValueError('set_teammates must be set called before starting game.') - # joint_action = [None for _ in range(self.mdp.num_players)] - # joint_action[self.p_idx] = action - - joint_action = np.full(self.mdp.num_players, None, dtype=object) + joint_action = [None for _ in range(self.mdp.num_players)] joint_action[self.p_idx] = action with th.no_grad(): @@ -255,8 +252,7 @@ def step(self, action): else: joint_action[t_idx] = teammate.predict(obs=tm_obs, deterministic=self.deterministic)[0] - # joint_action = [Action.INDEX_TO_ACTION[(a.squeeze() if type(a) != int else a)] for a in joint_action] - 
joint_action = [Action.INDEX_TO_ACTION[a.squeeze() if isinstance(a, np.ndarray) else a] for a in joint_action] + joint_action = [Action.INDEX_TO_ACTION[(a.squeeze() if type(a) != int else a)] for a in joint_action] self.joint_action = joint_action # If the state didn't change from the previous timestep and the agent is choosing the same action @@ -309,7 +305,7 @@ def reset(self, p_idx=None): if self.reset_info and 'start_position' in self.reset_info: self.reset_info['start_position'] = {} for id in range(len(teammates_ids)): - if type(self.teammates[id]) == CustomAgent: + if not isinstance(self.teammates[id], CustomAgent): self.teammates[id].reset() self.reset_info['start_position'][teammates_ids[id]] = self.teammates[id].get_start_position(self.layout_name, u_env_idx=self.unique_env_idx) From 4d3f76629f73a288f7ccd3e82f5b0b3292b1d925 Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Tue, 18 Mar 2025 16:25:55 -0600 Subject: [PATCH 10/26] Small bug fix --- oai_agents/gym_environments/base_overcooked_env.py | 2 +- scripts/bash_scripts/profile.sh | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/oai_agents/gym_environments/base_overcooked_env.py b/oai_agents/gym_environments/base_overcooked_env.py index d8a970b..4d1d018 100644 --- a/oai_agents/gym_environments/base_overcooked_env.py +++ b/oai_agents/gym_environments/base_overcooked_env.py @@ -305,7 +305,7 @@ def reset(self, p_idx=None): if self.reset_info and 'start_position' in self.reset_info: self.reset_info['start_position'] = {} for id in range(len(teammates_ids)): - if not isinstance(self.teammates[id], CustomAgent): + if isinstance(self.teammates[id], CustomAgent): self.teammates[id].reset() self.reset_info['start_position'][teammates_ids[id]] = self.teammates[id].get_start_position(self.layout_name, u_env_idx=self.unique_env_idx) diff --git a/scripts/bash_scripts/profile.sh b/scripts/bash_scripts/profile.sh index 00b09d4..2b29397 100644 --- a/scripts/bash_scripts/profile.sh +++ b/scripts/bash_scripts/profile.sh @@ -14,10 +14,10 @@ PRIMARY_FORCE_TRAINING=false source scripts/bash_scripts/env_config.sh # Overwrite the default values from env_config.sh here if needed: EXP_DIR="Test/Profile" -N_ENVS=2 +N_ENVS=50 WANDB_MODE="disabled" -EPOCH_TIMESTEPS=10000 -N_X_SP_TOTAL_TRAINING_TIMESTEPS=10000 +EPOCH_TIMESTEPS=75000 +N_X_SP_TOTAL_TRAINING_TIMESTEPS=75000 export CURRENT_TIME=$(date +"%Y-%m-%d_%H-%M-%S") From 7cee20b72ce368ecd3e2a780f9f2a61e6495362d Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Tue, 18 Mar 2025 16:36:47 -0600 Subject: [PATCH 11/26] revert isinstance :upside_down_face: --- .../gym_environments/base_overcooked_env.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/oai_agents/gym_environments/base_overcooked_env.py b/oai_agents/gym_environments/base_overcooked_env.py index 4d1d018..e75d8a2 100644 --- a/oai_agents/gym_environments/base_overcooked_env.py +++ b/oai_agents/gym_environments/base_overcooked_env.py @@ -245,8 +245,8 @@ def step(self, action): for t_idx in self.t_idxes: teammate = self.get_teammate_from_idx(t_idx) tm_obs = self.get_obs(c_idx=t_idx, enc_fn=teammate.encoding_fn) - # if type(teammate) == CustomAgent: - if isinstance(teammate, CustomAgent): + if type(teammate) == CustomAgent: + # if isinstance(teammate, CustomAgent): info = {'layout_name': self.layout_name, 'u_env_idx': self.unique_env_idx} joint_action[t_idx] = teammate.predict(obs=tm_obs, deterministic=self.deterministic, info=info)[0] else: @@ -263,8 +263,8 @@ def step(self, action): 
joint_action = deepcopy(self.joint_action) for t_idx in self.t_idxes: tm = self.get_teammate_from_idx(t_idx) - # if type(tm) != CustomAgent: - if not isinstance(tm, CustomAgent): + if type(tm) != CustomAgent: + # if not isinstance(tm, CustomAgent): joint_action[t_idx] = Direction.INDEX_TO_DIRECTION[self.step_count % 4] self.prev_state, self.prev_actions = deepcopy(self.state), deepcopy(joint_action) @@ -272,8 +272,8 @@ def step(self, action): self.state, reward, done, info = self.env.step(joint_action) for t_idx in self.t_idxes: # Should be right after env.step tm = self.get_teammate_from_idx(t_idx) - # if type(tm) == CustomAgent: - if isinstance(tm, CustomAgent): + if type(tm) == CustomAgent: + # if isinstance(tm, CustomAgent): tm.update_current_position(layout_name=self.layout_name, new_position=self.env.state.players[t_idx].position, u_env_idx=self.unique_env_idx) if self.shape_rewards and not self.is_eval_env: @@ -305,7 +305,8 @@ def reset(self, p_idx=None): if self.reset_info and 'start_position' in self.reset_info: self.reset_info['start_position'] = {} for id in range(len(teammates_ids)): - if isinstance(self.teammates[id], CustomAgent): + # if isinstance(self.teammates[id], CustomAgent): + if type(self.teammates[id]) == CustomAgent: self.teammates[id].reset() self.reset_info['start_position'][teammates_ids[id]] = self.teammates[id].get_start_position(self.layout_name, u_env_idx=self.unique_env_idx) From a9c8af0d3d567ce8e32d73d7eb4604677fd0c35a Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Thu, 20 Mar 2025 17:37:29 -0600 Subject: [PATCH 12/26] Perform some code cleaning --- oai_agents/agents/base_agent.py | 56 ++++-------- oai_agents/agents/rl.py | 90 ++++--------------- .../gym_environments/base_overcooked_env.py | 18 +++- scripts/bash_scripts/profile.sh | 17 ++-- scripts/train_agents.py | 3 +- scripts/utils/train_helper.py | 21 +++-- 6 files changed, 76 insertions(+), 129 deletions(-) diff --git a/oai_agents/agents/base_agent.py b/oai_agents/agents/base_agent.py index 616452d..bd72ec1 100644 --- a/oai_agents/agents/base_agent.py +++ b/oai_agents/agents/base_agent.py @@ -2,7 +2,7 @@ from oai_agents.common.arguments import get_args_to_save, set_args_from_load, get_arguments from oai_agents.common.state_encodings import ENCODING_SCHEMES from oai_agents.common.subtasks import calculate_completed_subtask, get_doable_subtasks, Subtasks -from oai_agents.common.tags import AgentPerformance, TeamType, KeyCheckpoints +from oai_agents.common.tags import AgentPerformance, TeamType, KeyCheckpoints, TeammatesCollection from oai_agents.common.checked_model_name_handler import CheckedModelNameHandler # from oai_agents.gym_environments.base_overcooked_env import USEABLE_COUNTERS @@ -201,11 +201,7 @@ def predict(self, obs, state=None, episode_start=None, deterministic=False): self.policy.set_training_mode(False) obs, vectorized_env = self.policy.obs_to_tensor(obs) with th.no_grad(): - if 'subtask_mask' in obs and np.prod(obs['subtask_mask'].shape) == np.prod(self.agent.action_space.n): - dist = self.policy.get_distribution(obs, action_masks=obs['subtask_mask']) - else: - dist = self.policy.get_distribution(obs) - + dist = self.policy.get_distribution(obs) actions = dist.get_actions(deterministic=deterministic) # Convert to numpy, and reshape to the original action shape actions = actions.cpu().numpy().reshape((-1,) + self.agent.action_space.shape) @@ -225,17 +221,14 @@ def get_distribution(self, obs: th.Tensor): return dist def learn(self, epoch_timesteps): - import cProfile - import pstats - 
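The profiler block being commented out in this hunk can also be kept out of learn() entirely by wrapping the call site. A minimal sketch, assuming Python 3.8+ (where cProfile.Profile works as a context manager); profiled is a hypothetical helper, not something in this repository, and the data/profile directory simply mirrors the paths used earlier in this series:

import cProfile
import os
import time

def profiled(fn, *args, label="learn", out_dir="data/profile", **kwargs):
    # Run fn under cProfile, dump a timestamped .prof next to the other profiles,
    # and hand back fn's return value unchanged.
    os.makedirs(out_dir, exist_ok=True)
    with cProfile.Profile() as profiler:
        result = fn(*args, **kwargs)
    stamp = time.strftime("%Y-%m-%d_%H-%M-%S")
    profiler.dump_stats(os.path.join(out_dir, f"{label}_{stamp}.prof"))
    return result

# e.g. profiled(self.agent.learn, total_timesteps=epoch_timesteps, reset_num_timesteps=False)

With this shape the caller decides when profiling is on, so commits like this one do not have to touch the training code at all.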
import time - profiler = cProfile.Profile() - profiler.enable() - + # import cProfile + # import time + # profiler = cProfile.Profile() + # profiler.enable() self.agent.learn(total_timesteps=epoch_timesteps, reset_num_timesteps=False) - - profiler.disable() - c_time = time.strftime("%Y%m%d-%H%M%S") - profiler.dump_stats(f'data/profile/learn_{c_time}.prof') + # profiler.disable() + # c_time = time.strftime("%Y-%m-%d_%H-%M-%S") + # profiler.dump_stats(f'data/profile/learn_{c_time}.prof') self.num_timesteps = self.agent.num_timesteps def save(self, path: Path) -> None: @@ -381,10 +374,6 @@ def __init__(self, name, args, seed=None): if th.cuda.is_available(): th.cuda.manual_seed_all(seed) th.backends.cudnn.deterministic = True - - self.eval_teammates_collection = {} - self.teammates_collection = {} - # For environment splits while training self.n_layouts = len(self.args.layout_names) self.splits = [] @@ -424,16 +413,13 @@ def evaluate(self, eval_agent, num_eps_per_layout_per_tm=5, visualize=False, tim selected_p_indexes = random.sample(range(self.args.num_players), min(3, self.args.num_players)) for _, env in enumerate(self.eval_envs): + rew_per_layout_per_teamtype[env.layout_name] = { - teamtype: [] for teamtype in self.eval_teammates_collection[env.layout_name] + teamtype: [] for teamtype in env.teammates_collection[TeammatesCollection.EVAL][env.layout_name] } rew_per_layout[env.layout_name] = 0 - - teamtypes_population = self.eval_teammates_collection[env.layout_name] - - for teamtype in teamtypes_population: - teammates = teamtypes_population[teamtype][np.random.randint(len(teamtypes_population[teamtype]))] - env.set_teammates(teammates) + for teamtype in env.teammates_collection[TeammatesCollection.EVAL][env.layout_name]: + env.set_teammates(teamtype=teamtype) for p_idx in selected_p_indexes: env.set_reset_p_idx(p_idx) @@ -458,21 +444,9 @@ def evaluate(self, eval_agent, num_eps_per_layout_per_tm=5, visualize=False, tim return np.mean(tot_mean_reward), rew_per_layout, rew_per_layout_per_teamtype - def set_new_teammates(self, curriculum): + def set_new_teammates(self): for i in range(self.args.n_envs): - layout_name = self.env.env_method('get_layout_name', indices=i)[0] - population_teamtypes = self.teammates_collection[layout_name] - - teammates = curriculum.select_teammates_for_layout(population_teamtypes=population_teamtypes, - layout=layout_name) - - assert len(teammates) == self.args.teammates_len - assert type(teammates) == list - - for teammate in teammates: - assert type(teammate) in [SB3Wrapper, CustomAgent] - - self.env.env_method('set_teammates', teammates, indices=i) + self.env.env_method('set_teammates', indices=i) def get_agents(self) -> List[OAIAgent]: diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index ba62c19..402ccf9 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -16,7 +16,9 @@ import os from typing import Optional -VEC_ENV_CLS = DummyVecEnv # +# VEC_ENV_CLS = DummyVecEnv # +VEC_ENV_CLS = SubprocVecEnv + class RLAgentTrainer(OAITrainer): ''' Train an RL agent to play with a teammates_collection of agents.''' @@ -59,19 +61,17 @@ def __init__( self.use_policy_clone = use_policy_clone self.learner_type = learner_type - self.env, self.eval_envs = self.get_envs(env, eval_envs, deterministic, learner_type, start_timestep) + self.env, self.eval_envs = self.get_envs(_env=env, _eval_envs=eval_envs, + deterministic=deterministic, learner_type=learner_type, + start_timestep=start_timestep, teammates_collection=teammates_collection, + 
curriculum=self.curriculum) + # Episode to start training from (usually 0 unless restarted) self.start_step = start_step self.steps = self.start_step # Cumm. timestep to start training from (usually 0 unless restarted) self.start_timestep = start_timestep self.learning_agent, self.agents = self.get_learning_agent(agent) - self.teammates_collection, self.eval_teammates_collection = self.get_teammates_collection( - _tms_clctn = teammates_collection, - learning_agent = self.learning_agent, - train_types = train_types, - eval_types = eval_types - ) self.best_score, self.best_training_rew = -1, float('-inf') @classmethod @@ -120,62 +120,6 @@ def get_learning_agent(self, agent): agents = [learning_agent] return learning_agent, agents - - def get_teammates_collection(self, _tms_clctn, learning_agent, train_types=[], eval_types=[]): - ''' - Returns a dictionary of teammates_collection for training and evaluation - dict - teammates_collection = { - 'layout_name': { - 'TeamType.HIGH_FIRST': [[agent1, agent2], ...], - 'TeamType.MEDIUM_FIRST': [[agent3, agent4], ...], - 'TeamType.LOW_FIRST': [[agent5, agent6], ..], - 'TeamType.RANDOM': [[agent7, agent8], ...], - }, - } - ''' - if _tms_clctn == {}: - _tms_clctn = { - TeammatesCollection.TRAIN: { - layout_name: - {TeamType.SELF_PLAY: [[learning_agent for _ in range(self.teammates_len)]]} - for layout_name in self.args.layout_names - }, - TeammatesCollection.EVAL: { - layout_name: - {TeamType.SELF_PLAY: [[learning_agent for _ in range(self.teammates_len)]]} - for layout_name in self.args.layout_names - } - } - - else: - for layout in self.args.layout_names: - for tt in _tms_clctn[TeammatesCollection.TRAIN][layout]: - if tt == TeamType.SELF_PLAY: - _tms_clctn[TeammatesCollection.TRAIN][layout][TeamType.SELF_PLAY] = [[learning_agent for _ in range(self.teammates_len)]] - for tt in _tms_clctn[TeammatesCollection.EVAL][layout]: - if tt == TeamType.SELF_PLAY: - _tms_clctn[TeammatesCollection.EVAL][layout][TeamType.SELF_PLAY] = [[learning_agent for _ in range(self.teammates_len)]] - - train_teammates_collection = _tms_clctn[TeammatesCollection.TRAIN] - eval_teammates_collection = _tms_clctn[TeammatesCollection.EVAL] - - if train_types: - train_teammates_collection = { - layout: {team_type: train_teammates_collection[layout][team_type] for team_type in train_types} - for layout in train_teammates_collection - } - if eval_types: - eval_teammates_collection = { - layout: {team_type: eval_teammates_collection[layout][team_type] for team_type in eval_types} - for layout in eval_teammates_collection - } - - self.check_teammates_collection_structure(train_teammates_collection) - self.check_teammates_collection_structure(eval_teammates_collection) - return train_teammates_collection, eval_teammates_collection - - def print_tc_helper(self, teammates_collection, message=None): print("-------------------") if message: @@ -190,15 +134,19 @@ def print_tc_helper(self, teammates_collection, message=None): print("-------------------") - def get_envs(self, _env, _eval_envs, deterministic, learner_type, start_timestep: int = 0): + def get_envs(self, _env, _eval_envs, deterministic, learner_type, teammates_collection, curriculum, start_timestep: int = 0): from oai_agents.gym_environments.base_overcooked_env import OvercookedGymEnv if _env is None: env_kwargs = {'shape_rewards': True, 'full_init': False, 'stack_frames': self.use_frame_stack, - 'deterministic': deterministic,'args': self.args, 'learner_type': learner_type, 'start_timestep': start_timestep} + 'deterministic': 
deterministic,'args': self.args, 'learner_type': learner_type, 'start_timestep': start_timestep, + 'teammates_collection': teammates_collection, 'curriculum': curriculum + } env = make_vec_env(OvercookedGymEnv, n_envs=self.args.n_envs, seed=self.seed, vec_env_cls=VEC_ENV_CLS, env_kwargs=env_kwargs) eval_envs_kwargs = {'is_eval_env': True, 'horizon': 400, 'stack_frames': self.use_frame_stack, - 'deterministic': deterministic, 'args': self.args, 'learner_type': learner_type} + 'deterministic': deterministic, 'args': self.args, 'learner_type': learner_type, + 'teammates_collection': teammates_collection, 'curriculum': curriculum + } eval_envs = [OvercookedGymEnv(**{'env_index': i, **eval_envs_kwargs, 'unique_env_idx':self.args.n_envs+i}) for i in range(self.n_layouts)] else: env = _env @@ -282,10 +230,9 @@ def should_evaluate(self, steps): def log_details(self, experiment_name, total_train_timesteps): print("Training agent: " + self.name + ", for experiment: " + experiment_name) - self.print_tc_helper(self.teammates_collection, "Train TC") - self.print_tc_helper(self.eval_teammates_collection, "Eval TC") + self.print_tc_helper(self.eval_envs[0].teammates_collection[TeammatesCollection.EVAL], "Eval TC") + self.print_tc_helper(self.eval_envs[0].teammates_collection[TeammatesCollection.TRAIN], "Train TC") self.curriculum.print_curriculum() - print("How Long: ", self.args.how_long) print(f"Epoch timesteps: {self.epoch_timesteps}") print(f"Total training timesteps: {total_train_timesteps}") print(f"Number of environments: {self.n_envs}") @@ -345,10 +292,11 @@ def train_agents(self, total_train_timesteps, tag_for_returning_agent, resume_ck while self.learning_agent.num_timesteps < total_train_timesteps: self.curriculum.update(current_step=self.steps) - self.set_new_teammates(curriculum=self.curriculum) + self.set_new_teammates() # In each iteration the agent collects n_envs * n_steps experiences. This continues until self.learning_agent.num_timesteps > epoch_timesteps is reached. 
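One detail behind the comment above: SB3's on-policy learn() only checks its timestep budget between rollouts, and with reset_num_timesteps=False each call's budget is added on top of the current count, so every epoch advances by whole rollouts of n_envs * n_steps transitions rather than by exactly epoch_timesteps. A rough, standalone illustration (n_envs and epoch_timesteps are the values this patch sets in profile.sh; n_steps is an assumed rollout length, the real one comes from the PPO configuration):

# Illustrative numbers: n_envs and epoch_timesteps match this patch's profile.sh,
# n_steps is an assumed PPO rollout length (not part of this diff).
n_envs, n_steps = 10, 400
epoch_timesteps = 7500

rollout = n_envs * n_steps                            # 4000 transitions per rollout
rollouts_per_epoch = -(-epoch_timesteps // rollout)   # ceil(7500 / 4000) = 2
print(rollouts_per_epoch * rollout)                   # 8000 timesteps actually collected

That overshoot is worth keeping in mind when comparing the per-epoch .prof files, since the profiled work corresponds to the rounded-up number of environment steps.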
self.learning_agent.learn(self.epoch_timesteps) + self.steps += 1 if self.should_evaluate(steps=self.steps): diff --git a/oai_agents/gym_environments/base_overcooked_env.py b/oai_agents/gym_environments/base_overcooked_env.py index e75d8a2..9d6d220 100644 --- a/oai_agents/gym_environments/base_overcooked_env.py +++ b/oai_agents/gym_environments/base_overcooked_env.py @@ -2,6 +2,7 @@ from oai_agents.common.subtasks import Subtasks, calculate_completed_subtask, get_doable_subtasks from oai_agents.common.learner import LearnerType, Learner from oai_agents.agents.agent_utils import CustomAgent, DummyAgent +from oai_agents.common.tags import AgentPerformance, TeamType, TeammatesCollection from overcooked_ai_py.mdp.overcooked_mdp import OvercookedGridworld, Action, Direction from overcooked_ai_py.mdp.overcooked_env import OvercookedEnv @@ -35,7 +36,7 @@ class OvercookedGymEnv(Env): metadata = {'render.modes': ['human']} - def __init__(self, learner_type, grid_shape=None, ret_completed_subtasks=False, stack_frames=False, is_eval_env=False, + def __init__(self, learner_type, teammates_collection, curriculum, grid_shape=None, ret_completed_subtasks=False, stack_frames=False, is_eval_env=False, shape_rewards=False, enc_fn=None, full_init=True, args=None, deterministic=False, start_timestep: int = 0, **kwargs): self.is_eval_env = is_eval_env @@ -89,6 +90,8 @@ def __init__(self, learner_type, grid_shape=None, ret_completed_subtasks=False, self.reset_p_idx = None self.learner = Learner(learner_type, args.reward_magnifier) + self.teammates_collection = teammates_collection + self.curriculum = curriculum self.dynamic_reward = args.dynamic_reward self.final_sparse_r_ratio = args.final_sparse_r_ratio @@ -154,11 +157,17 @@ def get_layout_name(self): def get_joint_action(self): return self.joint_action - def set_teammates(self, teammates): - assert isinstance(teammates, list) + def set_teammates(self, teamtype=None): + if teamtype: + assert self.is_eval_env is True, "Teamtype should only be set for evaluation environments" + population_teamtypes = self.teammates_collection[TeammatesCollection.EVAL][self.layout_name] + teammates = population_teamtypes[teamtype][np.random.randint(len(population_teamtypes[teamtype]))] + else: + population_teamtypes = self.teammates_collection[TeammatesCollection.TRAIN][self.layout_name] + teammates = self.curriculum.select_teammates_for_layout(population_teamtypes=population_teamtypes, layout=self.layout_name) + self.teammates = teammates self.reset_info['start_position'] = {} - for t_idx in self.t_idxes: tm = self.get_teammate_from_idx(t_idx) if tm.get_start_position(self.layout_name, u_env_idx=self.unique_env_idx) is not None: @@ -245,6 +254,7 @@ def step(self, action): for t_idx in self.t_idxes: teammate = self.get_teammate_from_idx(t_idx) tm_obs = self.get_obs(c_idx=t_idx, enc_fn=teammate.encoding_fn) + if type(teammate) == CustomAgent: # if isinstance(teammate, CustomAgent): info = {'layout_name': self.layout_name, 'u_env_idx': self.unique_env_idx} diff --git a/scripts/bash_scripts/profile.sh b/scripts/bash_scripts/profile.sh index 2b29397..36eebea 100644 --- a/scripts/bash_scripts/profile.sh +++ b/scripts/bash_scripts/profile.sh @@ -1,23 +1,26 @@ #!/bin/sh -ALGO="SP" +ALGO="SPN_XSPCKP" TEAMMATES_LEN=1 HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="counter_circuit" -TOTAL_EGO_AGENTS=1 +TOTAL_EGO_AGENTS=2 -POP_FORCE_TRAINING=true +POP_FORCE_TRAINING=false ADVERSARY_FORCE_TRAINING=false -PRIMARY_FORCE_TRAINING=false +PRIMARY_FORCE_TRAINING=true source 
scripts/bash_scripts/env_config.sh # Overwrite the default values from env_config.sh here if needed: -EXP_DIR="Test/Profile" -N_ENVS=50 +EXP_DIR="Test/Profile/pop" +N_ENVS=10 WANDB_MODE="disabled" -EPOCH_TIMESTEPS=75000 +EPOCH_TIMESTEPS=7500 N_X_SP_TOTAL_TRAINING_TIMESTEPS=75000 +FCP_TOTAL_TRAINING_TIMESTEPS=75000 +CUSTOM_AGENT_CK_RATE_GENERATION=1 +# POP_TOTAL_TRAINING_TIMESTEPS=300000 export CURRENT_TIME=$(date +"%Y-%m-%d_%H-%M-%S") diff --git a/scripts/train_agents.py b/scripts/train_agents.py index 92d2a1c..52c15ff 100644 --- a/scripts/train_agents.py +++ b/scripts/train_agents.py @@ -240,7 +240,8 @@ def SPN_XSPCKP(args) -> None: TeamType.SELF_PLAY_STATIC_ADV, ] primary_eval_types = { - 'generate': [TeamType.SELF_PLAY_HIGH, + 'generate': [ + TeamType.SELF_PLAY_HIGH, TeamType.SELF_PLAY_LOW, TeamType.SELF_PLAY_DYNAMIC_ADV, TeamType.SELF_PLAY_STATIC_ADV, diff --git a/scripts/utils/train_helper.py b/scripts/utils/train_helper.py index cf65044..15695aa 100644 --- a/scripts/utils/train_helper.py +++ b/scripts/utils/train_helper.py @@ -123,6 +123,15 @@ def gen_ADV_train_N_X_SP(args, population, curriculum, unseen_teammates_len, n_x init_agent = load_agents(args, name=heatmap_source.name, tag=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL, force_training=False)[0] + # init_agent = RLAgentTrainer.generate_randomly_initialized_agent( # need a cleaner way to do this + # args=args, + # name=name, + # learner_type=args.primary_learner_type, + # hidden_dim=args.N_X_SP_h_dim, + # seed=args.N_X_SP_seed, + # n_envs=args.n_envs + #) + teammates_collection = generate_TC(args=args, population=population, agent=init_agent, @@ -132,6 +141,7 @@ def gen_ADV_train_N_X_SP(args, population, curriculum, unseen_teammates_len, n_x unseen_teammates_len=unseen_teammates_len, use_entire_population_for_train_types_teammates=True) + adversaries = generate_adversaries_based_on_heatmap(args=args, heatmap_source=heatmap_source, current_adversaries={}, teammates_collection=teammates_collection, train_types=curriculum.train_types) total_train_timesteps = args.n_x_sp_total_training_timesteps // args.custom_agent_ck_rate_generation @@ -157,7 +167,6 @@ def gen_ADV_train_N_X_SP(args, population, curriculum, unseen_teammates_len, n_x adversaries=adversaries) init_agent.name = name args.ck_list_offset = (args.num_of_ckpoints - 1) + ((args.num_of_ckpoints - 1) * round // (args.custom_agent_ck_rate_generation)) - n_x_sp_types_trainer = RLAgentTrainer(name=name, args=args, agent=init_agent, @@ -170,9 +179,10 @@ def gen_ADV_train_N_X_SP(args, population, curriculum, unseen_teammates_len, n_x learner_type=args.primary_learner_type, checkpoint_rate= ck_rate, ) - - n_x_sp_types_trainer.train_agents(total_train_timesteps = total_train_timesteps*(round + 1) + args.pop_total_training_timesteps, - tag_for_returning_agent=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL) + train_time = total_train_timesteps * (round + 1) + # train_time = total_train_timesteps*(round + 1) + args.pop_total_training_timesteps + n_x_sp_types_trainer.train_agents(total_train_timesteps=train_time, + tag_for_returning_agent=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL) init_agent = n_x_sp_types_trainer.agents[0] new_adversaries = generate_adversaries_based_on_heatmap(args=args, heatmap_source=init_agent, current_adversaries=adversaries, teammates_collection=teammates_collection, train_types=curriculum.train_types) adversaries = {key: adversaries.get(key, []) + new_adversaries.get(key, []) for key in set(adversaries) | set(new_adversaries)} @@ -285,7 +295,8 @@ def N_X_SP(args, 
population, curriculum, unseen_teammates_len, n_x_sp_eval_types learner_type=args.primary_learner_type, hidden_dim=args.N_X_SP_h_dim, seed=args.N_X_SP_seed, - n_envs=args.n_envs + n_envs=args.n_envs, + ) teammates_collection = generate_TC( From a6ada3370233cc58f83b75ae03004682209baac7 Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Fri, 21 Mar 2025 11:34:32 -0600 Subject: [PATCH 13/26] Some cleanups --- oai_agents/common/arguments.py | 3 ++- {scripts => sandbox}/generate_agents_for_eval.py | 0 {scripts => sandbox}/table_creator.py | 0 {scripts => sandbox}/training_chart.py | 0 scripts/bash_scripts/profile.sh | 2 +- scripts/{ => eval_scripts}/avg_perf_chart.py | 0 .../avg_perf_chart_multi_seed.py | 0 .../{ => eval_scripts}/avg_perf_chart_unified.py | 0 scripts/{ => eval_scripts}/evaluate_agents.py | 0 scripts/{ => eval_scripts}/evaluate_agents_v2.py | 0 scripts/train_agents.py | 16 ++++++++-------- 11 files changed, 11 insertions(+), 10 deletions(-) rename {scripts => sandbox}/generate_agents_for_eval.py (100%) rename {scripts => sandbox}/table_creator.py (100%) rename {scripts => sandbox}/training_chart.py (100%) rename scripts/{ => eval_scripts}/avg_perf_chart.py (100%) rename scripts/{ => eval_scripts}/avg_perf_chart_multi_seed.py (100%) rename scripts/{ => eval_scripts}/avg_perf_chart_unified.py (100%) rename scripts/{ => eval_scripts}/evaluate_agents.py (100%) rename scripts/{ => eval_scripts}/evaluate_agents_v2.py (100%) diff --git a/oai_agents/common/arguments.py b/oai_agents/common/arguments.py index dda7517..786ed15 100644 --- a/oai_agents/common/arguments.py +++ b/oai_agents/common/arguments.py @@ -105,7 +105,8 @@ def get_arguments(additional_args=[]): args = parser.parse_args() args.base_dir = Path(args.base_dir) - args.device = th.device('cuda' if th.cuda.is_available() else 'cpu') + # args.device = th.device('cuda' if th.cuda.is_available() else 'cpu') + args.device = th.device('cpu') args.layout_names = args.layout_names.split(',') return args diff --git a/scripts/generate_agents_for_eval.py b/sandbox/generate_agents_for_eval.py similarity index 100% rename from scripts/generate_agents_for_eval.py rename to sandbox/generate_agents_for_eval.py diff --git a/scripts/table_creator.py b/sandbox/table_creator.py similarity index 100% rename from scripts/table_creator.py rename to sandbox/table_creator.py diff --git a/scripts/training_chart.py b/sandbox/training_chart.py similarity index 100% rename from scripts/training_chart.py rename to sandbox/training_chart.py diff --git a/scripts/bash_scripts/profile.sh b/scripts/bash_scripts/profile.sh index 36eebea..ac772ce 100644 --- a/scripts/bash_scripts/profile.sh +++ b/scripts/bash_scripts/profile.sh @@ -24,7 +24,7 @@ CUSTOM_AGENT_CK_RATE_GENERATION=1 export CURRENT_TIME=$(date +"%Y-%m-%d_%H-%M-%S") -CUDA_VISIBLE_DEVICES=1 python -m cProfile -o data/profile/profile_results_all_${CURRENT_TIME}.prof scripts/train_agents.py \ +python -m cProfile -o data/profile/profile_results_all_${CURRENT_TIME}.prof scripts/train_agents.py \ --layout-names ${LAYOUT_NAMES} \ --algo-name ${ALGO} \ --exp-dir ${EXP_DIR} \ diff --git a/scripts/avg_perf_chart.py b/scripts/eval_scripts/avg_perf_chart.py similarity index 100% rename from scripts/avg_perf_chart.py rename to scripts/eval_scripts/avg_perf_chart.py diff --git a/scripts/avg_perf_chart_multi_seed.py b/scripts/eval_scripts/avg_perf_chart_multi_seed.py similarity index 100% rename from scripts/avg_perf_chart_multi_seed.py rename to scripts/eval_scripts/avg_perf_chart_multi_seed.py diff --git 
a/scripts/avg_perf_chart_unified.py b/scripts/eval_scripts/avg_perf_chart_unified.py similarity index 100% rename from scripts/avg_perf_chart_unified.py rename to scripts/eval_scripts/avg_perf_chart_unified.py diff --git a/scripts/evaluate_agents.py b/scripts/eval_scripts/evaluate_agents.py similarity index 100% rename from scripts/evaluate_agents.py rename to scripts/eval_scripts/evaluate_agents.py diff --git a/scripts/evaluate_agents_v2.py b/scripts/eval_scripts/evaluate_agents_v2.py similarity index 100% rename from scripts/evaluate_agents_v2.py rename to scripts/eval_scripts/evaluate_agents_v2.py diff --git a/scripts/train_agents.py b/scripts/train_agents.py index 52c15ff..40b0fba 100644 --- a/scripts/train_agents.py +++ b/scripts/train_agents.py @@ -278,15 +278,15 @@ def SPN_XSPCKP(args) -> None: elif args.algo_name == 'FCP_traditional': FCP_traditional(args=args) - elif args.algo_name == 'FCP_mhri': - FCP_mhri(args=args) + # elif args.algo_name == 'FCP_mhri': + # FCP_mhri(args=args) - elif args.algo_name == 'SPN_1ADV': - SPN_1ADV(args=args) + # elif args.algo_name == 'SPN_1ADV': + # SPN_1ADV(args=args) - elif args.algo_name == 'N_1_FCP': - N_1_FCP(args=args) + # elif args.algo_name == 'N_1_FCP': + # N_1_FCP(args=args) - elif args.algo_name == 'SPN_1ADV_XSPCKP': - SPN_1ADV_XSPCKP(args=args) + # elif args.algo_name == 'SPN_1ADV_XSPCKP': + # SPN_1ADV_XSPCKP(args=args) From 9bd10051ca10d77c0080334725c5639071f1c248 Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Fri, 21 Mar 2025 16:28:53 -0600 Subject: [PATCH 14/26] Add scripts to run the best ego --- oai_agents/agents/rl.py | 2 +- oai_agents/common/arguments.py | 8 +++ sandbox/profile_analyze.py | 9 +++ .../best_baseline_experiment/c1_best_ego.sh | 67 +++++++++++++++++++ .../best_baseline_experiment/c2_best_ego.sh | 67 +++++++++++++++++++ .../best_baseline_experiment/c3_best_ego.sh | 67 +++++++++++++++++++ .../best_baseline_experiment/c4_best_ego.sh | 67 +++++++++++++++++++ scripts/bash_scripts/env_config.sh | 2 +- scripts/train_agents.py | 35 ++++++++++ scripts/utils/__init__.py | 2 +- scripts/utils/train_helper.py | 66 +++++++++++++++++- 11 files changed, 388 insertions(+), 4 deletions(-) create mode 100644 sandbox/profile_analyze.py create mode 100644 scripts/bash_scripts/best_baseline_experiment/c1_best_ego.sh create mode 100644 scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh create mode 100644 scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh create mode 100644 scripts/bash_scripts/best_baseline_experiment/c4_best_ego.sh diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index 402ccf9..b8e6631 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -130,7 +130,7 @@ def print_tc_helper(self, teammates_collection, message=None): teammates_c = teammates_collection[layout_name][tag] for teammates in teammates_c: for agent in teammates: - print(f'\t{agent.name}, score for layout {layout_name} is: {agent.layout_scores[layout_name]}, start_pos: {agent.get_start_position(layout_name, 0)}, len: {len(teammates)}') + print(f'\t{agent.name}, score for layout {layout_name} is:, start_pos: {agent.get_start_position(layout_name, 0)}, len: {len(teammates)}') print("-------------------") diff --git a/oai_agents/common/arguments.py b/oai_agents/common/arguments.py index 786ed15..c991dea 100644 --- a/oai_agents/common/arguments.py +++ b/oai_agents/common/arguments.py @@ -100,6 +100,11 @@ def get_arguments(additional_args=[]): parser.add_argument("--total-ego-agents", type=int, default=4) 
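The three comma-separated checkpoint-path arguments added just below are parsed as plain strings and split on ',' further down in this file, so leaving a flag unset yields the literal list ['default']. A sketch of an alternative that lets argparse do the split, shown for comparison only (the patch itself keeps the split-after-parse approach; the example paths are made up, shaped like the ones the best_ego scripts pass in):

import argparse

def comma_list(value):
    # "a,b,c" -> ["a", "b", "c"], dropping empty pieces
    return [piece for piece in value.split(",") if piece]

parser = argparse.ArgumentParser()
parser.add_argument("--low-perfs", type=comma_list, default=[])
parser.add_argument("--med-perfs", type=comma_list, default=[])
parser.add_argument("--high-perfs", type=comma_list, default=[])

args = parser.parse_args(["--low-perfs", "c1_v1/SP_s1010/ck_0,c1_v2/SP_s1010/ck_0"])
print(args.low_perfs)    # ['c1_v1/SP_s1010/ck_0', 'c1_v2/SP_s1010/ck_0']

With type=comma_list an unset flag stays an empty list, so downstream code can distinguish "not provided" from a real checkpoint path without special-casing the 'default' sentinel.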
parser.add_argument("--ck-list-offset", type=int, default=0) + parser.add_argument('--low-perfs', help='shitty code to run ult baseline exp', default='default') + parser.add_argument('--med-perfs', help='shitty code to run ult baseline exp', default='default') + parser.add_argument('--high-perfs', help='shitty code to run ult baseline exp', default='default') + + for parser_arg, parser_kwargs in additional_args: parser.add_argument(parser_arg, **parser_kwargs) @@ -108,6 +113,9 @@ def get_arguments(additional_args=[]): # args.device = th.device('cuda' if th.cuda.is_available() else 'cpu') args.device = th.device('cpu') args.layout_names = args.layout_names.split(',') + args.low_perfs = args.low_perfs.split(',') + args.med_perfs = args.med_perfs.split(',') + args.high_perfs = args.high_perfs.split(',') return args diff --git a/sandbox/profile_analyze.py b/sandbox/profile_analyze.py new file mode 100644 index 0000000..d7136f9 --- /dev/null +++ b/sandbox/profile_analyze.py @@ -0,0 +1,9 @@ +import pstats +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument("name", help="name of the profile file") +args = parser.parse_args() +name = args.name +p = pstats.Stats(f"data/profile/{name}") +p.strip_dirs().sort_stats("cumulative").print_stats(20) # Show top 20 functions \ No newline at end of file diff --git a/scripts/bash_scripts/best_baseline_experiment/c1_best_ego.sh b/scripts/bash_scripts/best_baseline_experiment/c1_best_ego.sh new file mode 100644 index 0000000..9ac5db5 --- /dev/null +++ b/scripts/bash_scripts/best_baseline_experiment/c1_best_ego.sh @@ -0,0 +1,67 @@ +#!/bin/sh + +ALGO="best_EGO" +TEAMMATES_LEN=1 +HOW_LONG=20 +NUM_OF_CKPOINTS=40 +LAYOUT_NAMES="c1" +EXP_DIR=${LAYOUT_NAMES} +TOTAL_EGO_AGENTS=1 +QUICK_TEST=false + +L0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_0" +L1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_0" +L2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_0" +L3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_0" + +M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_1_rew_252.0" +M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_1_rew_284.0" +M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_234.0" +M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_246.0" + +H0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/best" +H1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/best" +H2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/best" +H3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/best" + +L="${L0},${L1},${L2},${L3}" +M="${M0},${M1},${M2},${M3}" +H="${H0},${H1},${H2},${H3}" + +WANDB_MODE="online" +POP_FORCE_TRAINING=false +ADVERSARY_FORCE_TRAINING=false +PRIMARY_FORCE_TRAINING=false + +source scripts/bash_scripts/env_config.sh + +python scripts/train_agents.py \ + --layout-names ${LAYOUT_NAMES} \ + --algo-name ${ALGO} \ + --exp-dir ${EXP_DIR} \ + --num-of-ckpoints ${NUM_OF_CKPOINTS} \ + --teammates-len ${TEAMMATES_LEN} \ + --num-players ${NUM_PLAYERS} \ + --custom-agent-ck-rate-generation ${CUSTOM_AGENT_CK_RATE_GENERATION} \ + --num-steps-in-traj-for-dyn-adv ${NUM_STEPS_IN_TRAJ_FOR_DYN_ADV} \ + --num-static-advs-per-heatmap ${NUM_STATIC_ADVS_PER_HEATMAP} \ + --num-dynamic-advs-per-heatmap ${NUM_DYNAMIC_ADVS_PER_HEATMAP} \ + --use-val-func-for-heatmap-gen ${USE_VAL_FUNC_FOR_HEATMAP_GEN} \ + --prioritized-sampling ${PRIORITIZED_SAMPLING} \ + --n-envs ${N_ENVS} \ + --epoch-timesteps ${EPOCH_TIMESTEPS} \ + --pop-total-training-timesteps ${POP_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-sp-total-training-timesteps ${N_X_SP_TOTAL_TRAINING_TIMESTEPS} \ + 
--fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ + --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ + --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --wandb-mode ${WANDB_MODE} \ + --pop-force-training ${POP_FORCE_TRAINING} \ + --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ + --primary-force-training ${PRIMARY_FORCE_TRAINING} \ + --how-long ${HOW_LONG} \ + --exp-name-prefix "${EXP_NAME_PREFIX}" \ + --low-perfs ${L} \ + --med-perfs ${M} \ + --high-perfs ${H} \ \ No newline at end of file diff --git a/scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh b/scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh new file mode 100644 index 0000000..e93a38b --- /dev/null +++ b/scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh @@ -0,0 +1,67 @@ +#!/bin/sh + +ALGO="best_EGO" +TEAMMATES_LEN=1 +HOW_LONG=20 +NUM_OF_CKPOINTS=40 +LAYOUT_NAMES="c2" +EXP_DIR=${LAYOUT_NAMES} +TOTAL_EGO_AGENTS=1 +QUICK_TEST=false + +L0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_0" +L1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_0" +L2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_0" +L3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_0" + +# M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_1_rew_252.0" +# M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_1_rew_284.0" +# M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_234.0" +# M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_246.0" + +H0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/best" +H1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/best" +H2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/best" +H3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/best" + +L="${L0},${L1},${L2},${L3}" +M="${M0},${M1},${M2},${M3}" +H="${H0},${H1},${H2},${H3}" + +WANDB_MODE="online" +POP_FORCE_TRAINING=false +ADVERSARY_FORCE_TRAINING=false +PRIMARY_FORCE_TRAINING=false + +source scripts/bash_scripts/env_config.sh + +python scripts/train_agents.py \ + --layout-names ${LAYOUT_NAMES} \ + --algo-name ${ALGO} \ + --exp-dir ${EXP_DIR} \ + --num-of-ckpoints ${NUM_OF_CKPOINTS} \ + --teammates-len ${TEAMMATES_LEN} \ + --num-players ${NUM_PLAYERS} \ + --custom-agent-ck-rate-generation ${CUSTOM_AGENT_CK_RATE_GENERATION} \ + --num-steps-in-traj-for-dyn-adv ${NUM_STEPS_IN_TRAJ_FOR_DYN_ADV} \ + --num-static-advs-per-heatmap ${NUM_STATIC_ADVS_PER_HEATMAP} \ + --num-dynamic-advs-per-heatmap ${NUM_DYNAMIC_ADVS_PER_HEATMAP} \ + --use-val-func-for-heatmap-gen ${USE_VAL_FUNC_FOR_HEATMAP_GEN} \ + --prioritized-sampling ${PRIORITIZED_SAMPLING} \ + --n-envs ${N_ENVS} \ + --epoch-timesteps ${EPOCH_TIMESTEPS} \ + --pop-total-training-timesteps ${POP_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-sp-total-training-timesteps ${N_X_SP_TOTAL_TRAINING_TIMESTEPS} \ + --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ + --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ + --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --wandb-mode ${WANDB_MODE} \ + --pop-force-training ${POP_FORCE_TRAINING} \ + --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ + --primary-force-training ${PRIMARY_FORCE_TRAINING} \ + --how-long ${HOW_LONG} \ + --exp-name-prefix "${EXP_NAME_PREFIX}" \ + --low-perfs ${L} \ + --med-perfs ${M} \ + --high-perfs ${H} \ \ No newline at end of file diff --git a/scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh 
b/scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh new file mode 100644 index 0000000..fccd096 --- /dev/null +++ b/scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh @@ -0,0 +1,67 @@ +#!/bin/sh + +ALGO="best_EGO" +TEAMMATES_LEN=1 +HOW_LONG=20 +NUM_OF_CKPOINTS=40 +LAYOUT_NAMES="c3" +EXP_DIR=${LAYOUT_NAMES} +TOTAL_EGO_AGENTS=1 +QUICK_TEST=false + +L0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_0" +L1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_0" +L2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_0" +L3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_0" + +# M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_1_rew_252.0" +# M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_1_rew_284.0" +# M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_234.0" +# M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_246.0" + +H0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/best" +H1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/best" +H2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/best" +H3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/best" + +L="${L0},${L1},${L2},${L3}" +M="${M0},${M1},${M2},${M3}" +H="${H0},${H1},${H2},${H3}" + +WANDB_MODE="online" +POP_FORCE_TRAINING=false +ADVERSARY_FORCE_TRAINING=false +PRIMARY_FORCE_TRAINING=false + +source scripts/bash_scripts/env_config.sh + +python scripts/train_agents.py \ + --layout-names ${LAYOUT_NAMES} \ + --algo-name ${ALGO} \ + --exp-dir ${EXP_DIR} \ + --num-of-ckpoints ${NUM_OF_CKPOINTS} \ + --teammates-len ${TEAMMATES_LEN} \ + --num-players ${NUM_PLAYERS} \ + --custom-agent-ck-rate-generation ${CUSTOM_AGENT_CK_RATE_GENERATION} \ + --num-steps-in-traj-for-dyn-adv ${NUM_STEPS_IN_TRAJ_FOR_DYN_ADV} \ + --num-static-advs-per-heatmap ${NUM_STATIC_ADVS_PER_HEATMAP} \ + --num-dynamic-advs-per-heatmap ${NUM_DYNAMIC_ADVS_PER_HEATMAP} \ + --use-val-func-for-heatmap-gen ${USE_VAL_FUNC_FOR_HEATMAP_GEN} \ + --prioritized-sampling ${PRIORITIZED_SAMPLING} \ + --n-envs ${N_ENVS} \ + --epoch-timesteps ${EPOCH_TIMESTEPS} \ + --pop-total-training-timesteps ${POP_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-sp-total-training-timesteps ${N_X_SP_TOTAL_TRAINING_TIMESTEPS} \ + --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ + --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ + --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --wandb-mode ${WANDB_MODE} \ + --pop-force-training ${POP_FORCE_TRAINING} \ + --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ + --primary-force-training ${PRIMARY_FORCE_TRAINING} \ + --how-long ${HOW_LONG} \ + --exp-name-prefix "${EXP_NAME_PREFIX}" \ + --low-perfs ${L} \ + --med-perfs ${M} \ + --high-perfs ${H} \ \ No newline at end of file diff --git a/scripts/bash_scripts/best_baseline_experiment/c4_best_ego.sh b/scripts/bash_scripts/best_baseline_experiment/c4_best_ego.sh new file mode 100644 index 0000000..6ab42f2 --- /dev/null +++ b/scripts/bash_scripts/best_baseline_experiment/c4_best_ego.sh @@ -0,0 +1,67 @@ +#!/bin/sh + +ALGO="best_EGO" +TEAMMATES_LEN=1 +HOW_LONG=20 +NUM_OF_CKPOINTS=40 +LAYOUT_NAMES="c4" +EXP_DIR=${LAYOUT_NAMES} +TOTAL_EGO_AGENTS=1 +QUICK_TEST=false + +L0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_0" +L1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_0" +L2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_0" +L3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_0" + +M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_2_rew_192.0" 
+M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_2_rew_118.0" +M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_54.0" +M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_104.0" + +H0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/best" +H1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/best" +H2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/best" +H3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/best" + +L="${L0},${L1},${L2},${L3}" +M="${M0},${M1},${M2},${M3}" +H="${H0},${H1},${H2},${H3}" + +WANDB_MODE="online" +POP_FORCE_TRAINING=false +ADVERSARY_FORCE_TRAINING=false +PRIMARY_FORCE_TRAINING=false + +source scripts/bash_scripts/env_config.sh + +python scripts/train_agents.py \ + --layout-names ${LAYOUT_NAMES} \ + --algo-name ${ALGO} \ + --exp-dir ${EXP_DIR} \ + --num-of-ckpoints ${NUM_OF_CKPOINTS} \ + --teammates-len ${TEAMMATES_LEN} \ + --num-players ${NUM_PLAYERS} \ + --custom-agent-ck-rate-generation ${CUSTOM_AGENT_CK_RATE_GENERATION} \ + --num-steps-in-traj-for-dyn-adv ${NUM_STEPS_IN_TRAJ_FOR_DYN_ADV} \ + --num-static-advs-per-heatmap ${NUM_STATIC_ADVS_PER_HEATMAP} \ + --num-dynamic-advs-per-heatmap ${NUM_DYNAMIC_ADVS_PER_HEATMAP} \ + --use-val-func-for-heatmap-gen ${USE_VAL_FUNC_FOR_HEATMAP_GEN} \ + --prioritized-sampling ${PRIORITIZED_SAMPLING} \ + --n-envs ${N_ENVS} \ + --epoch-timesteps ${EPOCH_TIMESTEPS} \ + --pop-total-training-timesteps ${POP_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-sp-total-training-timesteps ${N_X_SP_TOTAL_TRAINING_TIMESTEPS} \ + --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ + --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ + --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --wandb-mode ${WANDB_MODE} \ + --pop-force-training ${POP_FORCE_TRAINING} \ + --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ + --primary-force-training ${PRIMARY_FORCE_TRAINING} \ + --how-long ${HOW_LONG} \ + --exp-name-prefix "${EXP_NAME_PREFIX}" \ + --low-perfs ${L} \ + --med-perfs ${M} \ + --high-perfs ${H} \ \ No newline at end of file diff --git a/scripts/bash_scripts/env_config.sh b/scripts/bash_scripts/env_config.sh index 5ee4ff1..3e4951b 100644 --- a/scripts/bash_scripts/env_config.sh +++ b/scripts/bash_scripts/env_config.sh @@ -14,7 +14,7 @@ fi if [ "$QUICK_TEST" = false ]; then WANDB_MODE="online" - N_ENVS=210 + N_ENVS=50 EPOCH_TIMESTEPS=100000 POP_TOTAL_TRAINING_TIMESTEPS=$(echo "$HOW_LONG * 5000000" | bc) N_X_SP_TOTAL_TRAINING_TIMESTEPS=$(echo "$HOW_LONG * 5000000" | bc) diff --git a/scripts/train_agents.py b/scripts/train_agents.py index 40b0fba..74c68a5 100644 --- a/scripts/train_agents.py +++ b/scripts/train_agents.py @@ -11,6 +11,7 @@ get_FCP_agent_w_pop, get_N_X_FCP_agents, get_N_X_SP_agents, + get_best_EGO_agents, ) def SP(args): @@ -266,6 +267,37 @@ def SPN_XSPCKP(args) -> None: ) +def best_EGO(args) -> None: + '''only for 2 players''' + primary_train_types = [ + TeamType.SELF_PLAY_HIGH, + TeamType.SELF_PLAY_MEDIUM, + TeamType.SELF_PLAY_LOW, + ] + primary_eval_types = { + 'generate': [ + TeamType.SELF_PLAY_HIGH, + TeamType.SELF_PLAY_LOW, + ], + 'load': [] + } + if args.prioritized_sampling: + curriculum = Curriculum(train_types=primary_train_types, + eval_types=primary_eval_types, + is_random=False, + prioritized_sampling=True, + priority_scaling=2.0) + else: + curriculum = Curriculum(train_types=primary_train_types, is_random=True) + + get_best_EGO_agents( + args, + curriculum=curriculum, + primary_eval_types=primary_eval_types, + 
primary_train_types=curriculum.train_types, + ) + + if __name__ == '__main__': args = get_arguments() @@ -277,6 +309,9 @@ def SPN_XSPCKP(args) -> None: elif args.algo_name == 'FCP_traditional': FCP_traditional(args=args) + + elif args.algo_name == 'best_EGO': + best_EGO(args=args) # elif args.algo_name == 'FCP_mhri': # FCP_mhri(args=args) diff --git a/scripts/utils/__init__.py b/scripts/utils/__init__.py index e5e0d57..e97d434 100644 --- a/scripts/utils/__init__.py +++ b/scripts/utils/__init__.py @@ -1,4 +1,4 @@ -from .train_helper import get_SP_agents, get_FCP_agent_w_pop, get_N_X_FCP_agents, get_N_X_SP_agents +from .train_helper import get_SP_agents, get_FCP_agent_w_pop, get_N_X_FCP_agents, get_N_X_SP_agents, get_best_EGO_agents from .eval_helper import get_eval_types_to_load from .eval_utils import * diff --git a/scripts/utils/train_helper.py b/scripts/utils/train_helper.py index 15695aa..0925423 100644 --- a/scripts/utils/train_helper.py +++ b/scripts/utils/train_helper.py @@ -6,7 +6,7 @@ from oai_agents.common.heatmap import generate_adversaries_based_on_heatmap from oai_agents.agents.agent_utils import CustomAgent from .common import load_agents, generate_name -from oai_agents.common.tags import Prefix, KeyCheckpoints +from oai_agents.common.tags import Prefix, KeyCheckpoints, TeammatesCollection def get_SP_agents(args, train_types, eval_types, curriculum, tag_for_returning_agent): @@ -527,3 +527,67 @@ def get_N_X_FCP_agents( tag_for_returning_agent=tag ) return fcp_trainer.get_agents()[0], teammates_collection + + + +def get_best_EGO_agents(args, primary_train_types, primary_eval_types, curriculum): + '''Code purposed for a very specific experiment, assumes n_players = 2''' + from pathlib import Path + + eval_collection = { + layout_name: {ttype: [] for ttype in primary_eval_types['generate']} for layout_name in args.layout_names + } + train_collection = { + layout_name: {ttype: [] for ttype in primary_train_types} for layout_name in args.layout_names + } + + all_perfs = args.low_perfs + args.med_perfs + args.high_perfs + for agent_address in all_perfs: + + path_tag = agent_address.split('/') + path = '/'.join(path_tag[:-1]) + tag = path_tag[-1] + agents, _, _ = RLAgentTrainer.load_agents(args=args, tag=tag, path=Path('agent_models/'+path)) + agent = agents[0] + + + for layout_name in args.layout_names: + if agent_address in args.low_perfs: + ttype = TeamType.SELF_PLAY_LOW + elif agent_address in args.med_perfs: + ttype = TeamType.SELF_PLAY_MEDIUM + elif agent_address in args.high_perfs: + ttype = TeamType.SELF_PLAY_HIGH + + if ttype in train_collection[layout_name]: + train_collection[layout_name][ttype].append([agent]) + + if ttype in eval_collection[layout_name]: + eval_collection[layout_name][ttype] = [[agent]] + + teammates_collection = { + TeammatesCollection.TRAIN: train_collection, + TeammatesCollection.EVAL: eval_collection + } + + best_ego_trainer = RLAgentTrainer( + name=f'best_{args.layout_names[0]}', + args=args, + agent=None, + teammates_collection=teammates_collection, + epoch_timesteps=args.epoch_timesteps, + n_envs=args.n_envs, + + seed=args.N_X_SP_seed, + hidden_dim=args.N_X_SP_h_dim, + curriculum=curriculum, + + learner_type=args.primary_learner_type, + checkpoint_rate=args.n_x_sp_total_training_timesteps // args.num_of_ckpoints, + ) + + best_ego_trainer.train_agents( + total_train_timesteps=args.n_x_fcp_total_training_timesteps, + tag_for_returning_agent=tag + ) + From 46f866f1206ac6cdbbe00abb47d8831e397bb6be Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: 
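To make the collection built by get_best_EGO_agents above easier to read, here is an illustrative mock of its shape. Placeholder strings stand in for the loaded agent objects and for the TeamType / TeammatesCollection constants used in the real code.

    train_collection = {
        'c1': {
            'SELF_PLAY_HIGH':   [['H0'], ['H1'], ['H2'], ['H3']],
            'SELF_PLAY_MEDIUM': [['M0'], ['M1'], ['M2'], ['M3']],
            'SELF_PLAY_LOW':    [['L0'], ['L1'], ['L2'], ['L3']],
        }
    }
    eval_collection = {
        'c1': {
            # as written above, eval keeps only the last loaded agent per type
            'SELF_PLAY_HIGH': [['H3']],
            'SELF_PLAY_LOW':  [['L3']],
        }
    }
    teammates_collection = {'TRAIN': train_collection, 'EVAL': eval_collection}
    print(len(teammates_collection['TRAIN']['c1']['SELF_PLAY_HIGH']))  # 4 one-teammate groups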
Fri, 21 Mar 2025 16:47:34 -0600 Subject: [PATCH 15/26] update exp_dir --- scripts/bash_scripts/best_baseline_experiment/c1_best_ego.sh | 2 +- scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh | 2 +- scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh | 2 +- scripts/bash_scripts/best_baseline_experiment/c4_best_ego.sh | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/bash_scripts/best_baseline_experiment/c1_best_ego.sh b/scripts/bash_scripts/best_baseline_experiment/c1_best_ego.sh index 9ac5db5..bff1e48 100644 --- a/scripts/bash_scripts/best_baseline_experiment/c1_best_ego.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c1_best_ego.sh @@ -5,7 +5,7 @@ TEAMMATES_LEN=1 HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c1" -EXP_DIR=${LAYOUT_NAMES} +EXP_DIR="${LAYOUT_NAMES}_best_EGO" TOTAL_EGO_AGENTS=1 QUICK_TEST=false diff --git a/scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh b/scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh index e93a38b..096d928 100644 --- a/scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh @@ -5,7 +5,7 @@ TEAMMATES_LEN=1 HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c2" -EXP_DIR=${LAYOUT_NAMES} +EXP_DIR="${LAYOUT_NAMES}_best_EGO" TOTAL_EGO_AGENTS=1 QUICK_TEST=false diff --git a/scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh b/scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh index fccd096..00eaf83 100644 --- a/scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh @@ -5,7 +5,7 @@ TEAMMATES_LEN=1 HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c3" -EXP_DIR=${LAYOUT_NAMES} +EXP_DIR="${LAYOUT_NAMES}_best_EGO" TOTAL_EGO_AGENTS=1 QUICK_TEST=false diff --git a/scripts/bash_scripts/best_baseline_experiment/c4_best_ego.sh b/scripts/bash_scripts/best_baseline_experiment/c4_best_ego.sh index 6ab42f2..e7706bd 100644 --- a/scripts/bash_scripts/best_baseline_experiment/c4_best_ego.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c4_best_ego.sh @@ -5,7 +5,7 @@ TEAMMATES_LEN=1 HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c4" -EXP_DIR=${LAYOUT_NAMES} +EXP_DIR="${LAYOUT_NAMES}_best_EGO" TOTAL_EGO_AGENTS=1 QUICK_TEST=false From df1096c5bec9d89b74a2b0731f024f9f8c2522eb Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Mon, 24 Mar 2025 11:03:08 -0600 Subject: [PATCH 16/26] Correct training time --- scripts/utils/train_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/utils/train_helper.py b/scripts/utils/train_helper.py index 0925423..6a26478 100644 --- a/scripts/utils/train_helper.py +++ b/scripts/utils/train_helper.py @@ -587,7 +587,7 @@ def get_best_EGO_agents(args, primary_train_types, primary_eval_types, curriculu ) best_ego_trainer.train_agents( - total_train_timesteps=args.n_x_fcp_total_training_timesteps, + total_train_timesteps=args.n_x_sp_total_training_timesteps, tag_for_returning_agent=tag ) From e9a8104f2210264b17498ee698d43b53a3b38cb5 Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Mon, 24 Mar 2025 18:30:48 -0600 Subject: [PATCH 17/26] Add best cap experiments --- oai_agents/common/curriculum.py | 1 + oai_agents/common/overcooked_gui.py | 24 ++++--- oai_agents/common/overcooked_simulation.py | 15 ++++- .../best_baseline_experiment/c1_best_CAP.sh | 67 +++++++++++++++++++ .../best_baseline_experiment/c2_best_CAP.sh | 67 +++++++++++++++++++ .../best_baseline_experiment/c2_best_ego.sh | 8 +-- 
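A quick sanity check of the training-time fix above, using the values these scripts set (HOW_LONG=20, NUM_OF_CKPOINTS=40) and the HOW_LONG * 5000000 rule from env_config.sh, so that checkpoint_rate and total_train_timesteps come from the same N_X_SP budget:

    how_long = 20
    num_of_ckpoints = 40
    n_x_sp_total_training_timesteps = how_long * 5_000_000       # 100_000_000 per env_config.sh
    checkpoint_rate = n_x_sp_total_training_timesteps // num_of_ckpoints
    print(checkpoint_rate)                                        # 2_500_000 steps per checkpoint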
.../best_baseline_experiment/c3_best_CAP.sh | 67 +++++++++++++++++++ .../best_baseline_experiment/c3_best_ego.sh | 8 +-- .../best_baseline_experiment/c4_best_CAP.sh | 67 +++++++++++++++++++ scripts/bash_scripts/test_run.sh | 53 +++++++++++++++ scripts/run_overcooked_game.py | 19 ++++-- scripts/train_agents.py | 8 ++- scripts/utils/train_helper.py | 30 +++++++-- 13 files changed, 401 insertions(+), 33 deletions(-) create mode 100644 scripts/bash_scripts/best_baseline_experiment/c1_best_CAP.sh create mode 100644 scripts/bash_scripts/best_baseline_experiment/c2_best_CAP.sh create mode 100644 scripts/bash_scripts/best_baseline_experiment/c3_best_CAP.sh create mode 100644 scripts/bash_scripts/best_baseline_experiment/c4_best_CAP.sh create mode 100644 scripts/bash_scripts/test_run.sh diff --git a/oai_agents/common/curriculum.py b/oai_agents/common/curriculum.py index 2dc95c0..d01d038 100644 --- a/oai_agents/common/curriculum.py +++ b/oai_agents/common/curriculum.py @@ -103,6 +103,7 @@ def select_teammates_for_layout(self, population_teamtypes, layout): population = [population_teamtypes[t] for t in population_teamtypes.keys()] teammates_per_type = population[np.random.randint(len(population))] teammates = teammates_per_type[np.random.randint(len(teammates_per_type))] + elif self.prioritized_sampling: teammates = self.select_teammates_prioritized_sampling(population_teamtypes, layout) else: diff --git a/oai_agents/common/overcooked_gui.py b/oai_agents/common/overcooked_gui.py index 65a4a3b..62ae4e8 100644 --- a/oai_agents/common/overcooked_gui.py +++ b/oai_agents/common/overcooked_gui.py @@ -64,17 +64,23 @@ def __init__(self, args, layout_name=None, agent=None, teammates=None, p_idx=0, self.args = args self.layout_name = layout_name or 'asymmetric_advantages' - self.use_subtask_env = False - if self.use_subtask_env: - kwargs = {'single_subtask_id': 10, 'args': args, 'is_eval_env': True} - self.env = OvercookedSubtaskGymEnv(**p_kwargs, **kwargs) - else: - self.env = OvercookedGymEnv(layout_name=self.layout_name, args=args, ret_completed_subtasks=False, + teammates_collection = { + 'eval': { + args.layout: { + 'run_type': [teammates] + } + } + } + + self.env = OvercookedGymEnv(layout_name=self.layout_name, args=args, ret_completed_subtasks=False, is_eval_env=True, horizon=horizon, learner_type='originaler', + teammates_collection=teammates_collection, curriculum=None, ) self.agent = agent self.p_idx = p_idx - self.env.set_teammates(teammates) + + self.env.set_teammates('run_type') + self.env.reset(p_idx=self.p_idx) if self.agent != 'human': self.agent.set_encoding_params(self.p_idx, self.args.horizon, env=self.env, is_haha=isinstance(self.agent, HierarchicalRL), tune_subtasks=False) @@ -205,9 +211,6 @@ def step_env(self, agent_action): completed_task = calculate_completed_subtask(prev_obj, curr_obj, tile_in_front) # print('----', completed_task) - collision = self.env.mdp.prev_step_was_collision - if collision: - self.num_collisions += 1 # Log data curr_reward = sum(info['sparse_r_by_agent']) @@ -231,7 +234,6 @@ def step_env(self, agent_action): # TEAMMATE and POP(TODO): uncommment it and replace teammate_name by teammate_names # "agent": self.teammate_name, "p_idx": self.p_idx, - "collision": collision, "num_collisions": self.num_collisions } trans_str = json.dumps(transition) diff --git a/oai_agents/common/overcooked_simulation.py b/oai_agents/common/overcooked_simulation.py index 20ba9b0..41e64a9 100644 --- a/oai_agents/common/overcooked_simulation.py +++ 
b/oai_agents/common/overcooked_simulation.py @@ -13,16 +13,27 @@ def __init__(self, args, agent, teammates, layout_name, p_idx, horizon=400): self.args = args self.layout_name = layout_name + teammates_collection = { + 'eval': { + layout_name: { + 'run_type': [teammates] + } + } + } + self.env = OvercookedGymEnv(args=args, layout_name=self.layout_name, ret_completed_subtasks=False, is_eval_env=True, horizon=horizon, - learner_type='originaler') + learner_type='originaler', + teammates_collection=teammates_collection, + curriculum=None + ) self.agent = agent self.p_idx = p_idx - self.env.set_teammates(teammates) + self.env.set_teammates('run_type') self.env.reset(p_idx=self.p_idx) assert self.agent is not 'human' diff --git a/scripts/bash_scripts/best_baseline_experiment/c1_best_CAP.sh b/scripts/bash_scripts/best_baseline_experiment/c1_best_CAP.sh new file mode 100644 index 0000000..700668a --- /dev/null +++ b/scripts/bash_scripts/best_baseline_experiment/c1_best_CAP.sh @@ -0,0 +1,67 @@ +#!/bin/sh + +ALGO="best_EGO_with_CAP" +TEAMMATES_LEN=1 +HOW_LONG=20 +NUM_OF_CKPOINTS=40 +LAYOUT_NAMES="c1" +EXP_DIR="${LAYOUT_NAMES}_best_EGO_with_CAP" +TOTAL_EGO_AGENTS=1 +QUICK_TEST=false + +L0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_0" +L1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_0" +L2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_0" +L3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_0" + +M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_1_rew_252.0" +M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_1_rew_284.0" +M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_234.0" +M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_246.0" + +H0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/best" +H1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/best" +H2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/best" +H3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/best" + +L="${L0},${L1},${L2},${L3}" +M="${M0},${M1},${M2},${M3}" +H="${H0},${H1},${H2},${H3}" + +WANDB_MODE="online" +POP_FORCE_TRAINING=false +ADVERSARY_FORCE_TRAINING=false +PRIMARY_FORCE_TRAINING=false + +source scripts/bash_scripts/env_config.sh + +python scripts/train_agents.py \ + --layout-names ${LAYOUT_NAMES} \ + --algo-name ${ALGO} \ + --exp-dir ${EXP_DIR} \ + --num-of-ckpoints ${NUM_OF_CKPOINTS} \ + --teammates-len ${TEAMMATES_LEN} \ + --num-players ${NUM_PLAYERS} \ + --custom-agent-ck-rate-generation ${CUSTOM_AGENT_CK_RATE_GENERATION} \ + --num-steps-in-traj-for-dyn-adv ${NUM_STEPS_IN_TRAJ_FOR_DYN_ADV} \ + --num-static-advs-per-heatmap ${NUM_STATIC_ADVS_PER_HEATMAP} \ + --num-dynamic-advs-per-heatmap ${NUM_DYNAMIC_ADVS_PER_HEATMAP} \ + --use-val-func-for-heatmap-gen ${USE_VAL_FUNC_FOR_HEATMAP_GEN} \ + --prioritized-sampling ${PRIORITIZED_SAMPLING} \ + --n-envs ${N_ENVS} \ + --epoch-timesteps ${EPOCH_TIMESTEPS} \ + --pop-total-training-timesteps ${POP_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-sp-total-training-timesteps ${N_X_SP_TOTAL_TRAINING_TIMESTEPS} \ + --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ + --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ + --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --wandb-mode ${WANDB_MODE} \ + --pop-force-training ${POP_FORCE_TRAINING} \ + --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ + --primary-force-training ${PRIMARY_FORCE_TRAINING} \ + --how-long ${HOW_LONG} \ + --exp-name-prefix "${EXP_NAME_PREFIX}" \ + --low-perfs ${L} \ + --med-perfs ${M} \ + --high-perfs 
${H} \ \ No newline at end of file diff --git a/scripts/bash_scripts/best_baseline_experiment/c2_best_CAP.sh b/scripts/bash_scripts/best_baseline_experiment/c2_best_CAP.sh new file mode 100644 index 0000000..1d67a84 --- /dev/null +++ b/scripts/bash_scripts/best_baseline_experiment/c2_best_CAP.sh @@ -0,0 +1,67 @@ +#!/bin/sh + +ALGO="best_EGO_with_CAP" +TEAMMATES_LEN=1 +HOW_LONG=20 +NUM_OF_CKPOINTS=40 +LAYOUT_NAMES="c2" +EXP_DIR="${LAYOUT_NAMES}_best_EGO_with_CAP" +TOTAL_EGO_AGENTS=1 +QUICK_TEST=false + +L0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_0" +L1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_0" +L2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_0" +L3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_0" + +M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_1_rew_252.0" +M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_1_rew_284.0" +M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_234.0" +M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_246.0" + +H0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/best" +H1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/best" +H2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/best" +H3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/best" + +L="${L0},${L1},${L2},${L3}" +M="${M0},${M1},${M2},${M3}" +H="${H0},${H1},${H2},${H3}" + +WANDB_MODE="online" +POP_FORCE_TRAINING=false +ADVERSARY_FORCE_TRAINING=false +PRIMARY_FORCE_TRAINING=false + +source scripts/bash_scripts/env_config.sh + +python scripts/train_agents.py \ + --layout-names ${LAYOUT_NAMES} \ + --algo-name ${ALGO} \ + --exp-dir ${EXP_DIR} \ + --num-of-ckpoints ${NUM_OF_CKPOINTS} \ + --teammates-len ${TEAMMATES_LEN} \ + --num-players ${NUM_PLAYERS} \ + --custom-agent-ck-rate-generation ${CUSTOM_AGENT_CK_RATE_GENERATION} \ + --num-steps-in-traj-for-dyn-adv ${NUM_STEPS_IN_TRAJ_FOR_DYN_ADV} \ + --num-static-advs-per-heatmap ${NUM_STATIC_ADVS_PER_HEATMAP} \ + --num-dynamic-advs-per-heatmap ${NUM_DYNAMIC_ADVS_PER_HEATMAP} \ + --use-val-func-for-heatmap-gen ${USE_VAL_FUNC_FOR_HEATMAP_GEN} \ + --prioritized-sampling ${PRIORITIZED_SAMPLING} \ + --n-envs ${N_ENVS} \ + --epoch-timesteps ${EPOCH_TIMESTEPS} \ + --pop-total-training-timesteps ${POP_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-sp-total-training-timesteps ${N_X_SP_TOTAL_TRAINING_TIMESTEPS} \ + --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ + --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ + --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --wandb-mode ${WANDB_MODE} \ + --pop-force-training ${POP_FORCE_TRAINING} \ + --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ + --primary-force-training ${PRIMARY_FORCE_TRAINING} \ + --how-long ${HOW_LONG} \ + --exp-name-prefix "${EXP_NAME_PREFIX}" \ + --low-perfs ${L} \ + --med-perfs ${M} \ + --high-perfs ${H} \ \ No newline at end of file diff --git a/scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh b/scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh index 096d928..dfa0c31 100644 --- a/scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh @@ -14,10 +14,10 @@ L1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_0" L2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_0" L3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_0" -# M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_1_rew_252.0" -# M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_1_rew_284.0" -# 
M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_234.0" -# M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_246.0" +M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_1_rew_246.0" +M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_1_rew_256.0" +M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_178.0" +M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_186.0" H0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/best" H1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/best" diff --git a/scripts/bash_scripts/best_baseline_experiment/c3_best_CAP.sh b/scripts/bash_scripts/best_baseline_experiment/c3_best_CAP.sh new file mode 100644 index 0000000..0773e27 --- /dev/null +++ b/scripts/bash_scripts/best_baseline_experiment/c3_best_CAP.sh @@ -0,0 +1,67 @@ +#!/bin/sh + +ALGO="best_EGO_with_CAP" +TEAMMATES_LEN=1 +HOW_LONG=20 +NUM_OF_CKPOINTS=40 +LAYOUT_NAMES="c3" +EXP_DIR="${LAYOUT_NAMES}_best_EGO_with_CAP" +TOTAL_EGO_AGENTS=1 +QUICK_TEST=false + +L0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_0" +L1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_0" +L2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_0" +L3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_0" + +M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_1_rew_252.0" +M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_1_rew_284.0" +M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_234.0" +M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_246.0" + +H0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/best" +H1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/best" +H2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/best" +H3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/best" + +L="${L0},${L1},${L2},${L3}" +M="${M0},${M1},${M2},${M3}" +H="${H0},${H1},${H2},${H3}" + +WANDB_MODE="online" +POP_FORCE_TRAINING=false +ADVERSARY_FORCE_TRAINING=false +PRIMARY_FORCE_TRAINING=false + +source scripts/bash_scripts/env_config.sh + +python scripts/train_agents.py \ + --layout-names ${LAYOUT_NAMES} \ + --algo-name ${ALGO} \ + --exp-dir ${EXP_DIR} \ + --num-of-ckpoints ${NUM_OF_CKPOINTS} \ + --teammates-len ${TEAMMATES_LEN} \ + --num-players ${NUM_PLAYERS} \ + --custom-agent-ck-rate-generation ${CUSTOM_AGENT_CK_RATE_GENERATION} \ + --num-steps-in-traj-for-dyn-adv ${NUM_STEPS_IN_TRAJ_FOR_DYN_ADV} \ + --num-static-advs-per-heatmap ${NUM_STATIC_ADVS_PER_HEATMAP} \ + --num-dynamic-advs-per-heatmap ${NUM_DYNAMIC_ADVS_PER_HEATMAP} \ + --use-val-func-for-heatmap-gen ${USE_VAL_FUNC_FOR_HEATMAP_GEN} \ + --prioritized-sampling ${PRIORITIZED_SAMPLING} \ + --n-envs ${N_ENVS} \ + --epoch-timesteps ${EPOCH_TIMESTEPS} \ + --pop-total-training-timesteps ${POP_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-sp-total-training-timesteps ${N_X_SP_TOTAL_TRAINING_TIMESTEPS} \ + --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ + --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ + --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --wandb-mode ${WANDB_MODE} \ + --pop-force-training ${POP_FORCE_TRAINING} \ + --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ + --primary-force-training ${PRIMARY_FORCE_TRAINING} \ + --how-long ${HOW_LONG} \ + --exp-name-prefix "${EXP_NAME_PREFIX}" \ + --low-perfs ${L} \ + --med-perfs ${M} \ + --high-perfs ${H} \ \ No newline at end of file diff --git a/scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh b/scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh index 00eaf83..8ce7625 100644 --- 
a/scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh @@ -14,10 +14,10 @@ L1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_0" L2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_0" L3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_0" -# M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_1_rew_252.0" -# M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_1_rew_284.0" -# M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_234.0" -# M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_246.0" +M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_1_rew_16.0" +M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_1_rew_14.0" +M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_108.0" +M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_90.0" H0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/best" H1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/best" diff --git a/scripts/bash_scripts/best_baseline_experiment/c4_best_CAP.sh b/scripts/bash_scripts/best_baseline_experiment/c4_best_CAP.sh new file mode 100644 index 0000000..7614f97 --- /dev/null +++ b/scripts/bash_scripts/best_baseline_experiment/c4_best_CAP.sh @@ -0,0 +1,67 @@ +#!/bin/sh + +ALGO="best_EGO_with_CAP" +TEAMMATES_LEN=1 +HOW_LONG=20 +NUM_OF_CKPOINTS=40 +LAYOUT_NAMES="c4" +EXP_DIR="${LAYOUT_NAMES}_best_EGO_with_CAP" +TOTAL_EGO_AGENTS=1 +QUICK_TEST=false + +L0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_0" +L1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_0" +L2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_0" +L3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_0" + +M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_1_rew_252.0" +M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_1_rew_284.0" +M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_234.0" +M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_246.0" + +H0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/best" +H1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/best" +H2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/best" +H3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/best" + +L="${L0},${L1},${L2},${L3}" +M="${M0},${M1},${M2},${M3}" +H="${H0},${H1},${H2},${H3}" + +WANDB_MODE="online" +POP_FORCE_TRAINING=false +ADVERSARY_FORCE_TRAINING=false +PRIMARY_FORCE_TRAINING=false + +source scripts/bash_scripts/env_config.sh + +python scripts/train_agents.py \ + --layout-names ${LAYOUT_NAMES} \ + --algo-name ${ALGO} \ + --exp-dir ${EXP_DIR} \ + --num-of-ckpoints ${NUM_OF_CKPOINTS} \ + --teammates-len ${TEAMMATES_LEN} \ + --num-players ${NUM_PLAYERS} \ + --custom-agent-ck-rate-generation ${CUSTOM_AGENT_CK_RATE_GENERATION} \ + --num-steps-in-traj-for-dyn-adv ${NUM_STEPS_IN_TRAJ_FOR_DYN_ADV} \ + --num-static-advs-per-heatmap ${NUM_STATIC_ADVS_PER_HEATMAP} \ + --num-dynamic-advs-per-heatmap ${NUM_DYNAMIC_ADVS_PER_HEATMAP} \ + --use-val-func-for-heatmap-gen ${USE_VAL_FUNC_FOR_HEATMAP_GEN} \ + --prioritized-sampling ${PRIORITIZED_SAMPLING} \ + --n-envs ${N_ENVS} \ + --epoch-timesteps ${EPOCH_TIMESTEPS} \ + --pop-total-training-timesteps ${POP_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-sp-total-training-timesteps ${N_X_SP_TOTAL_TRAINING_TIMESTEPS} \ + --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ + --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ + --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --wandb-mode ${WANDB_MODE} \ + --pop-force-training ${POP_FORCE_TRAINING} \ + 
--adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ + --primary-force-training ${PRIMARY_FORCE_TRAINING} \ + --how-long ${HOW_LONG} \ + --exp-name-prefix "${EXP_NAME_PREFIX}" \ + --low-perfs ${L} \ + --med-perfs ${M} \ + --high-perfs ${H} \ \ No newline at end of file diff --git a/scripts/bash_scripts/test_run.sh b/scripts/bash_scripts/test_run.sh new file mode 100644 index 0000000..29b9a30 --- /dev/null +++ b/scripts/bash_scripts/test_run.sh @@ -0,0 +1,53 @@ +#!/bin/sh + +ALGO="SPN_XSPCKP" +TEAMMATES_LEN=1 +NUM_PLAYERS=$((TEAMMATES_LEN + 1)) +NUM_OF_CKPOINTS=10 +LAYOUT_NAMES="counter_circuit" +EXP_DIR="$NUM_PLAYERS" # When quick_test=True this will be overwritten to "Test/$EXP_DIR" +TOTAL_EGO_AGENTS=4 +QUICK_TEST=true +HOW_LONG=1 + +POP_FORCE_TRAINING=false +ADVERSARY_FORCE_TRAINING=false +PRIMARY_FORCE_TRAINING=false +# EXP_NAME_PREFIX="test_" + +source scripts/bash_scripts/env_config.sh +# Overwrite the default values from env_config here if needed +N_ENVS=5 +WANDB_MODE="disabled" +EPOCH_TIMESTEPS=3500 +N_X_SP_TOTAL_TRAINING_TIMESTEPS=10000 +FCP_TOTAL_TRAINING_TIMESTEPS=75000 + + +python scripts/train_agents.py \ + --layout-names ${LAYOUT_NAMES} \ + --algo-name ${ALGO} \ + --exp-dir ${EXP_DIR} \ + --num-of-ckpoints ${NUM_OF_CKPOINTS} \ + --teammates-len ${TEAMMATES_LEN} \ + --num-players ${NUM_PLAYERS} \ + --custom-agent-ck-rate-generation ${CUSTOM_AGENT_CK_RATE_GENERATION} \ + --num-steps-in-traj-for-dyn-adv ${NUM_STEPS_IN_TRAJ_FOR_DYN_ADV} \ + --num-static-advs-per-heatmap ${NUM_STATIC_ADVS_PER_HEATMAP} \ + --num-dynamic-advs-per-heatmap ${NUM_DYNAMIC_ADVS_PER_HEATMAP} \ + --use-val-func-for-heatmap-gen ${USE_VAL_FUNC_FOR_HEATMAP_GEN} \ + --prioritized-sampling ${PRIORITIZED_SAMPLING} \ + --n-envs ${N_ENVS} \ + --epoch-timesteps ${EPOCH_TIMESTEPS} \ + --pop-total-training-timesteps ${POP_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-sp-total-training-timesteps ${N_X_SP_TOTAL_TRAINING_TIMESTEPS} \ + --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ + --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ + --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ + --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --wandb-mode ${WANDB_MODE} \ + --pop-force-training ${POP_FORCE_TRAINING} \ + --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ + --primary-force-training ${PRIMARY_FORCE_TRAINING} \ + --how-long ${HOW_LONG} \ + --exp-name-prefix "${EXP_NAME_PREFIX}" \ \ No newline at end of file diff --git a/scripts/run_overcooked_game.py b/scripts/run_overcooked_game.py index 4ed0dda..bb03624 100644 --- a/scripts/run_overcooked_game.py +++ b/scripts/run_overcooked_game.py @@ -17,29 +17,36 @@ def get_teammate_from_pop_file(tm_name, tm_score, pop_path, layout_name): args = get_arguments() args.num_players = 2 - args.layout = f'counter_circuit' + args.layout = f'c1' args.p_idx = 0 args.layout_names = [args.layout] args.n_envs = 1 - # teammates_path = [ + teammates_path = [ + # 'agent_models/c1_v4/SP_s1010_h256_tr[SP]_ran/best' + # 'agent_models/c1_best_EGO/best_c1/best' + + 'agent_models/c4_v4/SP_s1010_h256_tr[SP]_ran/best' + # 'agent_models/c4_best_EGO/best_c4/best' + # 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best', # green # 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best', # orange # 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best', # 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best', # 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best', - # ] - # teammates = [load_agent(Path(tm_path), args) for tm_path in teammates_path[:args.num_players - 1]] + ] + 
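For reference, the eval-only wrapper that the updated OvercookedGUI and OvercookedSimulation now expect can be built for any teammate list as below (a sketch mirroring the structure in the diffs above; 'run_type' is simply the key later passed to set_teammates).

    def wrap_teammates_for_eval(layout_name, teammates):
        # one teammate group under an arbitrary key, matching the GUI/simulation diffs
        return {'eval': {layout_name: {'run_type': [teammates]}}}

    collection = wrap_teammates_for_eval('c1', ['teammate_placeholder'])
    print(collection['eval']['c1']['run_type'])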
teammates = [load_agent(Path(tm_path), args) for tm_path in teammates_path[:args.num_players - 1]] # trajectories = tile locations. Top left of the layout is (0, 0), bottom right is (M, N) - teammates = [CustomAgent(args=args, name='human', trajectories={args.layout: [(1, 1), (1, 2)]})] + teammates = [CustomAgent(args=args, name='human', trajectories={args.layout: [(2, 1), (3, 1)]})] # teammates = [DummyAgent(action='random') for _ in range(args.num_players - 1)] # player_path = 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best' + # player_path = 'agent_models/c4_best_EGO/best_c4/best' # player = load_agent(Path(player_path), args) # player = teammates[0] player = 'human' # blue - dc = OvercookedGUI(args, agent=player, teammates=teammates, layout_name=args.layout, p_idx=args.p_idx, fps=10, + dc = OvercookedGUI(args, agent=player, teammates=teammates, layout_name=args.layout, p_idx=args.p_idx, fps=50, horizon=400, gif_name=args.layout) dc.on_execute() diff --git a/scripts/train_agents.py b/scripts/train_agents.py index 74c68a5..3b4b52e 100644 --- a/scripts/train_agents.py +++ b/scripts/train_agents.py @@ -267,7 +267,7 @@ def SPN_XSPCKP(args) -> None: ) -def best_EGO(args) -> None: +def best_EGO(args, add_adv=False) -> None: '''only for 2 players''' primary_train_types = [ TeamType.SELF_PLAY_HIGH, @@ -295,6 +295,7 @@ def best_EGO(args) -> None: curriculum=curriculum, primary_eval_types=primary_eval_types, primary_train_types=curriculum.train_types, + add_adv=add_adv ) @@ -311,7 +312,10 @@ def best_EGO(args) -> None: FCP_traditional(args=args) elif args.algo_name == 'best_EGO': - best_EGO(args=args) + best_EGO(args=args, add_adv=False) + + elif args.algo_name == 'best_EGO_with_CAP': + best_EGO(args=args, add_adv=True) # elif args.algo_name == 'FCP_mhri': # FCP_mhri(args=args) diff --git a/scripts/utils/train_helper.py b/scripts/utils/train_helper.py index 6a26478..78c1220 100644 --- a/scripts/utils/train_helper.py +++ b/scripts/utils/train_helper.py @@ -530,7 +530,7 @@ def get_N_X_FCP_agents( -def get_best_EGO_agents(args, primary_train_types, primary_eval_types, curriculum): +def get_best_EGO_agents(args, primary_train_types, primary_eval_types, curriculum, add_adv=False): '''Code purposed for a very specific experiment, assumes n_players = 2''' from pathlib import Path @@ -565,13 +565,32 @@ def get_best_EGO_agents(args, primary_train_types, primary_eval_types, curriculu if ttype in eval_collection[layout_name]: eval_collection[layout_name][ttype] = [[agent]] + name = f'best_{args.layout_names[0]}' + + if add_adv: + random_pos = { + 'c1': [(1, 1), (2, 1), (3, 1), (4, 1), (1, 2), (2, 2), (3, 2), (4, 2)], + 'c2': [(2, 1), (4, 1), (6, 1), (1, 2), (7, 2), (2, 3), (4, 3), (6, 3)], + 'c3': [(2, 1), (4, 1), (6, 1), (1, 2), (7, 2), (2, 3), (4, 3), (6, 3)], + 'c4': [(3, 1), (5, 1), (7, 1), (1, 2), (9, 2), (3, 3), (5, 3), (7, 3)], + } + + custom_agents = [] + for adv_idx in range(len(random_pos[args.layout_names[0]])): + start_position = {layout: [random_pos[layout][adv_idx]] for layout in args.layout_names} + custom_agents.append([CustomAgent(args=args, name=f'SA{adv_idx}', trajectories=start_position)]) + + train_collection[args.layout_names[0]][TeamType.SELF_PLAY_STATIC_ADV] = custom_agents + eval_collection[args.layout_names[0]][TeamType.SELF_PLAY_STATIC_ADV] = custom_agents + name = f'best_{args.layout_names[0]}_adv' + teammates_collection = { TeammatesCollection.TRAIN: train_collection, TeammatesCollection.EVAL: eval_collection } best_ego_trainer = RLAgentTrainer( - 
name=f'best_{args.layout_names[0]}', + name=name, args=args, agent=None, teammates_collection=teammates_collection, @@ -587,7 +606,10 @@ def get_best_EGO_agents(args, primary_train_types, primary_eval_types, curriculu ) best_ego_trainer.train_agents( - total_train_timesteps=args.n_x_sp_total_training_timesteps, + total_train_timesteps=args.n_x_fcp_total_training_timesteps, tag_for_returning_agent=tag ) - + + + + From 968967ecbb665db13bb599b8b8a56aca7b64a29c Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Mon, 24 Mar 2025 18:39:26 -0600 Subject: [PATCH 18/26] Fix bash scripts --- .../bash_scripts/best_baseline_experiment/c2_best_CAP.sh | 8 ++++---- .../bash_scripts/best_baseline_experiment/c3_best_CAP.sh | 8 ++++---- .../bash_scripts/best_baseline_experiment/c4_best_CAP.sh | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/scripts/bash_scripts/best_baseline_experiment/c2_best_CAP.sh b/scripts/bash_scripts/best_baseline_experiment/c2_best_CAP.sh index 1d67a84..ce43e2f 100644 --- a/scripts/bash_scripts/best_baseline_experiment/c2_best_CAP.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c2_best_CAP.sh @@ -14,10 +14,10 @@ L1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_0" L2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_0" L3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_0" -M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_1_rew_252.0" -M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_1_rew_284.0" -M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_234.0" -M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_246.0" +M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_1_rew_246.0" +M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_1_rew_256.0" +M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_178.0" +M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_186.0" H0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/best" H1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/best" diff --git a/scripts/bash_scripts/best_baseline_experiment/c3_best_CAP.sh b/scripts/bash_scripts/best_baseline_experiment/c3_best_CAP.sh index 0773e27..84b6152 100644 --- a/scripts/bash_scripts/best_baseline_experiment/c3_best_CAP.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c3_best_CAP.sh @@ -14,10 +14,10 @@ L1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_0" L2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_0" L3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_0" -M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_1_rew_252.0" -M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_1_rew_284.0" -M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_234.0" -M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_246.0" +M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_1_rew_16.0" +M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_1_rew_14.0" +M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_108.0" +M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_90.0" H0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/best" H1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/best" diff --git a/scripts/bash_scripts/best_baseline_experiment/c4_best_CAP.sh b/scripts/bash_scripts/best_baseline_experiment/c4_best_CAP.sh index 7614f97..2d35934 100644 --- a/scripts/bash_scripts/best_baseline_experiment/c4_best_CAP.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c4_best_CAP.sh @@ -14,10 +14,10 @@ L1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_0" L2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_0" L3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_0" 
-M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_1_rew_252.0" -M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_1_rew_284.0" -M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_234.0" -M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_246.0" +M0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_2_rew_192.0" +M1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/ck_2_rew_118.0" +M2="${LAYOUT_NAMES}_v3/SP_s1010_h256_tr[SP]_ran/ck_1_rew_54.0" +M3="${LAYOUT_NAMES}_v4/SP_s1010_h256_tr[SP]_ran/ck_1_rew_104.0" H0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/best" H1="${LAYOUT_NAMES}_v2/SP_s1010_h256_tr[SP]_ran/best" From 9b17b76d072a51c85e91c9601caf7063e07d7dd0 Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Mon, 24 Mar 2025 19:36:13 -0600 Subject: [PATCH 19/26] Code to calculate resource usage --- oai_agents/common/overcooked_gui.py | 57 +++++++++++++++++++++++++++++ scripts/run_overcooked_game.py | 10 ++--- 2 files changed, 62 insertions(+), 5 deletions(-) diff --git a/oai_agents/common/overcooked_gui.py b/oai_agents/common/overcooked_gui.py index 62ae4e8..b211dd0 100644 --- a/oai_agents/common/overcooked_gui.py +++ b/oai_agents/common/overcooked_gui.py @@ -120,6 +120,22 @@ def __init__(self, args, layout_name=None, agent=None, teammates=None, p_idx=0, self.gif_name = gif_name if not os.path.exists(f'data/screenshots/{self.gif_name}'): os.makedirs(f'data/screenshots/{self.gif_name}') + + + self.resource_locations = {} + for y, row in enumerate(self.env.env.mdp.terrain_mtx): + for x, cell in enumerate(row): + if cell in ['S', 'D', 'P', 'O']: + self.resource_locations[(x, y)] = cell + + self.resource_usage = { + agent_idx: {pos: 0 for pos in self.resource_locations} + for agent_idx in range(len(self.env.state.players)) + } + + print(f"Resource locations: {self.resource_locations}") + + def start_screen(self): pygame.init() @@ -211,6 +227,17 @@ def step_env(self, agent_action): completed_task = calculate_completed_subtask(prev_obj, curr_obj, tile_in_front) # print('----', completed_task) + joint_action = self.env.get_joint_action() + for idx, player in enumerate(self.env.state.players): + # pos_in_front = facing(self.env.env.mdp.terrain_mtx, player) + + x, y = player.position[0] + player.orientation[0], player.position[1] + player.orientation[1] + pos_in_front = (x, y) + + action = joint_action[idx] + if action == Action.INTERACT: + if pos_in_front in self.resource_locations: + self.resource_usage[idx][pos_in_front] += 1 # Log data curr_reward = sum(info['sparse_r_by_agent']) @@ -301,6 +328,36 @@ def on_execute(self): self.on_cleanup() print(f'Trial finished in {self.curr_tick} steps with total reward {self.score}') + # print("Resource usage breakdown by agent and resource position:") + # for agent_idx, usage in self.resource_usage.items(): + # print(f"Agent {agent_idx}:") + # for pos, count in usage.items(): + # if count > 0: + # res_type = self.resource_locations[pos] + # print(f" {res_type} at {pos}: {count} times") + + from collections import defaultdict + + # Step 1: Gather all resource locations and types + all_resource_entries = [] + for pos, res_type in self.resource_locations.items(): + all_resource_entries.append((res_type, pos)) + + # Step 2: Sort by resource type then position + all_resource_entries.sort(key=lambda x: (x[0], x[1])) # Sort by type, then position + + # Step 3: Print header and values + print("Resource usage comparison (Agent 0 vs Agent 1):\n") + print(f"{'Type':<4} {'Position':<10} | {'Agent 0':<8} {'Agent 1':<8}") + print("-" * 36) + + for res_type, pos in 
all_resource_entries: + a0_count = self.resource_usage[0].get(pos, 0) + a1_count = self.resource_usage[1].get(pos, 0) + if a0_count > 0 or a1_count > 0: # Only show if someone used it + print(f"{res_type:<4} {str(pos):<10} | {a0_count:<8} {a1_count:<8}") + + def save_trajectory(self, data_path): df = pd.DataFrame(self.trajectory) df.to_pickle(data_path / f'{self.layout_name}.{self.trial_id}.pickle') \ No newline at end of file diff --git a/scripts/run_overcooked_game.py b/scripts/run_overcooked_game.py index bb03624..e0b9cbf 100644 --- a/scripts/run_overcooked_game.py +++ b/scripts/run_overcooked_game.py @@ -24,9 +24,9 @@ def get_teammate_from_pop_file(tm_name, tm_score, pop_path, layout_name): teammates_path = [ # 'agent_models/c1_v4/SP_s1010_h256_tr[SP]_ran/best' - # 'agent_models/c1_best_EGO/best_c1/best' + 'agent_models/c1_best_EGO/best_c1/best' - 'agent_models/c4_v4/SP_s1010_h256_tr[SP]_ran/best' + # 'agent_models/c4_v4/SP_s1010_h256_tr[SP]_ran/best' # 'agent_models/c4_best_EGO/best_c4/best' # 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best', # green @@ -38,14 +38,14 @@ def get_teammate_from_pop_file(tm_name, tm_score, pop_path, layout_name): teammates = [load_agent(Path(tm_path), args) for tm_path in teammates_path[:args.num_players - 1]] # trajectories = tile locations. Top left of the layout is (0, 0), bottom right is (M, N) - teammates = [CustomAgent(args=args, name='human', trajectories={args.layout: [(2, 1), (3, 1)]})] + # teammates = [CustomAgent(args=args, name='human', trajectories={args.layout: [(2, 1), (3, 1)]})] # teammates = [DummyAgent(action='random') for _ in range(args.num_players - 1)] # player_path = 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best' # player_path = 'agent_models/c4_best_EGO/best_c4/best' # player = load_agent(Path(player_path), args) - # player = teammates[0] - player = 'human' # blue + player = teammates[0] + # player = 'human' # blue dc = OvercookedGUI(args, agent=player, teammates=teammates, layout_name=args.layout, p_idx=args.p_idx, fps=50, horizon=400, gif_name=args.layout) From 260ee9a103c0906ae401c4f20b252717373c1aeb Mon Sep 17 00:00:00 2001 From: Ava Abderezaei Date: Wed, 26 Mar 2025 18:35:12 -0600 Subject: [PATCH 20/26] fix sp tc --- oai_agents/agents/hrl.py | 2 +- oai_agents/agents/mep_population_manager.py | 6 +-- oai_agents/agents/rl.py | 14 +++-- oai_agents/common/arguments.py | 8 +-- oai_agents/common/multi_setup_trainer.py | 51 ++++++++++++++----- oai_agents/common/population.py | 36 ++++++------- oai_agents/common/teammates_collection.py | 2 +- sandbox/generate_agents_for_eval.py | 6 +-- .../best_baseline_experiment/MEP_POP_c1.sh | 4 +- .../best_baseline_experiment/MEP_POP_c2.sh | 4 +- .../best_baseline_experiment/MEP_POP_c3.sh | 4 +- .../best_baseline_experiment/MEP_POP_c4.sh | 4 +- .../best_baseline_experiment/SP_c1_v1.sh | 4 +- .../best_baseline_experiment/SP_c1_v2.sh | 4 +- .../best_baseline_experiment/SP_c1_v3.sh | 4 +- .../best_baseline_experiment/SP_c1_v4.sh | 4 +- .../best_baseline_experiment/SP_c2_v1.sh | 4 +- .../best_baseline_experiment/SP_c2_v2.sh | 4 +- .../best_baseline_experiment/SP_c2_v3.sh | 4 +- .../best_baseline_experiment/SP_c2_v4.sh | 4 +- .../best_baseline_experiment/SP_c3_v1.sh | 4 +- .../best_baseline_experiment/SP_c3_v2.sh | 4 +- .../best_baseline_experiment/SP_c3_v3.sh | 4 +- .../best_baseline_experiment/SP_c3_v4.sh | 4 +- .../best_baseline_experiment/SP_c4_v1.sh | 4 +- .../best_baseline_experiment/SP_c4_v2.sh | 4 +- .../best_baseline_experiment/SP_c4_v3.sh | 4 +- .../best_baseline_experiment/SP_c4_v4.sh | 4 +- 
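The resource-usage bookkeeping added to the GUI above boils down to: the tile in front is position plus orientation, and INTERACT presses whose facing tile is a resource are counted per agent. A standalone sketch with made-up coordinates:

    from collections import defaultdict

    def facing_tile(position, orientation):
        # same arithmetic as in the GUI patch: the tile directly in front of a player
        return (position[0] + orientation[0], position[1] + orientation[1])

    resource_locations = {(3, 0): 'P', (0, 2): 'D'}   # e.g. a pot and a dish dispenser
    resource_usage = defaultdict(int)

    # pretend player 0 presses INTERACT at (3, 1) while facing "up" (0, -1)
    tile = facing_tile((3, 1), (0, -1))
    if tile in resource_locations:
        resource_usage[(0, tile)] += 1
    print(dict(resource_usage))   # {(0, (3, 0)): 1}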
.../best_baseline_experiment/c1_best_CAP.sh | 4 +- .../best_baseline_experiment/c1_best_ego.sh | 4 +- .../best_baseline_experiment/c2_best_CAP.sh | 4 +- .../best_baseline_experiment/c2_best_ego.sh | 4 +- .../best_baseline_experiment/c3_best_CAP.sh | 4 +- .../best_baseline_experiment/c3_best_ego.sh | 4 +- .../best_baseline_experiment/c4_best_CAP.sh | 4 +- .../best_baseline_experiment/c4_best_ego.sh | 4 +- scripts/bash_scripts/classic_CAP_2_player.sh | 11 ++-- scripts/bash_scripts/classic_FCP_2_player.sh | 11 ++-- scripts/bash_scripts/profile.sh | 5 +- scripts/bash_scripts/test_run.sh | 12 +++-- scripts/train_agents.py | 27 +++++----- scripts/train_agents_without_bashing.py | 4 +- scripts/utils/train_helper.py | 8 +-- 43 files changed, 182 insertions(+), 133 deletions(-) diff --git a/oai_agents/agents/hrl.py b/oai_agents/agents/hrl.py index f0ee0ef..04c76cb 100644 --- a/oai_agents/agents/hrl.py +++ b/oai_agents/agents/hrl.py @@ -1,5 +1,5 @@ from oai_agents.agents.base_agent import OAIAgent, PolicyClone -from oai_agents.agents.rl import RLAgentTrainer, VEC_ENV_CLS +from oai_agents.agents.rl import RLAgentTrainer from oai_agents.common.arguments import get_args_to_save, set_args_from_load from oai_agents.common.subtasks import Subtasks # from oai_agents.gym_environments.worker_env import OvercookedSubtaskGymEnv diff --git a/oai_agents/agents/mep_population_manager.py b/oai_agents/agents/mep_population_manager.py index bffb060..e3040b7 100644 --- a/oai_agents/agents/mep_population_manager.py +++ b/oai_agents/agents/mep_population_manager.py @@ -19,7 +19,7 @@ def __init__(self, population_size, args): self.epoch_timesteps = args.epoch_timesteps # Number of timesteps per training episode seeds, h_dims = generate_hdim_and_seed( for_evaluation=args.gen_pop_for_eval, - total_ego_agents=population_size + total_sp_agents=population_size ) self.population: List[RLAgentTrainer] = [] @@ -204,9 +204,9 @@ def train_population(self, total_timesteps: int, num_of_ckpoints: int, eval_inte set_input(args=args) - args.total_ego_agents = 4 + args.total_sp_agents = 4 - manager = MEPPopulationManager(population_size=args.total_ego_agents, args=args) + manager = MEPPopulationManager(population_size=args.total_sp_agents, args=args) manager.train_population( total_timesteps=args.pop_total_training_timesteps, num_of_ckpoints=args.num_of_ckpoints, diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index 9b6559d..f9fde02 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -9,16 +9,12 @@ import numpy as np from stable_baselines3 import PPO, DQN from stable_baselines3.common.env_util import make_vec_env -from stable_baselines3.common.vec_env import DummyVecEnv +from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv from sb3_contrib import RecurrentPPO import wandb import os from typing import Literal -# VEC_ENV_CLS = DummyVecEnv # -VEC_ENV_CLS = SubprocVecEnv - - class RLAgentTrainer(OAITrainer): ''' Train an RL agent to play with a teammates_collection of agents.''' def __init__( @@ -33,6 +29,8 @@ def __init__( ): train_types = train_types if train_types is not None else [] eval_types = eval_types if eval_types is not None else [] + + # assert teammates_collection, "Teammates collection must be provided" name = name or 'rl_agent' super(RLAgentTrainer, self).__init__(name, args, seed=seed) @@ -140,6 +138,12 @@ def print_tc_helper(self, teammates_collection, message=None): def get_envs(self, _env, _eval_envs, deterministic, learner_type, teammates_collection, curriculum, 
start_timestep: int = 0): from oai_agents.gym_environments.base_overcooked_env import OvercookedGymEnv + + if self.args.use_multipleprocesses: + VEC_ENV_CLS = SubprocVecEnv + else: + VEC_ENV_CLS = DummyVecEnv + if _env is None: env_kwargs = {'shape_rewards': True, 'full_init': False, 'stack_frames': self.use_frame_stack, 'deterministic': deterministic,'args': self.args, 'learner_type': learner_type, 'start_timestep': start_timestep, diff --git a/oai_agents/common/arguments.py b/oai_agents/common/arguments.py index ff45dad..c084fd8 100644 --- a/oai_agents/common/arguments.py +++ b/oai_agents/common/arguments.py @@ -98,7 +98,10 @@ def get_arguments(additional_args: Optional[List] = None): parser.add_argument("--custom-agent-ck-rate-generation", type=int) parser.add_argument('--gen-pop-for-eval', type=str2bool, default=False, help="Specifies whether to generate a population of agents for evaluation purposes. Currently, this functionality is limited to self-play agents, as support for other methods has not yet been implemented..)") - parser.add_argument("--total-ego-agents", type=int, default=4) + parser.add_argument('--use-cuda', type=str2bool, help="Specifies whether to use cuda for training.") + parser.add_argument('--use-multipleprocesses', type=str2bool, help="SubprocVecEnv vs DummyVecEnv") + + parser.add_argument("--total-sp-agents", type=int, default=4) parser.add_argument("--ck-list-offset", type=int, default=0) parser.add_argument('--low-perfs', help='shitty code to run ult baseline exp', default='default') @@ -112,8 +115,7 @@ def get_arguments(additional_args: Optional[List] = None): args = parser.parse_args() args.base_dir = Path(args.base_dir) - # args.device = th.device('cuda' if th.cuda.is_available() else 'cpu') - args.device = th.device('cpu') + args.device = th.device('cuda' if args.use_cuda and th.cuda.is_available() else 'cpu') args.layout_names = args.layout_names.split(',') args.low_perfs = args.low_perfs.split(',') diff --git a/oai_agents/common/multi_setup_trainer.py b/oai_agents/common/multi_setup_trainer.py index d443f07..1ea8cbb 100644 --- a/oai_agents/common/multi_setup_trainer.py +++ b/oai_agents/common/multi_setup_trainer.py @@ -1,8 +1,10 @@ import concurrent.futures +import dill + from scripts.utils.common import generate_name from oai_agents.common.tags import Prefix from oai_agents.agents.rl import RLAgentTrainer -import dill +from oai_agents.common.teammates_collection import generate_TC class MultiSetupTrainer: @@ -21,7 +23,7 @@ def __init__( self.tag_for_returning_agent = tag_for_returning_agent self.parallel = args.parallel - self.total_ego_agents = args.total_ego_agents + self.total_sp_agents = args.total_sp_agents self.for_evaluation = args.gen_pop_for_eval def get_trained_agent(self, seed, h_dim): @@ -31,10 +33,10 @@ def get_multiple_trained_agents(self): agents = [] seeds, hdims = generate_hdim_and_seed( - for_evaluation=self.for_evaluation, total_ego_agents=self.total_ego_agents) + for_evaluation=self.for_evaluation, total_sp_agents=self.total_sp_agents) inputs = [ (seeds[i], hdims[i]) - for i in range(self.total_ego_agents) + for i in range(self.total_sp_agents) ] if self.args.parallel: @@ -117,9 +119,30 @@ def get_trained_agent(self, seed, h_dim): curriculum=self.curriculum ) + # print('before generate_randomly_initialized_agent') + init_agent = RLAgentTrainer.generate_randomly_initialized_agent( # need a cleaner way to do this + args=self.args, + name=name, + learner_type=self.args.primary_learner_type, + hidden_dim=h_dim, + seed=seed, + 
n_envs=self.args.n_envs + ) + + population = {layout_name: [] for layout_name in self.args.layout_names} + teammates_collection = generate_TC(args=self.args, + population=population, + agent=init_agent, + train_types=self.train_types, + eval_types_to_generate=self.eval_types['generate'], + eval_types_to_read_from_file=self.eval_types['load'], + unseen_teammates_len=0, + use_entire_population_for_train_types_teammates=True) + + return self.get_reinforcement_agent( name=name, - teammates_collection={}, + teammates_collection=teammates_collection, curriculum=self.curriculum, h_dim=h_dim, seed=seed, @@ -128,7 +151,7 @@ def get_trained_agent(self, seed, h_dim): total_train_timesteps=self.args.pop_total_training_timesteps, ) -def generate_hdim_and_seed(for_evaluation: bool, total_ego_agents: int): +def generate_hdim_and_seed(for_evaluation: bool, total_sp_agents: int): evaluation_seeds = [3031, 4041, 5051, 3708, 3809, 3910, 4607, 5506] evaluation_hdims = [256] * len(evaluation_seeds) @@ -139,23 +162,23 @@ def generate_hdim_and_seed(for_evaluation: bool, total_ego_agents: int): training_hdims = [256] * len(training_seeds) if for_evaluation: - assert total_ego_agents <= len(evaluation_seeds), ( - f"Total ego agents ({total_ego_agents}) cannot exceed the number of evaluation seeds ({len(evaluation_seeds)}). " + assert total_sp_agents <= len(evaluation_seeds), ( + f"Total ego agents ({total_sp_agents}) cannot exceed the number of evaluation seeds ({len(evaluation_seeds)}). " "Please either increase the number of evaluation seeds in the `generate_hdim_and_seed` function or decrease " - f"`self.total_ego_agents` (currently set to {total_ego_agents}, based on `args.total_ego_agents`)." + f"`self.total_sp_agents` (currently set to {total_sp_agents}, based on `args.total_sp_agents`)." ) seeds = evaluation_seeds hdims = evaluation_hdims else: - assert total_ego_agents <= len(training_seeds), ( - f"Total ego agents ({total_ego_agents}) cannot exceed the number of training seeds ({len(training_seeds)}). " + assert total_sp_agents <= len(training_seeds), ( + f"Total ego agents ({total_sp_agents}) cannot exceed the number of training seeds ({len(training_seeds)}). " "Please either increase the number of training seeds in the `generate_hdim_and_seed` function or decrease " - f"`self.total_ego_agents` (currently set to {total_ego_agents}, based on `args.total_ego_agents`)." + f"`self.total_sp_agents` (currently set to {total_sp_agents}, based on `args.total_sp_agents`)." 
) seeds = training_seeds hdims = training_hdims - selected_seeds = seeds[:total_ego_agents] - selected_hdims = hdims[:total_ego_agents] + selected_seeds = seeds[:total_sp_agents] + selected_hdims = hdims[:total_sp_agents] return selected_seeds, selected_hdims diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py index 82f7160..25c450f 100644 --- a/oai_agents/common/population.py +++ b/oai_agents/common/population.py @@ -68,11 +68,11 @@ def train_SP_with_checkpoints(args, total_training_timesteps, ck_rate, seed, h_d def ensure_enough_SP_agents(teammates_len, train_types, eval_types, - total_ego_agents, + total_sp_agents, unseen_teammates_len=0, # only used for SPX teamtypes ): - total_population_len = len(AgentPerformance.ALL) * total_ego_agents + total_population_len = len(AgentPerformance.ALL) * total_sp_agents train_agents_len, eval_agents_len = 0, 0 @@ -93,14 +93,14 @@ def ensure_enough_SP_agents(teammates_len, eval_agents_len += unseen_teammates_len assert total_population_len >= train_agents_len + eval_agents_len, "Not enough agents to train and evaluate." \ - " Should increase total_ego_agents." \ + " Should increase total_sp_agents." \ f" Total population len: {total_population_len}," \ f" train_agents len: {train_agents_len}," \ f" eval_agents len: {eval_agents_len}, "\ - f" total_ego_agents: {total_ego_agents}." + f" total_sp_agents: {total_sp_agents}." -def generate_hdim_and_seed(for_evaluation: bool, total_ego_agents: int): +def generate_hdim_and_seed(for_evaluation: bool, total_sp_agents: int): ''' Generates lists of seeds and hidden dimensions for a given number of agents for training or evaluation. @@ -111,7 +111,7 @@ def generate_hdim_and_seed(for_evaluation: bool, total_ego_agents: int): Arguments: for_evaluation -- a boolean indicating whether to generate settings for evluation (True) or training (False). - total_ego_agents -- the number of (hidden_dim, seed) pairs to generate. + total_sp_agents -- the number of (hidden_dim, seed) pairs to generate. Returns: selected_seeds -- list of selected seeds @@ -128,25 +128,25 @@ def generate_hdim_and_seed(for_evaluation: bool, total_ego_agents: int): # Select appropriate predefined settings based on the input setting if for_evaluation: - assert total_ego_agents <= len(evaluation_seeds), ( - f"Total ego agents ({total_ego_agents}) cannot exceed the number of evaluation seeds ({len(evaluation_seeds)}). " + assert total_sp_agents <= len(evaluation_seeds), ( + f"Total ego agents ({total_sp_agents}) cannot exceed the number of evaluation seeds ({len(evaluation_seeds)}). " "Please either increase the number of evaluation seeds in the `generate_hdim_and_seed` function or decrease " - f"`self.total_ego_agents` (currently set to {total_ego_agents}, based on `args.total_ego_agents`)." + f"`self.total_sp_agents` (currently set to {total_sp_agents}, based on `args.total_sp_agents`)." ) seeds = evaluation_seeds hdims = evaluation_hdims else: - assert total_ego_agents <= len(training_seeds), ( - f"Total ego agents ({total_ego_agents}) cannot exceed the number of training seeds ({len(training_seeds)}). " + assert total_sp_agents <= len(training_seeds), ( + f"Total ego agents ({total_sp_agents}) cannot exceed the number of training seeds ({len(training_seeds)}). " "Please either increase the number of training seeds in the `generate_hdim_and_seed` function or decrease " - f"`self.total_ego_agents` (currently set to {total_ego_agents}, based on `args.total_ego_agents`)." 
+ f"`self.total_sp_agents` (currently set to {total_sp_agents}, based on `args.total_sp_agents`)." ) seeds = training_seeds hdims = training_hdims # Initialize selected lists - selected_seeds = seeds[:total_ego_agents] - selected_hdims = hdims[:total_ego_agents] + selected_seeds = seeds[:total_sp_agents] + selected_hdims = hdims[:total_sp_agents] return selected_seeds, selected_hdims @@ -175,7 +175,7 @@ def get_performance_based_population_by_layouts( total_training_timesteps, train_types, eval_types, - total_ego_agents, + total_sp_agents, unseen_teammates_len=0, force_training=False, tag=KeyCheckpoints.MOST_RECENT_TRAINED_MODEL, @@ -198,14 +198,14 @@ def get_performance_based_population_by_layouts( unseen_teammates_len=unseen_teammates_len, train_types=train_types, eval_types=eval_types, - total_ego_agents=total_ego_agents + total_sp_agents=total_sp_agents ) seed, h_dim = generate_hdim_and_seed( - for_evaluation=args.gen_pop_for_eval, total_ego_agents=total_ego_agents) + for_evaluation=args.gen_pop_for_eval, total_sp_agents=total_sp_agents) inputs = [ (args, total_training_timesteps, ck_rate, seed[i], h_dim[i], True) - for i in range(total_ego_agents) + for i in range(total_sp_agents) ] diff --git a/oai_agents/common/teammates_collection.py b/oai_agents/common/teammates_collection.py index 1a4f823..7c64b7e 100644 --- a/oai_agents/common/teammates_collection.py +++ b/oai_agents/common/teammates_collection.py @@ -72,7 +72,7 @@ def get_teammates(agents_perftag_score:list, teamtypes:list, teammates_len:int, elif teamtype == TeamType.SELF_PLAY: assert agent is not None - all_teammates[teamtype] = [agent for _ in range(teammates_len)] + all_teammates[teamtype] = [[agent for _ in range(teammates_len)]] elif teamtype == TeamType.SELF_PLAY_HIGH: assert agent is not None diff --git a/sandbox/generate_agents_for_eval.py b/sandbox/generate_agents_for_eval.py index 01aeb47..894f095 100644 --- a/sandbox/generate_agents_for_eval.py +++ b/sandbox/generate_agents_for_eval.py @@ -32,7 +32,7 @@ def set_input(args, quick_test=False): args.epoch_timesteps = 1e5 args.pop_total_training_timesteps = 5e6 args.fcp_total_training_timesteps = 5e6 - args.total_ego_agents = 5 + args.total_sp_agents = 5 else: # Used for doing quick tests args.sb_verbose = 1 @@ -41,7 +41,7 @@ def set_input(args, quick_test=False): args.epoch_timesteps = 2 args.pop_total_training_timesteps = 3500 args.fcp_total_training_timesteps = 3500 - args.total_ego_agents = 4 + args.total_sp_agents = 4 if __name__ == "__main__": @@ -69,7 +69,7 @@ def set_input(args, quick_test=False): train_types = TeamType.ALL_TYPES_BESIDES_SP, eval_types_to_generate = [], eval_types_to_load_from_file = [], - total_ego_agents=args.total_ego_agents, + total_sp_agents=args.total_sp_agents, total_training_timesteps = args.pop_total_training_timesteps, force_training=pop_force_training, ) diff --git a/scripts/bash_scripts/best_baseline_experiment/MEP_POP_c1.sh b/scripts/bash_scripts/best_baseline_experiment/MEP_POP_c1.sh index da05e03..8a35dd5 100755 --- a/scripts/bash_scripts/best_baseline_experiment/MEP_POP_c1.sh +++ b/scripts/bash_scripts/best_baseline_experiment/MEP_POP_c1.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c1" EXP_DIR="${ALGO}_${LAYOUT_NAMES}/${NUM_PLAYERS}" -TOTAL_EGO_AGENTS=4 +TOTAL_SP_AGENTS=4 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ 
--n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/MEP_POP_c2.sh b/scripts/bash_scripts/best_baseline_experiment/MEP_POP_c2.sh index cff658c..982c2f8 100755 --- a/scripts/bash_scripts/best_baseline_experiment/MEP_POP_c2.sh +++ b/scripts/bash_scripts/best_baseline_experiment/MEP_POP_c2.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c2" EXP_DIR="${ALGO}_${LAYOUT_NAMES}/${NUM_PLAYERS}" -TOTAL_EGO_AGENTS=4 +TOTAL_SP_AGENTS=4 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/MEP_POP_c3.sh b/scripts/bash_scripts/best_baseline_experiment/MEP_POP_c3.sh index 8832d31..5ab19a9 100755 --- a/scripts/bash_scripts/best_baseline_experiment/MEP_POP_c3.sh +++ b/scripts/bash_scripts/best_baseline_experiment/MEP_POP_c3.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c3" EXP_DIR="${ALGO}_${LAYOUT_NAMES}/${NUM_PLAYERS}" -TOTAL_EGO_AGENTS=4 +TOTAL_SP_AGENTS=4 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/MEP_POP_c4.sh b/scripts/bash_scripts/best_baseline_experiment/MEP_POP_c4.sh index d7e3bb7..6940cdf 100755 --- a/scripts/bash_scripts/best_baseline_experiment/MEP_POP_c4.sh +++ b/scripts/bash_scripts/best_baseline_experiment/MEP_POP_c4.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c4" EXP_DIR="${ALGO}_${LAYOUT_NAMES}/${NUM_PLAYERS}" -TOTAL_EGO_AGENTS=4 +TOTAL_SP_AGENTS=4 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/SP_c1_v1.sh b/scripts/bash_scripts/best_baseline_experiment/SP_c1_v1.sh index 3497487..fddde4e 100755 --- a/scripts/bash_scripts/best_baseline_experiment/SP_c1_v1.sh +++ b/scripts/bash_scripts/best_baseline_experiment/SP_c1_v1.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c1_v1" 
EXP_DIR=${LAYOUT_NAMES} -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/SP_c1_v2.sh b/scripts/bash_scripts/best_baseline_experiment/SP_c1_v2.sh index c34e5f6..2333a3f 100644 --- a/scripts/bash_scripts/best_baseline_experiment/SP_c1_v2.sh +++ b/scripts/bash_scripts/best_baseline_experiment/SP_c1_v2.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c1_v2" EXP_DIR=${LAYOUT_NAMES} -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/SP_c1_v3.sh b/scripts/bash_scripts/best_baseline_experiment/SP_c1_v3.sh index 28a08aa..3b611aa 100644 --- a/scripts/bash_scripts/best_baseline_experiment/SP_c1_v3.sh +++ b/scripts/bash_scripts/best_baseline_experiment/SP_c1_v3.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c1_v3" EXP_DIR=${LAYOUT_NAMES} -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/SP_c1_v4.sh b/scripts/bash_scripts/best_baseline_experiment/SP_c1_v4.sh index 17ed213..b6f60d9 100644 --- a/scripts/bash_scripts/best_baseline_experiment/SP_c1_v4.sh +++ b/scripts/bash_scripts/best_baseline_experiment/SP_c1_v4.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c1_v4" EXP_DIR=${LAYOUT_NAMES} -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/SP_c2_v1.sh b/scripts/bash_scripts/best_baseline_experiment/SP_c2_v1.sh index eba16a9..100cd1e 
100644 --- a/scripts/bash_scripts/best_baseline_experiment/SP_c2_v1.sh +++ b/scripts/bash_scripts/best_baseline_experiment/SP_c2_v1.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c2_v1" EXP_DIR=${LAYOUT_NAMES} -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/SP_c2_v2.sh b/scripts/bash_scripts/best_baseline_experiment/SP_c2_v2.sh index b37ba98..46846a6 100644 --- a/scripts/bash_scripts/best_baseline_experiment/SP_c2_v2.sh +++ b/scripts/bash_scripts/best_baseline_experiment/SP_c2_v2.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c2_v2" EXP_DIR=${LAYOUT_NAMES} -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/SP_c2_v3.sh b/scripts/bash_scripts/best_baseline_experiment/SP_c2_v3.sh index 654c80f..f967325 100644 --- a/scripts/bash_scripts/best_baseline_experiment/SP_c2_v3.sh +++ b/scripts/bash_scripts/best_baseline_experiment/SP_c2_v3.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c2_v3" EXP_DIR=${LAYOUT_NAMES} -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/SP_c2_v4.sh b/scripts/bash_scripts/best_baseline_experiment/SP_c2_v4.sh index 021f073..44b343c 100644 --- a/scripts/bash_scripts/best_baseline_experiment/SP_c2_v4.sh +++ b/scripts/bash_scripts/best_baseline_experiment/SP_c2_v4.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c2_v4" EXP_DIR=${LAYOUT_NAMES} -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ 
--adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/SP_c3_v1.sh b/scripts/bash_scripts/best_baseline_experiment/SP_c3_v1.sh index 7c65766..d92909e 100644 --- a/scripts/bash_scripts/best_baseline_experiment/SP_c3_v1.sh +++ b/scripts/bash_scripts/best_baseline_experiment/SP_c3_v1.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c3_v1" EXP_DIR=${LAYOUT_NAMES} -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/SP_c3_v2.sh b/scripts/bash_scripts/best_baseline_experiment/SP_c3_v2.sh index 4a42883..8d05bc0 100644 --- a/scripts/bash_scripts/best_baseline_experiment/SP_c3_v2.sh +++ b/scripts/bash_scripts/best_baseline_experiment/SP_c3_v2.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c3_v2" EXP_DIR=${LAYOUT_NAMES} -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/SP_c3_v3.sh b/scripts/bash_scripts/best_baseline_experiment/SP_c3_v3.sh index ca2ca22..e7d075e 100644 --- a/scripts/bash_scripts/best_baseline_experiment/SP_c3_v3.sh +++ b/scripts/bash_scripts/best_baseline_experiment/SP_c3_v3.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c3_v3" EXP_DIR=${LAYOUT_NAMES} -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/SP_c3_v4.sh b/scripts/bash_scripts/best_baseline_experiment/SP_c3_v4.sh index c24659f..27884ba 100644 --- a/scripts/bash_scripts/best_baseline_experiment/SP_c3_v4.sh +++ b/scripts/bash_scripts/best_baseline_experiment/SP_c3_v4.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c3_v4" EXP_DIR=${LAYOUT_NAMES} -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ 
--n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/SP_c4_v1.sh b/scripts/bash_scripts/best_baseline_experiment/SP_c4_v1.sh index b1fcff9..3f67782 100644 --- a/scripts/bash_scripts/best_baseline_experiment/SP_c4_v1.sh +++ b/scripts/bash_scripts/best_baseline_experiment/SP_c4_v1.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c4_v1" EXP_DIR=${LAYOUT_NAMES} -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/SP_c4_v2.sh b/scripts/bash_scripts/best_baseline_experiment/SP_c4_v2.sh index 6133252..00935af 100644 --- a/scripts/bash_scripts/best_baseline_experiment/SP_c4_v2.sh +++ b/scripts/bash_scripts/best_baseline_experiment/SP_c4_v2.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c4_v2" EXP_DIR=${LAYOUT_NAMES} -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/SP_c4_v3.sh b/scripts/bash_scripts/best_baseline_experiment/SP_c4_v3.sh index 90f5ad1..32bae60 100644 --- a/scripts/bash_scripts/best_baseline_experiment/SP_c4_v3.sh +++ b/scripts/bash_scripts/best_baseline_experiment/SP_c4_v3.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c4_v3" EXP_DIR=${LAYOUT_NAMES} -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/SP_c4_v4.sh b/scripts/bash_scripts/best_baseline_experiment/SP_c4_v4.sh index dfe9068..f0e85ff 100644 --- a/scripts/bash_scripts/best_baseline_experiment/SP_c4_v4.sh +++ b/scripts/bash_scripts/best_baseline_experiment/SP_c4_v4.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c4_v4" EXP_DIR=${LAYOUT_NAMES} -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false 
WANDB_MODE="online" @@ -36,7 +36,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/c1_best_CAP.sh b/scripts/bash_scripts/best_baseline_experiment/c1_best_CAP.sh index 700668a..b723943 100644 --- a/scripts/bash_scripts/best_baseline_experiment/c1_best_CAP.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c1_best_CAP.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c1" EXP_DIR="${LAYOUT_NAMES}_best_EGO_with_CAP" -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false L0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_0" @@ -55,7 +55,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/c1_best_ego.sh b/scripts/bash_scripts/best_baseline_experiment/c1_best_ego.sh index bff1e48..2679936 100644 --- a/scripts/bash_scripts/best_baseline_experiment/c1_best_ego.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c1_best_ego.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c1" EXP_DIR="${LAYOUT_NAMES}_best_EGO" -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false L0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_0" @@ -55,7 +55,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/c2_best_CAP.sh b/scripts/bash_scripts/best_baseline_experiment/c2_best_CAP.sh index ce43e2f..79d5d55 100644 --- a/scripts/bash_scripts/best_baseline_experiment/c2_best_CAP.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c2_best_CAP.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c2" EXP_DIR="${LAYOUT_NAMES}_best_EGO_with_CAP" -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false L0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_0" @@ -55,7 +55,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git 
a/scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh b/scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh index dfa0c31..b66f6f2 100644 --- a/scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c2_best_ego.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c2" EXP_DIR="${LAYOUT_NAMES}_best_EGO" -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false L0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_0" @@ -55,7 +55,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/c3_best_CAP.sh b/scripts/bash_scripts/best_baseline_experiment/c3_best_CAP.sh index 84b6152..5bbada8 100644 --- a/scripts/bash_scripts/best_baseline_experiment/c3_best_CAP.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c3_best_CAP.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c3" EXP_DIR="${LAYOUT_NAMES}_best_EGO_with_CAP" -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false L0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_0" @@ -55,7 +55,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh b/scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh index 8ce7625..086716c 100644 --- a/scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c3_best_ego.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c3" EXP_DIR="${LAYOUT_NAMES}_best_EGO" -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false L0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_0" @@ -55,7 +55,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/c4_best_CAP.sh b/scripts/bash_scripts/best_baseline_experiment/c4_best_CAP.sh index 2d35934..53b8a84 100644 --- a/scripts/bash_scripts/best_baseline_experiment/c4_best_CAP.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c4_best_CAP.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c4" EXP_DIR="${LAYOUT_NAMES}_best_EGO_with_CAP" -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false L0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_0" @@ -55,7 +55,7 @@ python scripts/train_agents.py \ 
--fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/best_baseline_experiment/c4_best_ego.sh b/scripts/bash_scripts/best_baseline_experiment/c4_best_ego.sh index e7706bd..4696336 100644 --- a/scripts/bash_scripts/best_baseline_experiment/c4_best_ego.sh +++ b/scripts/bash_scripts/best_baseline_experiment/c4_best_ego.sh @@ -6,7 +6,7 @@ HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="c4" EXP_DIR="${LAYOUT_NAMES}_best_EGO" -TOTAL_EGO_AGENTS=1 +TOTAL_SP_AGENTS=1 QUICK_TEST=false L0="${LAYOUT_NAMES}_v1/SP_s1010_h256_tr[SP]_ran/ck_0" @@ -55,7 +55,7 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/classic_CAP_2_player.sh b/scripts/bash_scripts/classic_CAP_2_player.sh index ff36617..d28e057 100644 --- a/scripts/bash_scripts/classic_CAP_2_player.sh +++ b/scripts/bash_scripts/classic_CAP_2_player.sh @@ -2,12 +2,15 @@ ALGO="SPN_XSPCKP" TEAMMATES_LEN=1 +NUM_PLAYERS=$((TEAMMATES_LEN + 1)) HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="counter_circuit,coordination_ring,cramped_room,asymmetric_advantages,forced_coordination" EXP_DIR="Classic/$NUM_PLAYERS" # When quick_test=True this will be overwritten to "Test/$EXP_DIR" -TOTAL_EGO_AGENTS=4 +TOTAL_SP_AGENTS=4 QUICK_TEST=false +USE_CUDA=false +USE_MULTIPLEPROCESSES=false POP_FORCE_TRAINING=false ADVERSARY_FORCE_TRAINING=false @@ -37,10 +40,12 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ --primary-force-training ${PRIMARY_FORCE_TRAINING} \ --how-long ${HOW_LONG} \ - --exp-name-prefix "${EXP_NAME_PREFIX}" \ \ No newline at end of file + --exp-name-prefix "${EXP_NAME_PREFIX}" \ + --use-cuda ${USE_CUDA} \ + --use-multipleprocesses ${USE_MULTIPLEPROCESSES} \ \ No newline at end of file diff --git a/scripts/bash_scripts/classic_FCP_2_player.sh b/scripts/bash_scripts/classic_FCP_2_player.sh index feb7125..da203b9 100644 --- a/scripts/bash_scripts/classic_FCP_2_player.sh +++ b/scripts/bash_scripts/classic_FCP_2_player.sh @@ -2,12 +2,15 @@ ALGO="FCP_traditional" TEAMMATES_LEN=1 +NUM_PLAYERS=$((TEAMMATES_LEN + 1)) HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="counter_circuit,coordination_ring,cramped_room,asymmetric_advantages,forced_coordination" EXP_DIR="Classic/$NUM_PLAYERS" # When quick_test=True this will be overwritten to "Test/$EXP_DIR" -TOTAL_EGO_AGENTS=4 +TOTAL_SP_AGENTS=4 QUICK_TEST=false +USE_CUDA=false 
+USE_MULTIPLEPROCESSES=false POP_FORCE_TRAINING=false ADVERSARY_FORCE_TRAINING=false @@ -37,10 +40,12 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ --primary-force-training ${PRIMARY_FORCE_TRAINING} \ --how-long ${HOW_LONG} \ - --exp-name-prefix "${EXP_NAME_PREFIX}" \ \ No newline at end of file + --exp-name-prefix "${EXP_NAME_PREFIX}" \ + --use-cuda ${USE_CUDA} \ + --use-multipleprocesses ${USE_MULTIPLEPROCESSES} \ \ No newline at end of file diff --git a/scripts/bash_scripts/profile.sh b/scripts/bash_scripts/profile.sh index dada864..ef328af 100644 --- a/scripts/bash_scripts/profile.sh +++ b/scripts/bash_scripts/profile.sh @@ -2,10 +2,11 @@ ALGO="SPN_XSPCKP" TEAMMATES_LEN=1 +NUM_PLAYERS=$((TEAMMATES_LEN + 1)) HOW_LONG=20 NUM_OF_CKPOINTS=40 LAYOUT_NAMES="counter_circuit" -TOTAL_EGO_AGENTS=2 +TOTAL_SP_AGENTS=2 POP_FORCE_TRAINING=false ADVERSARY_FORCE_TRAINING=false @@ -44,7 +45,7 @@ python -m cProfile -o data/profile/profile_results_all_${CURRENT_TIME}.prof scri --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ diff --git a/scripts/bash_scripts/test_run.sh b/scripts/bash_scripts/test_run.sh index 29b9a30..9c565df 100644 --- a/scripts/bash_scripts/test_run.sh +++ b/scripts/bash_scripts/test_run.sh @@ -1,14 +1,16 @@ #!/bin/sh -ALGO="SPN_XSPCKP" +ALGO="SP" TEAMMATES_LEN=1 NUM_PLAYERS=$((TEAMMATES_LEN + 1)) NUM_OF_CKPOINTS=10 LAYOUT_NAMES="counter_circuit" EXP_DIR="$NUM_PLAYERS" # When quick_test=True this will be overwritten to "Test/$EXP_DIR" -TOTAL_EGO_AGENTS=4 +TOTAL_SP_AGENTS=1 QUICK_TEST=true HOW_LONG=1 +USE_CUDA=false +USE_MULTIPLEPROCESSES=false POP_FORCE_TRAINING=false ADVERSARY_FORCE_TRAINING=false @@ -44,10 +46,12 @@ python scripts/train_agents.py \ --fcp-total-training-timesteps ${FCP_TOTAL_TRAINING_TIMESTEPS} \ --adversary-total-training-timesteps ${ADVERSARY_TOTAL_TRAINING_TIMESTEPS} \ --n-x-fcp-total-training-timesteps ${N_X_FCP_TOTAL_TRAINING_TIMESTEPS} \ - --total-ego-agents ${TOTAL_EGO_AGENTS} \ + --total-sp-agents ${TOTAL_SP_AGENTS} \ --wandb-mode ${WANDB_MODE} \ --pop-force-training ${POP_FORCE_TRAINING} \ --adversary-force-training ${ADVERSARY_FORCE_TRAINING} \ --primary-force-training ${PRIMARY_FORCE_TRAINING} \ --how-long ${HOW_LONG} \ - --exp-name-prefix "${EXP_NAME_PREFIX}" \ \ No newline at end of file + --exp-name-prefix "${EXP_NAME_PREFIX}" \ + --use-cuda ${USE_CUDA} \ + --use-multipleprocesses ${USE_MULTIPLEPROCESSES} \ \ No newline at end of file diff --git a/scripts/train_agents.py b/scripts/train_agents.py index 8191013..f606516 100644 --- a/scripts/train_agents.py +++ b/scripts/train_agents.py @@ -22,7 +22,7 @@ def MEP_POPULATION(args): agents_finder = SelfPlayAgentsFinder(args=args) _, _, training_infos = agents_finder.get_agents_infos() if len(training_infos)==0: - manager = 
MEPPopulationManager(population_size=args.total_ego_agents, args=args) + manager = MEPPopulationManager(population_size=args.total_sp_agents, args=args) manager.train_population( total_timesteps=args.pop_total_training_timesteps, num_of_ckpoints=args.num_of_ckpoints, @@ -287,7 +287,7 @@ def SPN_XSPCKP(args) -> None: def best_EGO(args, add_adv=False) -> None: - '''only for 2 players''' + '''for a very specifric experimetn: only for 2 players:: ignore this''' primary_train_types = [ TeamType.SELF_PLAY_HIGH, TeamType.SELF_PLAY_MEDIUM, @@ -327,21 +327,24 @@ def best_EGO(args, add_adv=False) -> None: elif args.algo_name == 'SPN_XSPCKP': SPN_XSPCKP(args=args) + elif args.algo_name == 'MEP': + MEP_POPULATION(args=args) + elif args.algo_name == 'FCP_traditional': FCP_traditional(args=args) - elif args.algo_name == 'FCP_mhri': - FCP_mhri(args=args) + # elif args.algo_name == 'best_EGO': + # best_EGO(args=args, add_adv=False) - elif args.algo_name == 'SPN_1ADV': - SPN_1ADV(args=args) + # elif args.algo_name == 'FCP_mhri': + # FCP_mhri(args=args) - elif args.algo_name == 'N_1_FCP': - N_1_FCP(args=args) + # elif args.algo_name == 'SPN_1ADV': + # SPN_1ADV(args=args) - elif args.algo_name == 'SPN_1ADV_XSPCKP': - SPN_1ADV_XSPCKP(args=args) + # elif args.algo_name == 'N_1_FCP': + # N_1_FCP(args=args) - elif args.algo_name == 'MEP': - MEP_POPULATION(args=args) + # elif args.algo_name == 'SPN_1ADV_XSPCKP': + # SPN_1ADV_XSPCKP(args=args) diff --git a/scripts/train_agents_without_bashing.py b/scripts/train_agents_without_bashing.py index c747485..2be4414 100644 --- a/scripts/train_agents_without_bashing.py +++ b/scripts/train_agents_without_bashing.py @@ -54,7 +54,7 @@ def set_input(args): args.adversary_total_training_timesteps = int(5e6 * args.how_long) args.n_x_fcp_total_training_timesteps = int(2 * args.fcp_total_training_timesteps * args.how_long) - args.total_ego_agents = 8 + args.total_sp_agents = 8 print(f"args.layout_names: {args.layout_names}") if args.layout_names == complex_2_chefs_layouts: prefix = 'Complex' @@ -84,7 +84,7 @@ def set_input(args): args.adversary_total_training_timesteps = 1500 args.fcp_total_training_timesteps = 1500 args.n_x_fcp_total_training_timesteps = 1500 * 2 - args.total_ego_agents = 2 + args.total_sp_agents = 2 args.exp_dir = f'Test/{args.num_players}' diff --git a/scripts/utils/train_helper.py b/scripts/utils/train_helper.py index 8ee8862..007a729 100644 --- a/scripts/utils/train_helper.py +++ b/scripts/utils/train_helper.py @@ -1,5 +1,6 @@ from oai_agents.agents.rl import RLAgentTrainer from oai_agents.common.tags import TeamType +from oai_agents.agents.agent_utils import CustomAgent from oai_agents.common.population import get_performance_based_population_by_layouts from oai_agents.common.teammates_collection import generate_TC, get_best_SP_agent, generate_TC_for_ADV_agent, update_TC_w_ADV_teammates, update_TC_w_dynamic_and_static_ADV_teammates from oai_agents.common.curriculum import Curriculum @@ -71,7 +72,7 @@ def get_N_X_SP_agents( train_types=n_x_sp_train_types, eval_types=n_x_sp_eval_types['generate'], unseen_teammates_len = unseen_teammates_len, - total_ego_agents=args.total_ego_agents, + total_sp_agents=args.total_sp_agents, force_training=args.pop_force_training, tag=tag ) @@ -406,7 +407,7 @@ def get_FCP_agent_w_pop( total_training_timesteps=args.pop_total_training_timesteps, train_types=fcp_train_types, eval_types=fcp_eval_types['generate'], - total_ego_agents=args.total_ego_agents, + total_sp_agents=args.total_sp_agents, 
force_training=args.pop_force_training, tag=tag ) @@ -529,8 +530,9 @@ def get_N_X_FCP_agents( + def get_best_EGO_agents(args, primary_train_types, primary_eval_types, curriculum, add_adv=False): - '''Code purposed for a very specific experiment, assumes n_players = 2''' + '''Ignore: Code purposed for a very specific experiment, assumes n_players = 2''' from pathlib import Path eval_collection = { From 82845e6a44c1c436a9a297a2746ef70ad1ecc87f Mon Sep 17 00:00:00 2001 From: ava Date: Thu, 27 Mar 2025 15:32:32 -0600 Subject: [PATCH 21/26] Making sure everything works --- oai_agents/common/overcooked_simulation.py | 13 +++++++++++- oai_agents/common/population.py | 24 ++++++++++++++++++++-- scripts/bash_scripts/test_run.sh | 8 ++++---- scripts/train_agents.py | 8 ++++---- 4 files changed, 42 insertions(+), 11 deletions(-) diff --git a/oai_agents/common/overcooked_simulation.py b/oai_agents/common/overcooked_simulation.py index 1e09fa4..3ca2561 100644 --- a/oai_agents/common/overcooked_simulation.py +++ b/oai_agents/common/overcooked_simulation.py @@ -10,12 +10,23 @@ def __init__(self, args, agent, teammates, layout_name, p_idx, horizon=400): self.args = args self.layout_name = layout_name + teammates_collection = { + 'eval': { + self.layout_name: { + 'run_type': [teammates] + } + } + } + self.env = OvercookedGymEnv(args=args, layout_name=self.layout_name, ret_completed_subtasks=False, is_eval_env=True, horizon=horizon, - learner_type='originaler') + learner_type='originaler', + teammates_collection=teammates_collection, + curriculum=None, + ) self.agent = agent self.p_idx = p_idx diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py index 25c450f..1b7c992 100644 --- a/oai_agents/common/population.py +++ b/oai_agents/common/population.py @@ -4,7 +4,7 @@ from oai_agents.agents.rl import RLAgentTrainer from oai_agents.common.tags import AgentPerformance, KeyCheckpoints, TeamType - +from oai_agents.common.teammates_collection import generate_TC from .curriculum import Curriculum @@ -32,12 +32,32 @@ def train_SP_with_checkpoints(args, total_training_timesteps, ck_rate, seed, h_d n_envs = training_info["n_envs"] print(f"Restarting training from step: {start_step} (timestep: {start_timestep})") + + init_agent = RLAgentTrainer.generate_randomly_initialized_agent( # need a cleaner way to do this + args=args, + name=name, + learner_type=args.primary_learner_type, + hidden_dim=h_dim, + seed=seed, + n_envs=args.n_envs + ) + + population = {layout_name: [] for layout_name in args.layout_names} + + teammates_collection = generate_TC(args=args, + population=population, + agent=init_agent, + train_types=[TeamType.SELF_PLAY], + eval_types_to_generate=[TeamType.SELF_PLAY], + eval_types_to_read_from_file=[], + unseen_teammates_len=0, + use_entire_population_for_train_types_teammates=True) rlat = RLAgentTrainer( name=name, args=args, agent=agent_ckpt, - teammates_collection={}, # automatically creates SP type + teammates_collection=teammates_collection, # automatically creates SP type epoch_timesteps=args.epoch_timesteps, n_envs=n_envs, hidden_dim=h_dim, diff --git a/scripts/bash_scripts/test_run.sh b/scripts/bash_scripts/test_run.sh index 9c565df..13c38d5 100644 --- a/scripts/bash_scripts/test_run.sh +++ b/scripts/bash_scripts/test_run.sh @@ -1,12 +1,12 @@ #!/bin/sh -ALGO="SP" +ALGO="FCP_traditional" TEAMMATES_LEN=1 NUM_PLAYERS=$((TEAMMATES_LEN + 1)) NUM_OF_CKPOINTS=10 LAYOUT_NAMES="counter_circuit" EXP_DIR="$NUM_PLAYERS" # When quick_test=True this will be overwritten to "Test/$EXP_DIR" 
-TOTAL_SP_AGENTS=1 +TOTAL_SP_AGENTS=2 QUICK_TEST=true HOW_LONG=1 USE_CUDA=false @@ -21,9 +21,9 @@ source scripts/bash_scripts/env_config.sh # Overwrite the default values from env_config here if needed N_ENVS=5 WANDB_MODE="disabled" -EPOCH_TIMESTEPS=3500 +EPOCH_TIMESTEPS=2500 N_X_SP_TOTAL_TRAINING_TIMESTEPS=10000 -FCP_TOTAL_TRAINING_TIMESTEPS=75000 +FCP_TOTAL_TRAINING_TIMESTEPS=10000 python scripts/train_agents.py \ diff --git a/scripts/train_agents.py b/scripts/train_agents.py index f606516..53d48b9 100644 --- a/scripts/train_agents.py +++ b/scripts/train_agents.py @@ -256,15 +256,15 @@ def SPN_XSPCKP(args) -> None: TeamType.SELF_PLAY_HIGH, TeamType.SELF_PLAY_MEDIUM, TeamType.SELF_PLAY_LOW, - # TeamType.SELF_PLAY_DYNAMIC_ADV, # TODO: read from command line arg - # TeamType.SELF_PLAY_STATIC_ADV, + TeamType.SELF_PLAY_DYNAMIC_ADV, # TODO: read from command line arg + TeamType.SELF_PLAY_STATIC_ADV, ] primary_eval_types = { 'generate': [ TeamType.SELF_PLAY_HIGH, TeamType.SELF_PLAY_LOW, - # TeamType.SELF_PLAY_DYNAMIC_ADV, - # TeamType.SELF_PLAY_STATIC_ADV, + TeamType.SELF_PLAY_DYNAMIC_ADV, + TeamType.SELF_PLAY_STATIC_ADV, ], 'load': [] } From ec23330d3f366e76fb5c2bc9e2acad1b0003da26 Mon Sep 17 00:00:00 2001 From: ava Date: Thu, 27 Mar 2025 15:37:18 -0600 Subject: [PATCH 22/26] Lint fix --- oai_agents/agents/base_agent.py | 10 ++++------ oai_agents/agents/rl.py | 8 ++++---- oai_agents/common/arguments.py | 6 +++--- oai_agents/common/multi_setup_trainer.py | 2 +- oai_agents/common/overcooked_gui.py | 9 ++++----- oai_agents/common/overcooked_simulation.py | 4 ++-- oai_agents/common/population.py | 8 ++++---- .../gym_environments/base_overcooked_env.py | 18 +++++++----------- sandbox/profile_analyze.py | 2 +- scripts/profile_analyze.py | 2 +- scripts/run_overcooked_game.py | 6 +++--- scripts/train_agents.py | 6 +++--- scripts/utils/train_helper.py | 12 ++++++------ 13 files changed, 43 insertions(+), 50 deletions(-) diff --git a/oai_agents/agents/base_agent.py b/oai_agents/agents/base_agent.py index d4b4600..859bda1 100644 --- a/oai_agents/agents/base_agent.py +++ b/oai_agents/agents/base_agent.py @@ -1,10 +1,8 @@ -from oai_agents.agents.agent_utils import load_agent, CustomAgent +from oai_agents.agents.agent_utils import load_agent from oai_agents.common.arguments import get_args_to_save, set_args_from_load from oai_agents.common.state_encodings import ENCODING_SCHEMES -from oai_agents.common.subtasks import calculate_completed_subtask, get_doable_subtasks, Subtasks -from oai_agents.common.tags import AgentPerformance, TeamType, KeyCheckpoints, TeammatesCollection from oai_agents.common.subtasks import get_doable_subtasks, Subtasks -from oai_agents.common.tags import AgentPerformance, KeyCheckpoints +from oai_agents.common.tags import AgentPerformance, KeyCheckpoints, TeammatesCollection from oai_agents.common.checked_model_name_handler import CheckedModelNameHandler # from oai_agents.gym_environments.base_overcooked_env import USEABLE_COUNTERS @@ -327,7 +325,7 @@ def predict(self, obs, state=None, episode_start=None, deterministic=False): # Updated to include action masking self.policy.set_training_mode(False) obs, vectorized_env = self.policy.obs_to_tensor(obs) - + with th.no_grad(): if 'subtask_mask' in obs and np.prod(obs['subtask_mask'].shape) == np.prod(self.policy.action_space.n): dist = self.policy.get_distribution(obs, action_masks=obs['subtask_mask']) @@ -428,7 +426,7 @@ def evaluate(self, eval_agent, num_eps_per_layout_per_tm=5, visualize=False, tim selected_p_indexes = 
random.sample(range(self.args.num_players), min(3, self.args.num_players)) for _, env in enumerate(self.eval_envs): - + rew_per_layout_per_teamtype[env.layout_name] = { teamtype: [] for teamtype in env.teammates_collection[TeammatesCollection.EVAL][env.layout_name] } diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index f9fde02..ae98f16 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -2,7 +2,7 @@ from oai_agents.agents.base_agent import SB3Wrapper, SB3LSTMWrapper, OAITrainer, OAIAgent from oai_agents.common.networks import OAISinglePlayerFeatureExtractor from oai_agents.common.state_encodings import ENCODING_SCHEMES -from oai_agents.common.tags import AgentPerformance, TeamType, TeammatesCollection, KeyCheckpoints +from oai_agents.common.tags import AgentPerformance, TeammatesCollection, KeyCheckpoints from oai_agents.agents.agent_utils import CustomAgent from oai_agents.common.checked_model_name_handler import CheckedModelNameHandler @@ -29,7 +29,7 @@ def __init__( ): train_types = train_types if train_types is not None else [] eval_types = eval_types if eval_types is not None else [] - + # assert teammates_collection, "Teammates collection must be provided" name = name or 'rl_agent' @@ -146,13 +146,13 @@ def get_envs(self, _env, _eval_envs, deterministic, learner_type, teammates_coll if _env is None: env_kwargs = {'shape_rewards': True, 'full_init': False, 'stack_frames': self.use_frame_stack, - 'deterministic': deterministic,'args': self.args, 'learner_type': learner_type, 'start_timestep': start_timestep, + 'deterministic': deterministic,'args': self.args, 'learner_type': learner_type, 'start_timestep': start_timestep, 'teammates_collection': teammates_collection, 'curriculum': curriculum } env = make_vec_env(OvercookedGymEnv, n_envs=self.args.n_envs, seed=self.seed, vec_env_cls=VEC_ENV_CLS, env_kwargs=env_kwargs) eval_envs_kwargs = {'is_eval_env': True, 'horizon': 400, 'stack_frames': self.use_frame_stack, - 'deterministic': deterministic, 'args': self.args, 'learner_type': learner_type, + 'deterministic': deterministic, 'args': self.args, 'learner_type': learner_type, 'teammates_collection': teammates_collection, 'curriculum': curriculum } eval_envs = [OvercookedGymEnv(**{'env_index': i, **eval_envs_kwargs, 'unique_env_idx':self.args.n_envs+i}) for i in range(self.n_layouts)] diff --git a/oai_agents/common/arguments.py b/oai_agents/common/arguments.py index c084fd8..f58b7f4 100644 --- a/oai_agents/common/arguments.py +++ b/oai_agents/common/arguments.py @@ -114,14 +114,14 @@ def get_arguments(additional_args: Optional[List] = None): args = parser.parse_args() args.base_dir = Path(args.base_dir) - + args.device = th.device('cuda' if args.use_cuda and th.cuda.is_available() else 'cpu') - + args.layout_names = args.layout_names.split(',') args.low_perfs = args.low_perfs.split(',') args.med_perfs = args.med_perfs.split(',') args.high_perfs = args.high_perfs.split(',') - + if isinstance(args.layout_names, str): args.layout_names = args.layout_names.split(',') diff --git a/oai_agents/common/multi_setup_trainer.py b/oai_agents/common/multi_setup_trainer.py index 1ea8cbb..8648090 100644 --- a/oai_agents/common/multi_setup_trainer.py +++ b/oai_agents/common/multi_setup_trainer.py @@ -127,7 +127,7 @@ def get_trained_agent(self, seed, h_dim): hidden_dim=h_dim, seed=seed, n_envs=self.args.n_envs - ) + ) population = {layout_name: [] for layout_name in self.args.layout_names} teammates_collection = generate_TC(args=self.args, diff --git 
a/oai_agents/common/overcooked_gui.py b/oai_agents/common/overcooked_gui.py index 44bd551..3ec289b 100644 --- a/oai_agents/common/overcooked_gui.py +++ b/oai_agents/common/overcooked_gui.py @@ -32,7 +32,6 @@ # from oai_agents.agents import Manager from oai_agents.common.subtasks import facing from oai_agents.gym_environments.base_overcooked_env import OvercookedGymEnv -from oai_agents.gym_environments.worker_env import OvercookedSubtaskGymEnv from overcooked_ai_py.mdp.overcooked_mdp import Direction, Action # from overcooked_ai_py.planning.planners import MediumLevelPlanner from overcooked_ai_py.visualization.state_visualizer import StateVisualizer, roboto_path @@ -64,9 +63,9 @@ def __init__(self, args, layout_name=None, agent=None, teammates=None, p_idx=0, ) self.agent = agent self.p_idx = p_idx - + self.env.set_teammates('run_type') - + self.env.reset(p_idx=self.p_idx) if self.agent != 'human': self.agent.set_encoding_params(self.p_idx, self.args.horizon, env=self.env, is_haha=isinstance(self.agent, HierarchicalRL), tune_subtasks=False) @@ -106,7 +105,7 @@ def __init__(self, args, layout_name=None, agent=None, teammates=None, p_idx=0, self.gif_name = gif_name if not os.path.exists(f'data/screenshots/{self.gif_name}'): os.makedirs(f'data/screenshots/{self.gif_name}') - + self.resource_locations = {} for y, row in enumerate(self.env.env.mdp.terrain_mtx): @@ -115,7 +114,7 @@ def __init__(self, args, layout_name=None, agent=None, teammates=None, p_idx=0, self.resource_locations[(x, y)] = cell self.resource_usage = { - agent_idx: {pos: 0 for pos in self.resource_locations} + agent_idx: dict.fromkeys(self.resource_locations, 0) for agent_idx in range(len(self.env.state.players)) } diff --git a/oai_agents/common/overcooked_simulation.py b/oai_agents/common/overcooked_simulation.py index 3ca2561..9a81989 100644 --- a/oai_agents/common/overcooked_simulation.py +++ b/oai_agents/common/overcooked_simulation.py @@ -98,7 +98,7 @@ def run_simulation(self, how_many_times): if __name__ == '__main__': from oai_agents.common.arguments import get_arguments - from oai_agents.agents.agent_utils import DummyAgent, CustomAgent, load_agent + from oai_agents.agents.agent_utils import CustomAgent, load_agent from pathlib import Path args = get_arguments() @@ -114,4 +114,4 @@ def run_simulation(self, how_many_times): teammates = [CustomAgent(args=args, name='tm', trajectories={args.layout_names[0]: [(1, 1), (1, 2)]})] simulation = OvercookedSimulation(args=args, agent=agent, teammates=teammates, layout_name=args.layout_names[0], p_idx=p_idx, horizon=400) - trajectories = simulation.run_simulation(how_many_times=4) \ No newline at end of file + trajectories = simulation.run_simulation(how_many_times=4) diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py index 1b7c992..3ca1e37 100644 --- a/oai_agents/common/population.py +++ b/oai_agents/common/population.py @@ -32,7 +32,7 @@ def train_SP_with_checkpoints(args, total_training_timesteps, ck_rate, seed, h_d n_envs = training_info["n_envs"] print(f"Restarting training from step: {start_step} (timestep: {start_timestep})") - + init_agent = RLAgentTrainer.generate_randomly_initialized_agent( # need a cleaner way to do this args=args, name=name, @@ -40,10 +40,10 @@ def train_SP_with_checkpoints(args, total_training_timesteps, ck_rate, seed, h_d hidden_dim=h_dim, seed=seed, n_envs=args.n_envs - ) - + ) + population = {layout_name: [] for layout_name in args.layout_names} - + teammates_collection = generate_TC(args=args, population=population, 
agent=init_agent, diff --git a/oai_agents/gym_environments/base_overcooked_env.py b/oai_agents/gym_environments/base_overcooked_env.py index d4ec65b..17c3158 100644 --- a/oai_agents/gym_environments/base_overcooked_env.py +++ b/oai_agents/gym_environments/base_overcooked_env.py @@ -1,11 +1,8 @@ from oai_agents.common.state_encodings import ENCODING_SCHEMES -from oai_agents.common.subtasks import Subtasks, calculate_completed_subtask, get_doable_subtasks -from oai_agents.common.learner import LearnerType, Learner -from oai_agents.agents.agent_utils import CustomAgent, DummyAgent -from oai_agents.common.tags import AgentPerformance, TeamType, TeammatesCollection from oai_agents.common.subtasks import Subtasks, get_doable_subtasks from oai_agents.common.learner import Learner -from oai_agents.agents.agent_utils import CustomAgent +from oai_agents.agents.agent_utils import CustomAgent, DummyAgent +from oai_agents.common.tags import TeammatesCollection from overcooked_ai_py.mdp.overcooked_mdp import OvercookedGridworld, Action, Direction from overcooked_ai_py.mdp.overcooked_env import OvercookedEnv @@ -18,7 +15,6 @@ import numpy as np import pygame from pygame.locals import HWSURFACE, DOUBLEBUF, RESIZABLE -from stable_baselines3.common.env_checker import check_env from stable_baselines3.common.vec_env.stacked_observations import StackedObservations import torch as th import random @@ -164,7 +160,7 @@ def set_teammates(self, teamtype=None): assert self.is_eval_env is True, "Teamtype should only be set for evaluation environments" population_teamtypes = self.teammates_collection[TeammatesCollection.EVAL][self.layout_name] teammates = population_teamtypes[teamtype][np.random.randint(len(population_teamtypes[teamtype]))] - else: + else: population_teamtypes = self.teammates_collection[TeammatesCollection.TRAIN][self.layout_name] teammates = self.curriculum.select_teammates_for_layout(population_teamtypes=population_teamtypes, layout=self.layout_name) @@ -370,14 +366,14 @@ def set_bonus_getter(self, bonus_getter): env = OvercookedGymEnv(layout_name=args.layout_names[0], args=args, ret_completed_subtasks=False, is_eval_env=True, horizon=400, learner_type='originaler') - - p_idx = 0 + + p_idx = 0 teammates = [DummyAgent()] - + env.set_teammates(teammates) env.reset(p_idx=p_idx) done = False - + while not done: action = np.random.randint(0, Action.NUM_ACTIONS) action_idx = Action.ACTION_TO_INDEX[Action.STAY] diff --git a/sandbox/profile_analyze.py b/sandbox/profile_analyze.py index d7136f9..d416cfc 100644 --- a/sandbox/profile_analyze.py +++ b/sandbox/profile_analyze.py @@ -6,4 +6,4 @@ args = parser.parse_args() name = args.name p = pstats.Stats(f"data/profile/{name}") -p.strip_dirs().sort_stats("cumulative").print_stats(20) # Show top 20 functions \ No newline at end of file +p.strip_dirs().sort_stats("cumulative").print_stats(20) # Show top 20 functions diff --git a/scripts/profile_analyze.py b/scripts/profile_analyze.py index d7136f9..d416cfc 100644 --- a/scripts/profile_analyze.py +++ b/scripts/profile_analyze.py @@ -6,4 +6,4 @@ args = parser.parse_args() name = args.name p = pstats.Stats(f"data/profile/{name}") -p.strip_dirs().sort_stats("cumulative").print_stats(20) # Show top 20 functions \ No newline at end of file +p.strip_dirs().sort_stats("cumulative").print_stats(20) # Show top 20 functions diff --git a/scripts/run_overcooked_game.py b/scripts/run_overcooked_game.py index 5e18dc9..7d127af 100644 --- a/scripts/run_overcooked_game.py +++ b/scripts/run_overcooked_game.py @@ -1,6 +1,6 @@ from 
pathlib import Path -from oai_agents.agents.agent_utils import DummyAgent, load_agent +from oai_agents.agents.agent_utils import load_agent from oai_agents.agents.rl import RLAgentTrainer from oai_agents.common.arguments import get_arguments from oai_agents.common.overcooked_gui import OvercookedGUI @@ -17,7 +17,7 @@ def get_teammate_from_pop_file(tm_name, tm_score, pop_path, layout_name): args = get_arguments() args.num_players = 2 - args.layout = f'c1' + args.layout = 'c1' args.p_idx = 0 args.layout_names = [args.layout] args.n_envs = 1 @@ -29,7 +29,7 @@ def get_teammate_from_pop_file(tm_name, tm_score, pop_path, layout_name): # 'agent_models/c4_v4/SP_s1010_h256_tr[SP]_ran/best' # 'agent_models/c4_best_EGO/best_c4/best' - # 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best', # green + # 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best', # green # 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best', # orange # 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best', # 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best', diff --git a/scripts/train_agents.py b/scripts/train_agents.py index 53d48b9..fb6c2dc 100644 --- a/scripts/train_agents.py +++ b/scripts/train_agents.py @@ -301,9 +301,9 @@ def best_EGO(args, add_adv=False) -> None: 'load': [] } if args.prioritized_sampling: - curriculum = Curriculum(train_types=primary_train_types, - eval_types=primary_eval_types, - is_random=False, + curriculum = Curriculum(train_types=primary_train_types, + eval_types=primary_eval_types, + is_random=False, prioritized_sampling=True, priority_scaling=2.0) else: diff --git a/scripts/utils/train_helper.py b/scripts/utils/train_helper.py index 007a729..2d6cda3 100644 --- a/scripts/utils/train_helper.py +++ b/scripts/utils/train_helper.py @@ -130,7 +130,7 @@ def gen_ADV_train_N_X_SP(args, population, curriculum, unseen_teammates_len, n_x # hidden_dim=args.N_X_SP_h_dim, # seed=args.N_X_SP_seed, # n_envs=args.n_envs - #) + #) teammates_collection = generate_TC(args=args, population=population, @@ -296,7 +296,7 @@ def N_X_SP(args, population, curriculum, unseen_teammates_len, n_x_sp_eval_types hidden_dim=args.N_X_SP_h_dim, seed=args.N_X_SP_seed, n_envs=args.n_envs, - + ) teammates_collection = generate_TC( @@ -559,13 +559,13 @@ def get_best_EGO_agents(args, primary_train_types, primary_eval_types, curriculu ttype = TeamType.SELF_PLAY_MEDIUM elif agent_address in args.high_perfs: ttype = TeamType.SELF_PLAY_HIGH - + if ttype in train_collection[layout_name]: train_collection[layout_name][ttype].append([agent]) if ttype in eval_collection[layout_name]: eval_collection[layout_name][ttype] = [[agent]] - + name = f'best_{args.layout_names[0]}' if add_adv: @@ -589,7 +589,7 @@ def get_best_EGO_agents(args, primary_train_types, primary_eval_types, curriculu TeammatesCollection.TRAIN: train_collection, TeammatesCollection.EVAL: eval_collection } - + best_ego_trainer = RLAgentTrainer( name=name, args=args, @@ -601,7 +601,7 @@ def get_best_EGO_agents(args, primary_train_types, primary_eval_types, curriculu seed=args.N_X_SP_seed, hidden_dim=args.N_X_SP_h_dim, curriculum=curriculum, - + learner_type=args.primary_learner_type, checkpoint_rate=args.n_x_sp_total_training_timesteps // args.num_of_ckpoints, ) From 215a570eba814958f2897d4d57b2aac6fadf27f4 Mon Sep 17 00:00:00 2001 From: ava Date: Thu, 27 Mar 2025 15:56:17 -0600 Subject: [PATCH 23/26] Finalize PR --- oai_agents/agents/base_agent.py | 14 +++++----- oai_agents/agents/rl.py | 8 +++--- oai_agents/common/arguments.py | 7 ++--- oai_agents/common/multi_setup_trainer.py | 6 ++--- 
oai_agents/common/overcooked_gui.py | 26 ++----------------- oai_agents/common/overcooked_simulation.py | 10 ++----- oai_agents/common/population.py | 2 +- .../gym_environments/base_overcooked_env.py | 7 ++--- 8 files changed, 26 insertions(+), 54 deletions(-) diff --git a/oai_agents/agents/base_agent.py b/oai_agents/agents/base_agent.py index 859bda1..c207bfe 100644 --- a/oai_agents/agents/base_agent.py +++ b/oai_agents/agents/base_agent.py @@ -233,14 +233,7 @@ def get_distribution(self, obs: th.Tensor): return dist def learn(self, epoch_timesteps): - # import cProfile - # import time - # profiler = cProfile.Profile() - # profiler.enable() self.agent.learn(total_timesteps=epoch_timesteps, reset_num_timesteps=False) - # profiler.disable() - # c_time = time.strftime("%Y-%m-%d_%H-%M-%S") - # profiler.dump_stats(f'data/profile/learn_{c_time}.prof') self.num_timesteps = self.agent.num_timesteps def save(self, path: Path) -> None: @@ -457,6 +450,13 @@ def evaluate(self, eval_agent, num_eps_per_layout_per_tm=5, visualize=False, tim return np.mean(tot_mean_reward), rew_per_layout, rew_per_layout_per_teamtype def set_new_teammates(self): + """ + The logic for selecting teammates has been moved to `base_overcooked_env` to support + running environments with the SubProcEnv flag enabled. + `teammates_collection` and `curriculum` are now managed within the environment. + The `set_teammates` method in `base_overcooked_env` selects an appropriate teammate + based on the current curriculum settings. + """ for i in range(self.args.n_envs): self.env.env_method('set_teammates', indices=i) diff --git a/oai_agents/agents/rl.py b/oai_agents/agents/rl.py index ae98f16..add4d17 100644 --- a/oai_agents/agents/rl.py +++ b/oai_agents/agents/rl.py @@ -5,6 +5,8 @@ from oai_agents.common.tags import AgentPerformance, TeammatesCollection, KeyCheckpoints from oai_agents.agents.agent_utils import CustomAgent from oai_agents.common.checked_model_name_handler import CheckedModelNameHandler +from oai_agents.gym_environments.base_overcooked_env import OvercookedGymEnv + import numpy as np from stable_baselines3 import PPO, DQN @@ -30,8 +32,6 @@ def __init__( train_types = train_types if train_types is not None else [] eval_types = eval_types if eval_types is not None else [] - # assert teammates_collection, "Teammates collection must be provided" - name = name or 'rl_agent' super(RLAgentTrainer, self).__init__(name, args, seed=seed) @@ -63,6 +63,8 @@ def __init__( self.use_policy_clone = use_policy_clone self.learner_type = learner_type + + # teammates_collection and curriculum are passed to the environment instead. 
self.env, self.eval_envs = self.get_envs(_env=env, _eval_envs=eval_envs, deterministic=deterministic, learner_type=learner_type, start_timestep=start_timestep, teammates_collection=teammates_collection, @@ -137,8 +139,6 @@ def print_tc_helper(self, teammates_collection, message=None): def get_envs(self, _env, _eval_envs, deterministic, learner_type, teammates_collection, curriculum, start_timestep: int = 0): - from oai_agents.gym_environments.base_overcooked_env import OvercookedGymEnv - if self.args.use_multipleprocesses: VEC_ENV_CLS = SubprocVecEnv else: diff --git a/oai_agents/common/arguments.py b/oai_agents/common/arguments.py index f58b7f4..310b288 100644 --- a/oai_agents/common/arguments.py +++ b/oai_agents/common/arguments.py @@ -104,9 +104,10 @@ def get_arguments(additional_args: Optional[List] = None): parser.add_argument("--total-sp-agents", type=int, default=4) parser.add_argument("--ck-list-offset", type=int, default=0) - parser.add_argument('--low-perfs', help='shitty code to run ult baseline exp', default='default') - parser.add_argument('--med-perfs', help='shitty code to run ult baseline exp', default='default') - parser.add_argument('--high-perfs', help='shitty code to run ult baseline exp', default='default') + # The next three args are only to run the ultimate baseline exp, I will clean it later + parser.add_argument('--low-perfs', help='code to run ult baseline exp', default='default') + parser.add_argument('--med-perfs', help='code to run ult baseline exp', default='default') + parser.add_argument('--high-perfs', help='code to run ult baseline exp', default='default') for parser_arg, parser_kwargs in additional_args: diff --git a/oai_agents/common/multi_setup_trainer.py b/oai_agents/common/multi_setup_trainer.py index 8648090..44290fd 100644 --- a/oai_agents/common/multi_setup_trainer.py +++ b/oai_agents/common/multi_setup_trainer.py @@ -119,8 +119,7 @@ def get_trained_agent(self, seed, h_dim): curriculum=self.curriculum ) - # print('before generate_randomly_initialized_agent') - init_agent = RLAgentTrainer.generate_randomly_initialized_agent( # need a cleaner way to do this + init_agent = RLAgentTrainer.generate_randomly_initialized_agent( args=self.args, name=name, learner_type=self.args.primary_learner_type, @@ -128,7 +127,6 @@ def get_trained_agent(self, seed, h_dim): seed=seed, n_envs=self.args.n_envs ) - population = {layout_name: [] for layout_name in self.args.layout_names} teammates_collection = generate_TC(args=self.args, population=population, @@ -138,7 +136,7 @@ def get_trained_agent(self, seed, h_dim): eval_types_to_read_from_file=self.eval_types['load'], unseen_teammates_len=0, use_entire_population_for_train_types_teammates=True) - + # we can't no longer pass empty teammates_collection to the RlAgentTrainer, so for SP we should do this ^ return self.get_reinforcement_agent( name=name, diff --git a/oai_agents/common/overcooked_gui.py b/oai_agents/common/overcooked_gui.py index 3ec289b..09bb99c 100644 --- a/oai_agents/common/overcooked_gui.py +++ b/oai_agents/common/overcooked_gui.py @@ -48,14 +48,7 @@ def __init__(self, args, layout_name=None, agent=None, teammates=None, p_idx=0, self._display_surf = None self.args = args self.layout_name = layout_name or 'asymmetric_advantages' - - teammates_collection = { - 'eval': { - args.layout: { - 'run_type': [teammates] - } - } - } + teammates_collection = {'eval': {self.layout_name: {'run_type': [teammates]}}} self.env = OvercookedGymEnv(layout_name=self.layout_name, args=args, ret_completed_subtasks=False, 
is_eval_env=True, horizon=horizon, learner_type='originaler', @@ -64,7 +57,7 @@ def __init__(self, args, layout_name=None, agent=None, teammates=None, p_idx=0, self.agent = agent self.p_idx = p_idx - self.env.set_teammates('run_type') + self.env.set_teammates(teamtype='run_type') self.env.reset(p_idx=self.p_idx) if self.agent != 'human': @@ -107,21 +100,6 @@ def __init__(self, args, layout_name=None, agent=None, teammates=None, p_idx=0, os.makedirs(f'data/screenshots/{self.gif_name}') - self.resource_locations = {} - for y, row in enumerate(self.env.env.mdp.terrain_mtx): - for x, cell in enumerate(row): - if cell in ['S', 'D', 'P', 'O']: - self.resource_locations[(x, y)] = cell - - self.resource_usage = { - agent_idx: dict.fromkeys(self.resource_locations, 0) - for agent_idx in range(len(self.env.state.players)) - } - - print(f"Resource locations: {self.resource_locations}") - - - def start_screen(self): pygame.init() surface = StateVisualizer(tile_size=self.tile_size).render_state(self.env.state, diff --git a/oai_agents/common/overcooked_simulation.py b/oai_agents/common/overcooked_simulation.py index 9a81989..e825844 100644 --- a/oai_agents/common/overcooked_simulation.py +++ b/oai_agents/common/overcooked_simulation.py @@ -10,13 +10,7 @@ def __init__(self, args, agent, teammates, layout_name, p_idx, horizon=400): self.args = args self.layout_name = layout_name - teammates_collection = { - 'eval': { - self.layout_name: { - 'run_type': [teammates] - } - } - } + teammates_collection = {'eval': {self.layout_name: {'run_type': [teammates]}}} self.env = OvercookedGymEnv(args=args, layout_name=self.layout_name, @@ -30,7 +24,7 @@ def __init__(self, args, agent, teammates, layout_name, p_idx, horizon=400): self.agent = agent self.p_idx = p_idx - self.env.set_teammates('run_type') + self.env.set_teammates(teamtype='run_type') self.env.reset(p_idx=self.p_idx) assert self.agent != 'human' diff --git a/oai_agents/common/population.py b/oai_agents/common/population.py index 3ca1e37..0d6c577 100644 --- a/oai_agents/common/population.py +++ b/oai_agents/common/population.py @@ -33,7 +33,7 @@ def train_SP_with_checkpoints(args, total_training_timesteps, ck_rate, seed, h_d print(f"Restarting training from step: {start_step} (timestep: {start_timestep})") - init_agent = RLAgentTrainer.generate_randomly_initialized_agent( # need a cleaner way to do this + init_agent = RLAgentTrainer.generate_randomly_initialized_agent( args=args, name=name, learner_type=args.primary_learner_type, diff --git a/oai_agents/gym_environments/base_overcooked_env.py b/oai_agents/gym_environments/base_overcooked_env.py index 17c3158..7e877df 100644 --- a/oai_agents/gym_environments/base_overcooked_env.py +++ b/oai_agents/gym_environments/base_overcooked_env.py @@ -156,6 +156,10 @@ def get_joint_action(self): return self.joint_action def set_teammates(self, teamtype=None): + ''' + When teamtype is None, teammate is set according to the curriculum + When teamtype is not None, teammate is set according to the teamtype which is only used for evaluation purposes + ''' if teamtype: assert self.is_eval_env is True, "Teamtype should only be set for evaluation environments" population_teamtypes = self.teammates_collection[TeammatesCollection.EVAL][self.layout_name] @@ -253,7 +257,6 @@ def step(self, action): tm_obs = self.get_obs(c_idx=t_idx, enc_fn=teammate.encoding_fn) if type(teammate) == CustomAgent: - # if isinstance(teammate, CustomAgent): info = {'layout_name': self.layout_name, 'u_env_idx': self.unique_env_idx} joint_action[t_idx] 
= teammate.predict(obs=tm_obs, deterministic=self.deterministic, info=info)[0] else: @@ -279,7 +282,6 @@ def step(self, action): for t_idx in self.t_idxes: # Should be right after env.step tm = self.get_teammate_from_idx(t_idx) if type(tm) == CustomAgent: - # if isinstance(tm, CustomAgent): tm.update_current_position(layout_name=self.layout_name, new_position=self.env.state.players[t_idx].position, u_env_idx=self.unique_env_idx) if self.shape_rewards and not self.is_eval_env: @@ -311,7 +313,6 @@ def reset(self, p_idx=None): if self.reset_info and 'start_position' in self.reset_info: self.reset_info['start_position'] = {} for id in range(len(teammates_ids)): - # if isinstance(self.teammates[id], CustomAgent): if type(self.teammates[id]) == CustomAgent: self.teammates[id].reset() self.reset_info['start_position'][teammates_ids[id]] = self.teammates[id].get_start_position(self.layout_name, u_env_idx=self.unique_env_idx) From cc4ec560b4bbe9058acffde6e376a1e5e8e18585 Mon Sep 17 00:00:00 2001 From: ava Date: Thu, 27 Mar 2025 15:59:44 -0600 Subject: [PATCH 24/26] Fix default n_envs --- scripts/bash_scripts/env_config.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/bash_scripts/env_config.sh b/scripts/bash_scripts/env_config.sh index 3e4951b..5ee4ff1 100755 --- a/scripts/bash_scripts/env_config.sh +++ b/scripts/bash_scripts/env_config.sh @@ -14,7 +14,7 @@ fi if [ "$QUICK_TEST" = false ]; then WANDB_MODE="online" - N_ENVS=50 + N_ENVS=210 EPOCH_TIMESTEPS=100000 POP_TOTAL_TRAINING_TIMESTEPS=$(echo "$HOW_LONG * 5000000" | bc) N_X_SP_TOTAL_TRAINING_TIMESTEPS=$(echo "$HOW_LONG * 5000000" | bc) From 65e7967c2dd84faeda499cc94dc8046ea0b75d11 Mon Sep 17 00:00:00 2001 From: ava Date: Fri, 28 Mar 2025 14:59:04 -0600 Subject: [PATCH 25/26] Small cleanups --- sandbox/visualize_heatmap.py | 76 ++++++++++++---------------------- scripts/run_overcooked_game.py | 19 ++++----- 2 files changed, 34 insertions(+), 61 deletions(-) diff --git a/sandbox/visualize_heatmap.py b/sandbox/visualize_heatmap.py index f0dd8b3..886913d 100644 --- a/sandbox/visualize_heatmap.py +++ b/sandbox/visualize_heatmap.py @@ -9,11 +9,11 @@ from oai_agents.common.overcooked_simulation import OvercookedSimulation -def extract_layout_features(grid): - """ - Extracts layout features such as counters, pots, onions, and player starting positions. - Returns a dictionary with their coordinates and the grid shape. 
- """ +def extract_layout_features(args): + from overcooked_ai_py.mdp.overcooked_mdp import OvercookedGridworld + mdp = OvercookedGridworld.from_layout_name(args.layout) + grid = mdp.terrain_mtx + layout_features = { "P": [], "O": [], @@ -23,11 +23,11 @@ def extract_layout_features(grid): } feature_positions = set() # Store all feature coordinates for masking - grid_lines = [line.strip() for line in grid.strip().split("\n")] - grid_height = len(grid_lines) - grid_width = max(len(line) for line in grid_lines) # Accounts for irregular widths + # grid_lines = [line.strip() for line in grid.strip().split("\n")] + grid_height = len(grid) + grid_width = max(len(line) for line in grid) # Accounts for irregular widths - for y, row in enumerate(grid_lines): + for y, row in enumerate(grid): for x, char in enumerate(row): if char == "P": layout_features["P"].append((x, y)) @@ -113,64 +113,40 @@ def plot_heatmap(tiles_v, layout_features, feature_positions, title=''): if __name__ == "__main__": args = get_arguments() args.num_players = 2 - args.layout = 'storage_room' - - # grid_layout = """XXXPPXXX - # X 2 X - # D XXXX S - # X 1 X - # XXXOOXXX""" - - grid_layout = """XPXXXXXXXXPX - S XODX S - X 12 X - X XDOX X - XXXXXXXXXXXX""" - - # grid_layout = """XODSXXXXSDXX - # X X - # S PP XX X - # D PP OX 1 X - # O PP DX 2 X - # X SX X - # XSDOXXXXOPXX""" - - # grid_layout = """XXXPPXXX - # X 2 4 X - # S XXXX5S - # X 1 3 X - # XXDOODXX""" + args.layout = 'c4' args.p_idx = 0 args.n_envs = 200 args.layout_names = [args.layout] - # path = 'agent_models/Complex/2/FCP_s1010_h256_tr[AMX]_ran/last' - path = 'agent_models/Complex/2/SP_hd256_seed2602/last' - # path = 'agent_models/Complex/2/N-1-SP_s1010_h256_tr[SPH_SPM_SPL_SPSA]_ran_originaler_attack0/last' - # path = 'agent_models/Complex/2/N-1-SP_s1010_h256_tr[SPH_SPM_SPL_SPSA]_ran_originaler_attack1/last' - # path = 'agent_models/Complex/2/N-1-SP_s1010_h256_tr[SPH_SPM_SPL_SPSA]_ran_originaler_attack2/last' + path = 'agent_models/c4_best_EGO_with_CAP/best_c4_adv/best' + # path = 'agent_models/c4_best_EGO/best_c4/best' agent = load_agent(Path(path), args) title = f'{args.layout}_{path.split("/")[-2]}' - high_perf_teammates = [agent for _ in range(args.num_players - 1)] - low_perf_teammates = [DummyAgent(action='random') for _ in range(args.num_players - 1)] - # Define the environment grid layout (modify this based on the actual layout) + high_perf_paths = [ + 'agent_models/c4_v4/SP_s1010_h256_tr[SP]_ran/best', + 'agent_models/c4_v3/SP_s1010_h256_tr[SP]_ran/best', + 'agent_models/c4_v2/SP_s1010_h256_tr[SP]_ran/best', + 'agent_models/c4_v1/SP_s1010_h256_tr[SP]_ran/best', + ] + high_perf_teammates = [[load_agent(Path(tm_path), args)] for tm_path in high_perf_paths[:args.num_players - 1]] + # high_perf_teammates = [agent for _ in range(args.num_players - 1)] + # low_perf_teammates = [DummyAgent(action='random') for _ in range(args.num_players - 1)] + low_perf_teammates = [] - # Extract layout features, feature positions, and shape dynamically - layout_features, feature_positions, shape = extract_layout_features(grid_layout) - # Initialize heatmap matrices dynamically based on extracted shape - final_tiles_v = np.zeros(shape) + layout_features, feature_positions, shape = extract_layout_features(args) + final_tiles_v = np.zeros(shape) for p_idx in range(args.num_players): - for teammates in [low_perf_teammates, high_perf_teammates]: + for teammates in high_perf_teammates: simulation = OvercookedSimulation(args=args, agent=agent, teammates=teammates, layout_name=args.layout, 
p_idx=p_idx, horizon=400) trajectories = simulation.run_simulation(how_many_times=args.num_eval_for_heatmap_gen) tile = get_tile_map(args=args, shape=shape, agent=agent, p_idx=p_idx, trajectories=trajectories, interact_actions_only=False) - final_tiles_v += tile['V'] + final_tiles_v += tile['P'] # final_tiles_v = not_used_function_get_tile_v_using_all_states(args=args, agent=agent, layout=args.layout, shape=shape) diff --git a/scripts/run_overcooked_game.py b/scripts/run_overcooked_game.py index 7d127af..12825a2 100644 --- a/scripts/run_overcooked_game.py +++ b/scripts/run_overcooked_game.py @@ -17,32 +17,29 @@ def get_teammate_from_pop_file(tm_name, tm_score, pop_path, layout_name): args = get_arguments() args.num_players = 2 - args.layout = 'c1' + args.layout = 'c4' args.p_idx = 0 args.layout_names = [args.layout] args.n_envs = 1 teammates_path = [ # 'agent_models/c1_v4/SP_s1010_h256_tr[SP]_ran/best' - 'agent_models/c1_best_EGO/best_c1/best' - + # 'agent_models/c1_best_EGO/best_c1/best' # 'agent_models/c4_v4/SP_s1010_h256_tr[SP]_ran/best' # 'agent_models/c4_best_EGO/best_c4/best' - - # 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best', # green - # 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best', # orange - # 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best', - # 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best', - # 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best', + # 'agent_models/c4_v4/SP_s1010_h256_tr[SP]_ran/best', + # 'agent_models/c4_v3/SP_s1010_h256_tr[SP]_ran/best', + 'agent_models/c4_best_EGO_with_CAP/best_c4_adv/best' ] + teammates = [load_agent(Path(tm_path), args) for tm_path in teammates_path[:args.num_players - 1]] # trajectories = tile locations. Top left of the layout is (0, 0), bottom right is (M, N) # teammates = [CustomAgent(args=args, name='human', trajectories={args.layout: [(2, 1), (3, 1)]})] # teammates = [DummyAgent(action='random') for _ in range(args.num_players - 1)] - # player_path = 'agent_models/ALMH_CUR/2/SP_hd64_seed14/best' - # player_path = 'agent_models/c4_best_EGO/best_c4/best' + # player_path = 'agent_models/c4_best_EGO_with_CAP/best_c4_adv/best' + # # player_path = 'agent_models/c4_best_EGO/best_c4/best' # player = load_agent(Path(player_path), args) player = teammates[0] # player = 'human' # blue From 5a4055e31de2b5b56a7f70a07c8023830bac0782 Mon Sep 17 00:00:00 2001 From: ava Date: Fri, 28 Mar 2025 15:00:22 -0600 Subject: [PATCH 26/26] ruff fix --- sandbox/visualize_heatmap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sandbox/visualize_heatmap.py b/sandbox/visualize_heatmap.py index 886913d..321610b 100644 --- a/sandbox/visualize_heatmap.py +++ b/sandbox/visualize_heatmap.py @@ -4,7 +4,7 @@ import seaborn as sns from oai_agents.common.heatmap import get_tile_map -from oai_agents.agents.agent_utils import DummyAgent, load_agent +from oai_agents.agents.agent_utils import load_agent from oai_agents.common.arguments import get_arguments from oai_agents.common.overcooked_simulation import OvercookedSimulation
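
Usage sketch (illustration only, not part of the patches above). After PATCH 23, teammate selection for evaluation runs lives inside the environment: callers such as OvercookedGUI and OvercookedSimulation build a small teammates_collection and then call set_teammates(teamtype='run_type'). The snippet below is a minimal standalone version of that wiring, assuming the oai_agents package is importable and that OvercookedGymEnv accepts a teammates_collection keyword argument (the RLAgentTrainer.get_envs call in PATCH 23 implies this, but the constructor signature is not shown in the diffs); the 'eval' key, the 'run_type' team type, and the 'asymmetric_advantages' layout name are taken directly from the patches.

# Minimal sketch of the post-refactor evaluation wiring; the teammates_collection
# constructor kwarg is an assumption (see note above), everything else mirrors the diffs.
from oai_agents.agents.agent_utils import DummyAgent
from oai_agents.common.arguments import get_arguments
from oai_agents.gym_environments.base_overcooked_env import OvercookedGymEnv

args = get_arguments()
args.num_players = 2
args.layout_names = ['asymmetric_advantages']  # any layout known to the repo
layout = args.layout_names[0]

# One random-acting teammate per non-ego player, as in run_overcooked_game.py.
teammates = [DummyAgent(action='random') for _ in range(args.num_players - 1)]

# Eval envs read teammates from the 'eval' branch, keyed by layout and then team type.
teammates_collection = {'eval': {layout: {'run_type': [teammates]}}}

env = OvercookedGymEnv(layout_name=layout, args=args, ret_completed_subtasks=False,
                       is_eval_env=True, horizon=400, learner_type='originaler',
                       teammates_collection=teammates_collection)  # kwarg assumed

env.set_teammates(teamtype='run_type')  # explicit teamtype: evaluation path
obs = env.reset(p_idx=0)                # teamtype=None would instead follow the curriculum

Keeping this lookup inside the environment is what allows the trainer to run with SubprocVecEnv: each worker picks its own teammates through env_method('set_teammates') rather than having the trainer push a teammates list into every subprocess each episode.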