|
49 | 49 | "source": [ |
50 | 50 | "# @title Install necessary packages.\n", |
51 | 51 | "!pip install -U dopamine-rl\n", |
52 | | - "!pip install pandas==0.24.2 # Needed to be able to load the pickle files.\n", |
53 | | - "!pip install --upgrade gym\n", |
54 | | - "!pip install gym[atari,accept-rom-license]" |
| 52 | + "!pip install -U gymnasium\n", |
| 53 | + "!pip install -U baselines\n", |
| 54 | + "!pip install -U ale_py\n" |
55 | 55 | ] |
56 | 56 | }, |
57 | 57 | { |
|
77 | 77 | "GAME = 'Asterix' # @param" |
78 | 78 | ] |
79 | 79 | }, |
80 | | - { |
81 | | - "cell_type": "code", |
82 | | - "execution_count": null, |
83 | | - "metadata": { |
84 | | - "cellView": "form", |
85 | | - "id": "EFY3tTITHugq" |
86 | | - }, |
87 | | - "outputs": [], |
88 | | - "source": [ |
89 | | - "# @title Load baseline data\n", |
90 | | - "!gsutil -q -m cp -R gs://download-dopamine-rl/preprocessed-benchmarks/* /content/\n", |
91 | | - "experimental_data = colab_utils.load_baselines('/content')" |
92 | | - ] |
93 | | - }, |
94 | 80 | { |
95 | 81 | "cell_type": "markdown", |
96 | 82 | "metadata": { |
|
140 | 126 | "run_experiment.Runner.num_iterations = 200\n", |
141 | 127 | "run_experiment.Runner.training_steps = 10\n", |
142 | 128 | "run_experiment.Runner.max_steps_per_episode = 100\n", |
| 129 | + "ReplayBuffer.max_capacity = 1_000\n", |
| 130 | + "ReplayBuffer.batch_size = 32\n", |
143 | 131 | "\"\"\".format(GAME)\n", |
144 | 132 | "gin.parse_config(random_dqn_config, skip_unknown=False)\n", |
145 | 133 | "\n", |
|
178 | 166 | " LOG_PATH, verbose=True, summary_keys=['train_episode_returns'])\n", |
179 | 167 | "random_dqn_data['agent'] = 'MyRandomDQN'\n", |
180 | 168 | "random_dqn_data['run_number'] = 1\n", |
181 | | - "experimental_data[GAME] = experimental_data[GAME].merge(random_dqn_data,\n", |
182 | | - " how='outer')" |
| 169 | + "experimental_data = {GAME: random_dqn_data}\n" |
183 | 170 | ] |
184 | 171 | }, |
185 | 172 | { |
|
234 | 221 | " actions with probability switch_prob.\"\"\"\n", |
235 | 222 | " def __init__(self, sess, num_actions, switch_prob=0.1):\n", |
236 | 223 | " self._sess = sess\n", |
| 224 | + " self.summary_writer = None\n", |
237 | 225 | " self._num_actions = num_actions\n", |
238 | 226 | " self._switch_prob = switch_prob\n", |
239 | 227 | " self._last_action = np.random.randint(num_actions)\n", |
|
273 | 261 | "run_experiment.Runner.num_iterations = 200\n", |
274 | 262 | "run_experiment.Runner.training_steps = 10\n", |
275 | 263 | "run_experiment.Runner.max_steps_per_episode = 100\n", |
| 264 | + "ReplayBuffer.max_capacity = 1_000\n", |
| 265 | + "ReplayBuffer.batch_size = 32\n", |
276 | 266 | "\"\"\".format(GAME)\n", |
277 | 267 | "gin.parse_config(sticky_config, skip_unknown=False)\n", |
278 | 268 | "\n", |
|
0 commit comments