diff --git a/rsl_rl/runners/on_policy_runner.py b/rsl_rl/runners/on_policy_runner.py index 36f11f37..37f022e4 100644 --- a/rsl_rl/runners/on_policy_runner.py +++ b/rsl_rl/runners/on_policy_runner.py @@ -102,7 +102,7 @@ def learn(self, num_learning_iterations: int, init_at_random_ep_len: bool = Fals # Sample actions actions = self.alg.act(obs) # Step the environment - obs, rewards, dones, extras = self.env.step(actions.to(self.env.device)) + obs, rewards, dones, extras = self.env.step(actions.to(self.device)) # Move to device obs, rewards, dones = (obs.to(self.device), rewards.to(self.device), dones.to(self.device)) # process the step