From c3480c28585cfc83318b466431dfdbd444a4beb4 Mon Sep 17 00:00:00 2001
From: HosseinShojaei
Date: Sat, 2 Aug 2025 17:11:31 +0330
Subject: [PATCH] Refactor: Use algorithm's device for actions to ensure
 consistency

---
 rsl_rl/runners/on_policy_runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rsl_rl/runners/on_policy_runner.py b/rsl_rl/runners/on_policy_runner.py
index 36f11f37..37f022e4 100644
--- a/rsl_rl/runners/on_policy_runner.py
+++ b/rsl_rl/runners/on_policy_runner.py
@@ -102,7 +102,7 @@ def learn(self, num_learning_iterations: int, init_at_random_ep_len: bool = Fals
                     # Sample actions
                     actions = self.alg.act(obs)
                     # Step the environment
-                    obs, rewards, dones, extras = self.env.step(actions.to(self.env.device))
+                    obs, rewards, dones, extras = self.env.step(actions.to(self.device))
                     # Move to device
                     obs, rewards, dones = (obs.to(self.device), rewards.to(self.device), dones.to(self.device))
                     # process the step