From c3480c28585cfc83318b466431dfdbd444a4beb4 Mon Sep 17 00:00:00 2001
From: HosseinShojaei
Date: Sat, 2 Aug 2025 17:11:31 +0330
Subject: [PATCH] Refactor: Use algorithm's device for actions to ensure
 consistency

---
 rsl_rl/runners/on_policy_runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rsl_rl/runners/on_policy_runner.py b/rsl_rl/runners/on_policy_runner.py
index 36f11f37..37f022e4 100644
--- a/rsl_rl/runners/on_policy_runner.py
+++ b/rsl_rl/runners/on_policy_runner.py
@@ -102,7 +102,7 @@ def learn(self, num_learning_iterations: int, init_at_random_ep_len: bool = Fals
                     # Sample actions
                     actions = self.alg.act(obs)
                     # Step the environment
-                    obs, rewards, dones, extras = self.env.step(actions.to(self.env.device))
+                    obs, rewards, dones, extras = self.env.step(actions.to(self.device))
                     # Move to device
                     obs, rewards, dones = (obs.to(self.device), rewards.to(self.device), dones.to(self.device))
                     # process the step