revert maze reward function (#158)

Kallinteris-Andreas · web-flow · commit 5dde2379a0c5 · 2023-06-23T06:55:52.000-04:00
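* Revert the maze reward function in maze.py: the sparse reward is again `(distance <= 0.45)` cast to float64 (1.0 within 0.45 of the goal, 0.0 otherwise) instead of `-(d > 0.45)` cast to float32 (-1.0 beyond 0.45, otherwise zero). The dense reward `exp(-distance)` is unchanged; the local variable `d` is renamed to `distance`.

* Update maze_v4.py: apply the same `compute_reward` change there.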
diff --git a/gymnasium_robotics/envs/maze/maze.py b/gymnasium_robotics/envs/maze/maze.py
@@ -275,11 +275,11 @@ def add_xy_position_noise(self, xy_pos: np.ndarray) -> np.ndarray:
     def compute_reward(
         self, achieved_goal: np.ndarray, desired_goal: np.ndarray, info
     ) -> float:
-        d = np.linalg.norm(achieved_goal - desired_goal, axis=-1)
+        distance = np.linalg.norm(achieved_goal - desired_goal, axis=-1)
         if self.reward_type == "dense":
-            return np.exp(-d)
+            return np.exp(-distance)
         elif self.reward_type == "sparse":
-            return -(d > 0.45).astype(np.float32)
+            return (distance <= 0.45).astype(np.float64)
 
     def compute_terminated(
         self, achieved_goal: np.ndarray, desired_goal: np.ndarray, info
diff --git a/gymnasium_robotics/envs/maze/maze_v4.py b/gymnasium_robotics/envs/maze/maze_v4.py
@@ -355,11 +355,11 @@ def add_xy_position_noise(self, xy_pos: np.ndarray) -> np.ndarray:
     def compute_reward(
         self, achieved_goal: np.ndarray, desired_goal: np.ndarray, info
     ) -> float:
-        d = np.linalg.norm(achieved_goal - desired_goal, axis=-1)
+        distance = np.linalg.norm(achieved_goal - desired_goal, axis=-1)
         if self.reward_type == "dense":
-            return np.exp(-d)
+            return np.exp(-distance)
         elif self.reward_type == "sparse":
-            return -(d > 0.45).astype(np.float32)
+            return (distance <= 0.45).astype(np.float64)
 
     def compute_terminated(
         self, achieved_goal: np.ndarray, desired_goal: np.ndarray, info