Commit d28a719

Update v5 doc
1 parent c34659f commit d28a719

File tree

1 file changed

+12
-12
lines changed

gymnasium_robotics/envs/maze/ant_maze_v5.py

Lines changed: 12 additions & 12 deletions
@@ -1,4 +1,4 @@
-"""A maze environment with the Gymnasium Ant agent (https://github.com/Farama-Foundation/Gymnasium/blob/main/gymnasium/envs/mujoco/ant_v4.py).
+"""A maze environment with the Gymnasium Ant agent (https://github.com/Farama-Foundation/Gymnasium/blob/main/gymnasium/envs/mujoco/ant_v5.py).
 
 The code is inspired by the D4RL repository hosted on GitHub (https://github.com/Farama-Foundation/D4RL), published in the paper
 'D4RL: Datasets for Deep Data-Driven Reinforcement Learning' by Justin Fu, Aviral Kumar, Ofir Nachum, George Tucker, Sergey Levine.
@@ -40,22 +40,22 @@ class AntMazeEnv(MazeEnv, EzPickle):
 #### Maze size
 The map variations for the mazes are the same as for `PointMaze`. The ant environments with fixed goal and reset locations are the following:
 
-* `AntMaze_UMaze-v4`
-* `AntMaze_BigMaze-v4`
-* `AntMaze_HardestMaze-v4`
+* `AntMaze_UMaze-v5`
+* `AntMaze_BigMaze-v5`
+* `AntMaze_HardestMaze-v5`
 
 #### Diverse goal mazes
 The environments with fixed reset position for the ant and randomly selected goals, also known as diverse goal, are:
 
-* `AntMaze_BigMaze_DG-v4`
-* `AntMaze_HardestMaze_DG-v4`
+* `AntMaze_BigMaze_DG-v5`
+* `AntMaze_HardestMaze_DG-v5`
 
 #### Diverse goal and reset mazes
 
 Finally, the environments that select the reset and goal locations randomly are:
 
-* `AntMaze_BigMaze_DGR-v4`
-* `AntMaze_HardestMaze_DGR-v4`
+* `AntMaze_BigMaze_DGR-v5`
+* `AntMaze_HardestMaze_DGR-v5`
 
 #### Custom maze
 Also, any of the `AntMaze` environments can be initialized with a custom maze map by setting the `maze_map` argument like follows:
@@ -70,7 +70,7 @@ class AntMazeEnv(MazeEnv, EzPickle):
 [1, C, 0, C, 1],
 [1, 1, 1, 1, 1]]
 
-env = gym.make('AntMaze_UMaze-v4', maze_map=example_map)
+env = gym.make('AntMaze_UMaze-v5', maze_map=example_map)
 ```
 
 ### Action Space
@@ -153,8 +153,8 @@ class AntMazeEnv(MazeEnv, EzPickle):
 - *sparse*: the returned reward can have two values: `0` if the ant hasn't reached its final target position, and `1` if the ant is in the final target position (the ant is considered to have reached the goal if the Euclidean distance between both is lower than 0.5 m).
 - *dense*: the returned reward is the negative Euclidean distance between the achieved goal position and the desired goal.
 
-To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `AntMaze_UMaze-v4`. However, for `dense`
-reward the id must be modified to `AntMaze_UMazeDense-v4` and initialized as follows:
+To initialize this environment with one of the mentioned reward functions the type of reward must be specified in the id string when the environment is initialized. For `sparse` reward the id is the default of the environment, `AntMaze_UMaze-v5`. However, for `dense`
+reward the id must be modified to `AntMaze_UMazeDense-v5` and initialized as follows:
 
 ```python
 import gymnasium as gym
@@ -197,7 +197,7 @@ class AntMazeEnv(MazeEnv, EzPickle):
 
 gym.register_envs(gymnasium_robotics)
 
-env = gym.make('AntMaze_UMaze-v4', max_episode_steps=100)
+env = gym.make('AntMaze_UMaze-v5', max_episode_steps=100)
 ```
 
 ### Version History
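To make the docstring's custom-maze snippet concrete, here is a small self-contained sketch of the `maze_map` layout format (the 3-row map below is an illustration, not the docstring's full `example_map`; that `1` marks a wall, `0` a free cell, and `"c"` a combined reset/goal cell is an assumption about the maze-map conventions). The `gym.make` calls are kept as comments so the snippet runs without MuJoCo installed:

```python
# Illustrative maze layout in the docstring's maze_map format.
# Assumptions: 1 = wall, 0 = free cell, "c" = cell usable as both
# reset and goal location (the docstring's `C` shorthand).
C = "c"
example_map = [
    [1, 1, 1, 1, 1],
    [1, C, 0, C, 1],
    [1, 1, 1, 1, 1],
]

# Sanity-check the layout: rectangular, with a fully walled border.
assert all(len(row) == len(example_map[0]) for row in example_map)
assert all(cell == 1 for cell in example_map[0] + example_map[-1])

# With gymnasium and gymnasium-robotics installed, the v5 ids from
# this commit would be used as in the docstring:
#
#   import gymnasium as gym
#   import gymnasium_robotics
#   gym.register_envs(gymnasium_robotics)
#   env = gym.make("AntMaze_UMaze-v5", maze_map=example_map,
#                  max_episode_steps=100)
```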
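The sparse/dense reward rules quoted in the docstring can be sketched as a tiny helper; this is only an illustration of the documented behaviour, not the library's actual implementation (the names `maze_reward` and `threshold` are made up for this sketch):

```python
import math

def maze_reward(achieved, desired, reward_type="sparse", threshold=0.5):
    """Reward rules as documented for the AntMaze environments:
    sparse -> 1.0 when the Euclidean distance between achieved and
    desired goal is lower than `threshold` (0.5 m), else 0.0;
    dense  -> negative Euclidean distance to the desired goal."""
    dist = math.dist(achieved, desired)
    if reward_type == "sparse":
        return 1.0 if dist < threshold else 0.0
    return -dist
```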
