@@ -81,7 +81,7 @@ def parse_args():

 class RecordEpisodeStatistics(gym.Wrapper):
     def __init__(self, env, deque_size=100):
-        super(RecordEpisodeStatistics, self).__init__(env)
+        super().__init__(env)
         self.num_envs = getattr(env, "num_envs", 1)
         self.episode_returns = None
         self.episode_lengths = None
@@ -94,7 +94,7 @@ def __init__(self, env, deque_size=100):
             print("env has lives")

     def reset(self, **kwargs):
-        observations = super(RecordEpisodeStatistics, self).reset(**kwargs)
+        observations = super().reset(**kwargs)
         self.episode_returns = np.zeros(self.num_envs, dtype=np.float32)
         self.episode_lengths = np.zeros(self.num_envs, dtype=np.int32)
         self.lives = np.zeros(self.num_envs, dtype=np.int32)
@@ -103,7 +103,7 @@ def reset(self, **kwargs):
         return observations

     def step(self, action):
-        observations, rewards, dones, infos = super(RecordEpisodeStatistics, self).step(action)
+        observations, rewards, dones, infos = super().step(action)
         self.episode_returns += infos["reward"]
         self.episode_lengths += 1
         self.returned_episode_returns[:] = self.episode_returns
@@ -133,7 +133,7 @@ def layer_init(layer, std=np.sqrt(2), bias_const=0.0):

 class Agent(nn.Module):
     def __init__(self, envs):
-        super(Agent, self).__init__()
+        super().__init__()
         self.network = nn.Sequential(
             layer_init(nn.Conv2d(4, 32, 8, stride=4)),
             nn.ReLU(),
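The change is purely mechanical: in Python 3, the zero-argument super() called inside a method resolves to the same bound proxy as the explicit super(Class, self) form, so the wrappers behave identically. A minimal sketch of the equivalence, using hypothetical Base/Wrapped classes as stand-ins for gym.Wrapper / RecordEpisodeStatistics (not part of the patch):

# Base/Wrapped are illustrative stand-ins, not code from the repository.
class Base:
    def __init__(self, env):
        self.env = env

class Wrapped(Base):
    def __init__(self, env):
        # Equivalent to: super(Wrapped, self).__init__(env)
        super().__init__(env)

w = Wrapped(env="dummy-env")
assert w.env == "dummy-env"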