Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
f127aa3
prototype jax with ddpg
vwxyzjn May 29, 2022
cbc5d88
Quick fix
vwxyzjn Jun 22, 2022
b4662c2
quick fix
vwxyzjn Jun 22, 2022
754a0b1
Commit changes - successful prototype
vwxyzjn Jun 24, 2022
223a8ff
Remove scripts
vwxyzjn Jun 25, 2022
85fbfe2
Simplify the implementation: careful with shape
vwxyzjn Jun 25, 2022
8ffbd26
Format
vwxyzjn Jun 25, 2022
c72cfb7
Remove code
vwxyzjn Jun 25, 2022
bfece78
formatting changes
vwxyzjn Jun 25, 2022
0710728
formatting change
vwxyzjn Jun 25, 2022
92d9d13
bug fix
vwxyzjn Jun 25, 2022
ee80f6b
correctly implementing keys
vwxyzjn Jun 26, 2022
0b30c57
these two lines are not necessary
vwxyzjn Jun 28, 2022
8e9f991
Adapting to the `TrainState` API
vwxyzjn Jun 28, 2022
38ca055
Simplify code
vwxyzjn Jun 28, 2022
3a58fcf
use `optax.incremental_update`
vwxyzjn Jun 29, 2022
207d09f
Also log q values
vwxyzjn Jun 29, 2022
6f4fa3d
Addresses #211
vwxyzjn Jun 29, 2022
52243ec
Merge branch 'master' into jax-ddpg
vwxyzjn Jun 29, 2022
9ec4ac5
update docs
vwxyzjn Jun 29, 2022
acb3293
Add jax benchmark experiments
vwxyzjn Jun 29, 2022
0e9d8f4
remove old files
vwxyzjn Jun 29, 2022
8226824
update benchmark scripts
vwxyzjn Jun 29, 2022
57230c3
update lock files
vwxyzjn Jun 29, 2022
29a0aef
Handle action space bounds
vwxyzjn Jun 30, 2022
5f0ed84
Merge branch 'master' into jax-ddpg
vwxyzjn Jun 30, 2022
024b8c5
Add docs
vwxyzjn Jun 30, 2022
34c2825
Typo
vwxyzjn Jun 30, 2022
e12c283
update CI
vwxyzjn Jun 30, 2022
7b5febd
bug fix and add docs link
vwxyzjn Jul 12, 2022
eb85ae6
Add a note explaining the speed
vwxyzjn Jul 12, 2022
003a770
Update ddpg docs
vwxyzjn Jul 12, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,8 @@ jobs:
run: poetry install -E pybullet
- name: Install mujoco dependencies
run: poetry install -E mujoco
- name: Install jax dependencies
run: poetry install -E jax
- name: Downgrade setuptools
run: poetry run pip install setuptools==59.5.0
- name: install mujoco dependencies
Expand Down
4 changes: 2 additions & 2 deletions benchmark/c51.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
poetry install
OMP_NUM_THREADS=1 xvfb-run -a python -m cleanrl_utils.benchmark \
OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \
--command "poetry run python cleanrl/c51.py --cuda False --track --capture-video" \
--num-seeds 3 \
--workers 9

poetry install -E atari
OMP_NUM_THREADS=1 xvfb-run -a python -m cleanrl_utils.benchmark \
OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \
--command "poetry run python cleanrl/c51_atari.py --track --capture-video" \
--num-seeds 3 \
Expand Down
11 changes: 10 additions & 1 deletion benchmark/ddpg.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,13 @@ OMP_NUM_THREADS=1 xvfb-run -a python -m cleanrl_utils.benchmark \
--env-ids HalfCheetah-v2 Walker2d-v2 Hopper-v2 InvertedPendulum-v2 Humanoid-v2 Pusher-v2 \
--command "poetry run python cleanrl/ddpg_continuous_action.py --track --capture-video" \
--num-seeds 3 \
--workers 3
--workers 3

poetry install -E "mujoco jax"
poetry run pip install --upgrade "jax[cuda]==0.3.14" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html
poetry run python -c "import mujoco_py"
OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids HalfCheetah-v2 Walker2d-v2 Hopper-v2 \
--command "poetry run python cleanrl/ddpg_continuous_action_jax.py --track --capture-video" \
--num-seeds 3 \
--workers 1
4 changes: 2 additions & 2 deletions benchmark/dqn.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
poetry install
OMP_NUM_THREADS=1 xvfb-run -a python -m cleanrl_utils.benchmark \
OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \
--command "poetry run python cleanrl/dqn.py --cuda False --track --capture-video" \
--num-seeds 3 \
--workers 9

poetry install -E atari
OMP_NUM_THREADS=1 xvfb-run -a python -m cleanrl_utils.benchmark \
OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \
--command "poetry run python cleanrl/dqn_atari.py --track --capture-video" \
--num-seeds 3 \
Expand Down
2 changes: 1 addition & 1 deletion benchmark/ppg.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# export WANDB_ENTITY=openrlbenchmark

poetry install -E procgen
xvfb-run -a python -m cleanrl_utils.benchmark \
xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids starpilot bossfight bigfish \
--command "poetry run python cleanrl/ppg_procgen.py --track --capture-video" \
--num-seeds 3 \
Expand Down
18 changes: 9 additions & 9 deletions benchmark/ppo.sh
Original file line number Diff line number Diff line change
@@ -1,58 +1,58 @@
# export WANDB_ENTITY=openrlbenchmark

poetry install
OMP_NUM_THREADS=1 xvfb-run -a python -m cleanrl_utils.benchmark \
OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids CartPole-v1 Acrobot-v1 MountainCar-v0 \
--command "poetry run python cleanrl/ppo.py --cuda False --track --capture-video" \
--num-seeds 3 \
--workers 9

poetry install -E atari
OMP_NUM_THREADS=1 xvfb-run -a python -m cleanrl_utils.benchmark \
OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \
--command "poetry run python cleanrl/ppo_atari.py --track --capture-video" \
--num-seeds 3 \
--workers 3

poetry install -E atari
OMP_NUM_THREADS=1 xvfb-run -a python -m cleanrl_utils.benchmark \
OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \
--command "poetry run python cleanrl/ppo_atari_lstm.py --track --capture-video" \
--num-seeds 3 \
--workers 3

poetry install -E envpool
xvfb-run -a python -m cleanrl_utils.benchmark \
xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids Pong-v5 BeamRider-v5 Breakout-v5 \
--command "poetry run python cleanrl/ppo_atari_envpool.py --track --capture-video" \
--num-seeds 3 \
--workers 1

poetry install -E "mujoco pybullet"
python -c "import mujoco_py"
OMP_NUM_THREADS=1 xvfb-run -a python -m cleanrl_utils.benchmark \
poetry run python -c "import mujoco_py"
OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids HalfCheetah-v2 Walker2d-v2 Hopper-v2 \
--command "poetry run python cleanrl/ppo_continuous_action.py --cuda False --track --capture-video" \
--num-seeds 3 \
--workers 9

poetry install -E procgen
xvfb-run -a python -m cleanrl_utils.benchmark \
xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids starpilot bossfight bigfish \
--command "poetry run python cleanrl/ppo_procgen.py --track --capture-video" \
--num-seeds 3 \
--workers 1

poetry install -E atari
xvfb-run -a python -m cleanrl_utils.benchmark \
xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids PongNoFrameskip-v4 BeamRiderNoFrameskip-v4 BreakoutNoFrameskip-v4 \
--command "poetry run torchrun --standalone --nnodes=1 --nproc_per_node=2 cleanrl/ppo_atari_multigpu.py --track --capture-video" \
--num-seeds 3 \
--workers 1

poetry install -E "pettingzoo atari"
poetry run AutoROM --accept-license
xvfb-run -a python -m cleanrl_utils.benchmark \
xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids pong_v3 surround_v2 tennis_v3 \
--command "poetry run python cleanrl/ppo_pettingzoo_ma_atari.py --track --capture-video" \
--num-seeds 3 \
Expand Down
4 changes: 2 additions & 2 deletions benchmark/sac.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
poetry install -E "mujoco pybullet"
python -c "import mujoco_py"
OMP_NUM_THREADS=1 xvfb-run -a python -m cleanrl_utils.benchmark \
poetry run python -c "import mujoco_py"
OMP_NUM_THREADS=1 xvfb-run -a poetry run python -m cleanrl_utils.benchmark \
--env-ids HalfCheetah-v2 Walker2d-v2 Hopper-v2 \
--command "poetry run python cleanrl/sac_continuous_action.py --track --capture-video" \
--num-seeds 3 \
Expand Down
Loading