Commit 8ab0868

[RLlib] MetricsLogger: Fix get/set_state to handle tensors in self.values. (#53514)
1 parent eed7e02 commit 8ab0868
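
Per the commit title, the fix makes MetricsLogger.get_state() and set_state() handle framework tensors stored in self.values. The MetricsLogger diff itself is not among the excerpts shown below; what follows is a minimal sketch of the underlying idea only, using a hypothetical TensorSafeLogger stand-in rather than RLlib's actual implementation, and assuming the intent is to detach tensors to numpy so the extracted state stays picklable:

import torch


class TensorSafeLogger:
    """Hypothetical stand-in; not RLlib's actual MetricsLogger."""

    def __init__(self):
        # Maps metric keys to scalars, numpy arrays, or torch tensors.
        self.values = {}

    def get_state(self):
        # Detach any torch tensors and convert them to numpy so the
        # returned state is picklable and framework-independent.
        return {
            key: (
                value.detach().cpu().numpy()
                if isinstance(value, torch.Tensor)
                else value
            )
            for key, value in self.values.items()
        }

    def set_state(self, state):
        # Restored values are plain numpy/scalars; downstream code can
        # re-wrap them into tensors where needed.
        self.values = dict(state)

With this shape, a state dict that originally held a GPU tensor survives a pickle round-trip on a machine without that device.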

File tree

5 files changed: +92 -73 lines changed


rllib/BUILD

Lines changed: 1 addition & 1 deletion

@@ -4209,7 +4209,7 @@ py_test(
         "--num-agents=2",
         "--as-test",
         "--evaluation-parallel-to-training",
-        "--stop-reward=900.0",
+        "--stop-reward=800.0",
         "--num-cpus=6",
         "--evaluation-duration=auto",
         "--evaluation-duration-unit=episodes",

rllib/algorithms/algorithm_config.py

Lines changed: 6 additions & 6 deletions

@@ -4349,10 +4349,10 @@ def get_default_learner_class(self) -> Union[Type["Learner"], str]:
     def get_rl_module_spec(
         self,
         env: Optional[EnvType] = None,
-        spaces: Optional[Dict[str, gym.Space]] = None,
+        spaces: Optional[Dict[str, Tuple[gym.Space, gym.Space]]] = None,
         inference_only: Optional[bool] = None,
     ) -> RLModuleSpec:
-        """Returns the RLModuleSpec based on the given env/spaces.
+        """Returns the RLModuleSpec based on the given env/spaces and this config.

         Args:
             env: An optional environment instance, from which to infer the observation-
@@ -4363,10 +4363,10 @@ def get_rl_module_spec(
             spaces: Optional dict mapping ModuleIDs to 2-tuples of observation- and
                 action space that should be used for the respective RLModule.
                 These spaces are usually provided by an already instantiated remote
-                EnvRunner (call `EnvRunner.get_spaces()`). If not provided, tries
-                to infer from `env`, otherwise from `self.observation_space` and
-                `self.action_space`. Raises an error, if no information on spaces can be
-                inferred.
+                EnvRunner (call `EnvRunner.get_spaces()` to receive this dict). If not
+                provided, RLlib tries to infer this from `env`, if provided, otherwise
+                from `self.observation_space` and `self.action_space`. Raises an error,
+                if no information on spaces can be inferred.
             inference_only: If `True`, the returned module spec is used in an
                 inference-only setting (sampling) and the RLModule can thus be built in
                 its light version (if available). For example, the `inference_only`
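
A hedged usage sketch for the updated signature, in which `spaces` maps ModuleIDs to (observation space, action space) 2-tuples. The environment, module ID, and spaces below are illustrative only, not taken from this commit:

import gymnasium as gym
from ray.rllib.algorithms.ppo import PPOConfig

config = PPOConfig().environment("CartPole-v1")
# One 2-tuple of (observation_space, action_space) per ModuleID.
spaces = {
    "default_policy": (
        gym.spaces.Box(-1.0, 1.0, (4,)),
        gym.spaces.Discrete(2),
    ),
}
spec = config.get_rl_module_spec(spaces=spaces, inference_only=True)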

rllib/algorithms/dqn/torch/dqn_torch_learner.py

Lines changed: 2 additions & 2 deletions

@@ -247,13 +247,13 @@ def possibly_masked_max(data_):
             key=module_id,
             window=1,  # <- single items (should not be mean/ema-reduced over time).
         )
-        # If we learn a Q-value distribution store the support and average
+        # If we learn a Q-value distribution log the support and average
         # probabilities.
         if config.num_atoms > 1:
             # Log important loss stats.
             self.metrics.log_dict(
                 {
-                    ATOMS: z,
+                    ATOMS: torch.mean(z),
                     # The absolute difference in expectation between the actions
                     # should (at least mildly) rise.
                     "expectations_abs_diff": torch.mean(

rllib/examples/evaluation/evaluation_parallel_to_training.py

Lines changed: 4 additions & 1 deletion

@@ -88,7 +88,10 @@
 from ray.rllib.utils.typing import ResultDict
 from ray.tune.registry import get_trainable_cls, register_env

-parser = add_rllib_example_script_args(default_reward=500.0)
+parser = add_rllib_example_script_args(
+    default_timesteps=200000,
+    default_reward=500.0,
+)
 parser.set_defaults(
     evaluation_num_env_runners=2,
     evaluation_interval=1,
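
For context, `add_rllib_example_script_args` wires these defaults into the example script's stop criteria. A hedged sketch of how the added `default_timesteps` surfaces; the attribute names are an assumption inferred from the `--stop-reward` flag seen in rllib/BUILD above, not confirmed by this commit:

from ray.rllib.utils.test_utils import add_rllib_example_script_args

parser = add_rllib_example_script_args(
    default_timesteps=200000,
    default_reward=500.0,
)
args = parser.parse_args([])
# Assumed: these defaults back the --stop-timesteps / --stop-reward flags.
print(args.stop_timesteps, args.stop_reward)  # -> 200000 500.0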
