Commit caf3680

[RLlib] MetricsLogger + Stats overhaul. (#51639)
1 parent 5aa5462 commit caf3680

14 files changed (+2022, -717 lines)

doc/source/rllib/metrics-logger.rst

Lines changed: 39 additions & 10 deletions
@@ -181,15 +181,19 @@ to :py:class:`~ray.rllib.algorithms.algorithm.Algorithm`:
 .. testcode::
 
     logger.log_value("some_items", value="a", reduce=None, clear_on_reduce=True)
-    logger.log_value("some_items", value="b")
-    logger.log_value("some_items", value="c")
-    logger.log_value("some_items", value="d")
+    logger.log_value("some_items", value="b", reduce=None, clear_on_reduce=True)
+    logger.log_value("some_items", value="c", reduce=None, clear_on_reduce=True)
+    logger.log_value("some_items", value="d", reduce=None, clear_on_reduce=True)
 
     logger.peek("some_items")  # expect a list: ["a", "b", "c", "d"]
 
     logger.reduce()
     logger.peek("some_items")  # expect an empty list: []
 
+You should pass additional arguments like ``reduce=None`` and ``clear_on_reduce=True`` to the
+:py:meth:`~ray.rllib.utils.metrics.metrics_logger.MetricsLogger.log_value` method on each call.
+Otherwise, MetricsLogger will emit warnings to ensure that its behavior is always as expected.
+
 
 Logging a set of nested scalar values
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -238,9 +242,9 @@ log three consecutive image frames from a ``CartPole`` environment, do the follo
     env.reset()
     logger.log_value("some_images", value=env.render(), reduce=None, clear_on_reduce=True)
     env.step(0)
-    logger.log_value("some_images", value=env.render())
+    logger.log_value("some_images", value=env.render(), reduce=None, clear_on_reduce=True)
     env.step(1)
-    logger.log_value("some_images", value=env.render())
+    logger.log_value("some_images", value=env.render(), reduce=None, clear_on_reduce=True)
 
 Timers
 ~~~~~~
@@ -296,7 +300,7 @@ Set ``clear_on_reduce=False``, which is the default, if you want the count to ac
     logger = MetricsLogger()
 
     logger.log_value("my_counter", 50, reduce="sum", window=None)
-    logger.log_value("my_counter", 25)
+    logger.log_value("my_counter", 25, reduce="sum", window=None)
     logger.peek("my_counter")  # expect: 75
 
     # Even if your logger gets "reduced" from time to time, the counter keeps increasing
@@ -306,7 +310,7 @@ Set ``clear_on_reduce=False``, which is the default, if you want the count to ac
 
     # To clear the sum after each "reduce" event, set `clear_on_reduce=True`:
     logger.log_value("my_temp_counter", 50, reduce="sum", window=None, clear_on_reduce=True)
-    logger.log_value("my_temp_counter", 25)
+    logger.log_value("my_temp_counter", 25, reduce="sum", window=None, clear_on_reduce=True)
     logger.peek("my_counter")  # expect: 75
     logger.reduce()
     logger.peek("my_counter")  # expect: 0 (upon reduction, all values are cleared)
@@ -323,8 +327,7 @@ on each ``reduce()`` operation.
 The :py:class:`~ray.rllib.algorithms.algorithm.Algorithm` automatically compiles an extra key for each such metric, adding the suffix ``_throughput``
 to the original key and assigning it the value for the throughput per second.
 
-You can use the :py:meth:`~ray.rllib.utils.metrics.metrics_logger.MetricsLogger.peek` method with the call argument ``throughput=True``
-to access the throughput value. For example:
+You can use the :py:meth:`~ray.rllib.utils.metrics.metrics_logger.MetricsLogger.peek` method to access the throughput value by passing the ``throughput=True`` flag.
 
 .. testcode::
 
@@ -337,13 +340,39 @@ to access the throughput value. For example:
     logger.log_value("lifetime_count", 5, reduce="sum", with_throughput=True)
 
     # RLlib triggers a new throughput computation at each `reduce()` call
-    logger.reduce()
     time.sleep(1.0)
 
     # Expect the first call to return NaN because we don't have a proper start time for the time delta.
     # From the second call on, expect a value of roughly 5/sec.
     print(logger.peek("lifetime_count", throughput=True))
 
+    logger.log_value("lifetime_count", 5, reduce="sum", with_throughput=True)
+    # Expect the throughput to be roughly 10/sec now.
+    print(logger.peek("lifetime_count", throughput=True))
+
+    # You can also get a dict of all throughputs at once:
+    print(logger.peek(throughput=True))
+
+
+Measuring throughputs with MetricsLogger.log_time()
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You can also use the :py:meth:`~ray.rllib.utils.metrics.metrics_logger.MetricsLogger.log_time` method to measure throughputs.
+
+.. testcode::
+
+    import time
+    from ray.rllib.utils.metrics.metrics_logger import MetricsLogger
+
+    logger = MetricsLogger()
+
+    for _ in range(3):
+        with logger.log_time("my_block_to_be_timed", with_throughput=True):
+            time.sleep(1.0)
+
+    # Expect the throughput to be roughly 1.0/sec.
+    print(logger.peek("my_block_to_be_timed", throughput=True))
+
 
 Example 1: How to use MetricsLogger in EnvRunner callbacks
 ----------------------------------------------------------
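The doc change above makes the call pattern explicit: reduction settings such as ``reduce=None`` and ``clear_on_reduce=True`` now accompany every ``log_value`` call for a given key, rather than only the first one. A minimal sketch of that pattern, not part of the diff (the key name is illustrative):

    from ray.rllib.utils.metrics.metrics_logger import MetricsLogger

    logger = MetricsLogger()

    # Repeat the same settings on every call for the same key; mixing
    # settings across calls for one key is what the new warnings flag.
    for item in ["a", "b", "c", "d"]:
        logger.log_value("some_items", value=item, reduce=None, clear_on_reduce=True)

    print(logger.peek("some_items"))  # ["a", "b", "c", "d"]
    logger.reduce()
    print(logger.peek("some_items"))  # [] (values are cleared on reduce)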

rllib/BUILD

Lines changed: 21 additions & 0 deletions
@@ -2896,6 +2896,27 @@ py_test(
     ],
 )
 
+# Test metrics (metrics logger, stats)
+py_test(
+    name = "test_metrics_logger",
+    size = "small",
+    srcs = ["utils/metrics/tests/test_metrics_logger.py"],
+    tags = [
+        "team:rllib",
+        "utils",
+    ],
+)
+
+py_test(
+    name = "test_stats",
+    size = "small",
+    srcs = ["utils/metrics/tests/test_stats.py"],
+    tags = [
+        "team:rllib",
+        "utils",
+    ],
+)
+
 # @OldAPIStack
 py_test(
     name = "test_value_predictions",

rllib/algorithms/algorithm.py

Lines changed: 13 additions & 31 deletions
@@ -159,7 +159,6 @@
 )
 from ray.rllib.utils.metrics.learner_info import LEARNER_INFO
 from ray.rllib.utils.metrics.metrics_logger import MetricsLogger
-from ray.rllib.utils.metrics.stats import Stats
 from ray.rllib.utils.replay_buffers import MultiAgentReplayBuffer, ReplayBuffer
 from ray.rllib.utils.runners.runner_group import RunnerGroup
 from ray.rllib.utils.serialization import deserialize_type, NOT_SERIALIZABLE
@@ -484,7 +483,7 @@ def __init__(
         # The Algorithm's `MetricsLogger` object to collect stats from all its
         # components (including timers, counters and other stats in its own
         # `training_step()` and other methods) as well as custom callbacks.
-        self.metrics = MetricsLogger()
+        self.metrics = MetricsLogger(root=True)
 
         # Create a default logger creator if no logger_creator is specified
         if logger_creator is None:
@@ -1139,9 +1138,8 @@ def evaluate_offline(self):
         # Evaluate with fixed duration.
         self._evaluate_offline_with_fixed_duration()
         # Reduce the evaluation results.
-        eval_results = self.metrics.reduce(
-            key=(EVALUATION_RESULTS, OFFLINE_EVAL_RUNNER_RESULTS),
-            return_stats_obj=False,
+        eval_results = self.metrics.peek(
+            ("EVALUATION_RESULTS", "OFFLINE_EVAL_RUNNER_RESULTS"), default={}
         )
 
         # Trigger `on_evaluate_offline_end` callback.
@@ -1292,9 +1290,11 @@ def evaluate(
         eval_results = {}
 
         if self.config.enable_env_runner_and_connector_v2:
-            eval_results = self.metrics.reduce(
-                key=EVALUATION_RESULTS, return_stats_obj=False
-            )
+            eval_results = self.metrics.peek(key=EVALUATION_RESULTS, default={})
+            if log_once("no_eval_results") and not eval_results:
+                logger.warning(
+                    "No evaluation results found for this iteration. This can happen if the evaluation worker(s) is/are not healthy."
+                )
         else:
             eval_results = {ENV_RUNNER_RESULTS: eval_results}
             eval_results[NUM_AGENT_STEPS_SAMPLED_THIS_ITER] = agent_steps
@@ -3382,9 +3382,9 @@ def _run_one_training_iteration(self) -> Tuple[ResultDict, "TrainIterCtx"]:
                 key=AGGREGATOR_ACTOR_RESULTS,
             )
 
-        # Only here (at the end of the iteration), reduce the results into a single
-        # result dict.
-        return self.metrics.reduce(), train_iter_ctx
+        # Only here (at the end of the iteration), compile the results into a single result dict.
+        # Calling compile here reduces the metrics into single values and adds throughputs to the results where applicable.
+        return self.metrics.compile(), train_iter_ctx
 
     def _run_one_offline_evaluation(self):
         """Runs offline evaluation step via `self.offline_evaluate()` and handling runner
@@ -3606,26 +3606,7 @@ def _compile_iteration_results(self, *, train_results, eval_results):
             ),
         }
 
-        # Compile all throughput stats.
-        throughputs = {}
-
-        def _reduce(p, s):
-            if isinstance(s, Stats):
-                ret = s.peek()
-                _throughput = s.peek(throughput=True)
-                if _throughput is not None:
-                    _curr = throughputs
-                    for k in p[:-1]:
-                        _curr = _curr.setdefault(k, {})
-                    _curr[p[-1] + "_throughput"] = _throughput
-            else:
-                ret = s
-            return ret
-
-        # Resolve all `Stats` leafs by peeking (get their reduced values).
-        all_results = tree.map_structure_with_path(_reduce, results)
-        deep_update(all_results, throughputs, new_keys_allowed=True)
-        return all_results
+        return results
 
     def __repr__(self):
         if self.config.enable_rl_module_and_learner:
@@ -4466,6 +4447,7 @@ def should_stop(self, results):
         min_t = self.algo.config.min_time_s_per_iteration
         min_sample_ts = self.algo.config.min_sample_timesteps_per_iteration
         min_train_ts = self.algo.config.min_train_timesteps_per_iteration
+
         # Repeat if not enough time has passed or if not enough
         # env|train timesteps have been processed (or these min
         # values are not provided by the user).
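Taken together, the ``algorithm.py`` changes move callers from destructive ``reduce()`` reads to non-destructive ``peek()`` reads, with a single ``compile()`` at the end of each training iteration. A rough sketch of the resulting usage, not part of the diff and with an illustrative metric key:

    from ray.rllib.utils.metrics.metrics_logger import MetricsLogger

    metrics = MetricsLogger(root=True)
    metrics.log_value("num_env_steps_sampled_lifetime", 200, reduce="sum", with_throughput=True)

    # Non-destructive read at any point during the iteration; `default`
    # guards against keys that were never logged (for example, when all
    # evaluation workers are unhealthy).
    current = metrics.peek("num_env_steps_sampled_lifetime", default=0)

    # One reduction at the very end of the iteration; per the new comment,
    # compile() also folds per-key throughputs into the returned result dict.
    results = metrics.compile()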

rllib/algorithms/impala/impala.py

Lines changed: 1 addition & 1 deletion
@@ -931,7 +931,7 @@ def _training_step_old_api_stack(self):
 
         # With a training step done, try to bring any aggregators back to life
         # if necessary.
-        # Aggregation workers are stateless, so we do not need to restore any
+        # AggregatorActor are stateless, so we do not need to restore any
        # state here.
         if self._aggregator_actor_manager:
             self._aggregator_actor_manager.probe_unhealthy_actors(

rllib/algorithms/sac/torch/sac_torch_learner.py

Lines changed: 2 additions & 2 deletions
@@ -211,8 +211,8 @@ def compute_loss_for_module(
                 POLICY_LOSS_KEY: actor_loss,
                 QF_LOSS_KEY: critic_loss,
                 "alpha_loss": alpha_loss,
-                "alpha_value": alpha,
-                "log_alpha_value": torch.log(alpha),
+                "alpha_value": alpha[0],
+                "log_alpha_value": torch.log(alpha)[0],
                 "target_entropy": self.target_entropy[module_id],
                 LOGPS_KEY: torch.mean(fwd_out["logp_resampled"]),
                 QF_MEAN_KEY: torch.mean(fwd_out["q_curr"]),
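For context on the SAC change: ``alpha`` is held as a one-element tensor, so indexing with ``[0]`` logs the scalar element instead of the vector, presumably to match what the overhauled Stats expects for scalar metrics. A tiny standalone illustration, not part of the diff:

    import torch

    # SAC keeps log-alpha as a 1-element parameter tensor.
    log_alpha = torch.nn.Parameter(torch.zeros(1))
    alpha = torch.exp(log_alpha)

    print(alpha.shape)     # torch.Size([1]) -- a 1-element vector
    print(alpha[0].shape)  # torch.Size([])  -- the 0-dim scalar that gets logged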

rllib/core/learner/learner.py

Lines changed: 3 additions & 0 deletions
@@ -1645,18 +1645,21 @@ def _log_steps_trained_metrics(self, batch: MultiAgentBatch):
             key=(mid, NUM_MODULE_STEPS_TRAINED_LIFETIME),
             value=module_batch_size,
             reduce="sum",
+            with_throughput=True,
         )
         # Log module steps (sum of all modules).
         self.metrics.log_value(
             key=(ALL_MODULES, NUM_MODULE_STEPS_TRAINED),
             value=module_batch_size,
             reduce="sum",
             clear_on_reduce=True,
+            with_throughput=True,
         )
         self.metrics.log_value(
             key=(ALL_MODULES, NUM_MODULE_STEPS_TRAINED_LIFETIME),
             value=module_batch_size,
             reduce="sum",
+            with_throughput=True,
         )
         # Log env steps (all modules).
         self.metrics.log_value(
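With ``with_throughput=True`` on these counters, the Learner's trained-step metrics gain the per-second throughput described in the doc changes above. A small sketch of how such a counter reads back, not part of the diff (the key name is illustrative):

    import time
    from ray.rllib.utils.metrics.metrics_logger import MetricsLogger

    metrics = MetricsLogger()

    # Same pattern the Learner now uses for its trained-step counters.
    metrics.log_value("num_module_steps_trained", 4000, reduce="sum", with_throughput=True)
    time.sleep(1.0)
    metrics.log_value("num_module_steps_trained", 4000, reduce="sum", with_throughput=True)

    # Per-second throughput of the counter; NaN until enough measurements
    # exist. Algorithm surfaces it under an extra "..._throughput" result key.
    print(metrics.peek("num_module_steps_trained", throughput=True))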

rllib/examples/evaluation/custom_evaluation.py

Lines changed: 1 addition & 3 deletions
@@ -154,9 +154,7 @@ def custom_eval_function(
     algorithm.metrics.merge_and_log_n_dicts(
         env_runner_metrics, key=(EVALUATION_RESULTS, ENV_RUNNER_RESULTS)
     )
-    eval_results = algorithm.metrics.reduce(
-        key=(EVALUATION_RESULTS, ENV_RUNNER_RESULTS)
-    )
+    eval_results = algorithm.metrics.peek((EVALUATION_RESULTS, ENV_RUNNER_RESULTS))
     # Alternatively, you could manually reduce over the n returned `env_runner_metrics`
     # dicts, but this would be much harder as you might not know, which metrics
     # to sum up, which ones to average over, etc..

rllib/examples/learners/classes/vpg_torch_learner.py

Lines changed: 1 addition & 2 deletions
@@ -53,9 +53,8 @@ def compute_loss_for_module(
         self.metrics.log_value(
             key=(module_id, f"action_{act}_return_to_go_mean"),
             value=ret_to_go,
-            # Mean over the batch size.
             reduce="mean",
-            window=len(batch[Columns.RETURNS_TO_GO]),
+            clear_on_reduce=True,
         )
 
         return loss

rllib/tuned_examples/impala/heavy_cartpole_impala.py

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 # Non-learning, throughput-only benchmark used to tune and test the usage of
-# AggregationActors in IMPALA and APPO.
+# AggregatorActor in IMPALA and APPO.
 
 # With the current setup below, 27 EnvRunners (+ 2 eval EnvRunners), 0 Learners
 # 1 local A10 GPU Learner and 2 Aggregator actors, the achieved training throughput
