Merge branch 'main' into main

maoulee · web-flow · commit aa111fd3188f · 2025-03-23T12:58:10.000+08:00
diff --git a/.github/workflows/tests_latest.yml b/.github/workflows/tests_latest.yml
@@ -17,7 +17,7 @@ jobs:
     steps:
       - name: Git checkout
         uses: actions/checkout@v4
-        with: { ref: v0.15-release }
+        with: { ref: v0.16-release }
       - name: Set up Python 3.12
         uses: actions/setup-python@v5
         with:
diff --git a/.gitignore b/.gitignore
@@ -142,4 +142,4 @@ checklink/cookies.txt
 # wandb files
 nbs/wandb/
 examples/notebooks/wandb/
-wandb/
+wandb/
diff --git a/CITATION.cff b/CITATION.cff
@@ -31,4 +31,4 @@ keywords:
   - pytorch
   - transformers
 license: Apache-2.0
-version: 0.15
+version: 0.16
diff --git a/docs/source/data_utils.md b/docs/source/data_utils.md
@@ -35,3 +35,11 @@
 ## pack_examples
 
 [[autodoc]] pack_examples
+
+## pack_dataset
+
+[[autodoc]] pack_dataset
+
+## truncate_dataset
+
+[[autodoc]] truncate_dataset
diff --git a/docs/source/reducing_memory_usage.md b/docs/source/reducing_memory_usage.md
@@ -136,6 +136,15 @@ When using DeepSpeed ZeRO-3, model weights are sharded across multiple GPUs. Onl
 If you encounter this issue, you can disable the gathering of model weights for generation by setting the following parameter:
 
 <hfoptions id="ds3_gather_for_generation">
+<hfoption id="GRPO">
+
+```python
+from trl import GRPOConfig
+
+training_args = GRPOConfig(..., ds3_gather_for_generation=False)
+```
+
+</hfoption>
 <hfoption id="Online DPO">
 
 ```python
diff --git a/setup.py b/setup.py
@@ -69,7 +69,7 @@
 from setuptools import find_packages, setup
 
 
-__version__ = "0.16.0.dev0"  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
+__version__ = "0.17.0.dev0"  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
 
 REQUIRED_PKGS = [
     "accelerate>=0.34.0",
diff --git a/tests/test_data_utils.py b/tests/test_data_utils.py
@@ -27,7 +27,9 @@
     maybe_convert_to_chatml,
     maybe_extract_prompt,
     maybe_unpair_preference_dataset,
+    pack_dataset,
     pack_examples,
+    truncate_dataset,
     unpair_preference_dataset,
 )
 
@@ -395,7 +397,7 @@ def test_maybe_extract_prompt_standard_already_explicit(self):
 
 
 class TestPackExamples(unittest.TestCase):
-    def test_pack_examples_larger_chunks(self):
+    def test_larger_chunks(self):
         examples = {
             "input_ids": [[1, 2, 3], [4, 5, 6, 7], [8]],
             "attention_mask": [[0, 1, 1], [0, 0, 1, 1], [1]],
@@ -408,7 +410,7 @@ def test_pack_examples_larger_chunks(self):
         result = pack_examples(examples, seq_length)
         self.assertEqual(result, expected_output)
 
-    def test_pack_examples_smaller_chunks(self):
+    def test_smaller_chunks(self):
         examples = {
             "input_ids": [[1, 2, 3], [4, 5, 6, 7], [8]],
             "attention_mask": [[0, 1, 1], [0, 0, 1, 1], [1]],
@@ -421,7 +423,7 @@ def test_pack_examples_smaller_chunks(self):
         result = pack_examples(examples, seq_length)
         self.assertEqual(result, expected_output)
 
-    def test_pack_with_dataset(self):
+    def test_with_dataset(self):
         examples = {
             "input_ids": [[1, 2, 3], [4, 5, 6, 7], [8]],
             "attention_mask": [[0, 1, 1], [0, 0, 1, 1], [1]],
@@ -436,6 +438,84 @@ def test_pack_with_dataset(self):
         self.assertEqual(dataset.to_dict(), expected_output)
 
 
+class TestPackDataset(unittest.TestCase):
+    def test_with_dataset(self):
+        examples = {
+            "input_ids": [[1, 2, 3], [4, 5, 6, 7], [8]],
+            "attention_mask": [[0, 1, 1], [0, 0, 1, 1], [1]],
+        }
+        dataset = Dataset.from_dict(examples)
+        seq_length = 3
+        expected_output = {
+            "input_ids": [[1, 2, 3], [4, 5, 6], [7, 8]],
+            "attention_mask": [[0, 1, 1], [0, 0, 1], [1, 1]],
+        }
+        dataset = pack_dataset(dataset, seq_length)
+        self.assertEqual(dataset.to_dict(), expected_output)
+
+    def test_with_iterable_dataset(self):
+        examples = {
+            "input_ids": [[1, 2, 3], [4, 5, 6, 7], [8]],
+            "attention_mask": [[0, 1, 1], [0, 0, 1, 1], [1]],
+        }
+        dataset = Dataset.from_dict(examples).to_iterable_dataset()
+        seq_length = 3
+        expected_output = {
+            "input_ids": [[1, 2, 3], [4, 5, 6], [7, 8]],
+            "attention_mask": [[0, 1, 1], [0, 0, 1], [1, 1]],
+        }
+        dataset = pack_dataset(dataset, seq_length)
+        num_examples = len(examples[next(iter(examples))])
+        self.assertEqual(next(iter(dataset.batch(batch_size=num_examples))), expected_output)
+
+
+class TestTruncateExamples(unittest.TestCase):
+    def test_with_dataset(self):
+        examples = {
+            "input_ids": [[1, 2, 3], [4, 5, 6, 7], [8]],
+            "attention_mask": [[0, 1, 1], [0, 0, 1, 1], [1]],
+        }
+        dataset = Dataset.from_dict(examples)
+        max_length = 2
+        expected_output = {
+            "input_ids": [[1, 2], [4, 5], [8]],
+            "attention_mask": [[0, 1], [0, 0], [1]],
+        }
+        dataset = truncate_dataset(dataset, max_length)
+        self.assertEqual(dataset.to_dict(), expected_output)
+
+    def test_with_iterable_dataset(self):
+        examples = {
+            "input_ids": [[1, 2, 3], [4, 5, 6, 7], [8]],
+            "attention_mask": [[0, 1, 1], [0, 0, 1, 1], [1]],
+        }
+        dataset = Dataset.from_dict(examples).to_iterable_dataset()
+        max_length = 2
+        expected_output = {
+            "input_ids": [[1, 2], [4, 5], [8]],
+            "attention_mask": [[0, 1], [0, 0], [1]],
+        }
+        dataset = truncate_dataset(dataset, max_length)
+        num_examples = len(examples[next(iter(examples))])
+        self.assertEqual(next(iter(dataset.batch(batch_size=num_examples))), expected_output)
+
+    def test_with_extra_column(self):
+        examples = {
+            "input_ids": [[1, 2, 3], [4, 5, 6, 7], [8]],
+            "attention_mask": [[0, 1, 1], [0, 0, 1, 1], [1]],
+            "my_column": ["a", "b", "c"],
+        }
+        dataset = Dataset.from_dict(examples)
+        max_length = 2
+        expected_output = {
+            "input_ids": [[1, 2], [4, 5], [8]],
+            "attention_mask": [[0, 1], [0, 0], [1]],
+            "my_column": ["a", "b", "c"],
+        }
+        dataset = truncate_dataset(dataset, max_length)
+        self.assertEqual(dataset.to_dict(), expected_output)
+
+
 class TestMaybeConvertToChatML(unittest.TestCase):
     def test_with_conversations_key(self):
         # Particular case where the key is "conversations": we rename it to "messages"
diff --git a/tests/test_grpo_trainer.py b/tests/test_grpo_trainer.py
@@ -914,3 +914,34 @@ def test_training_vllm_with_additional_generation_kwargs(self):
             for n, param in previous_trainable_params.items():
                 new_param = trainer.model.get_parameter(n)
                 self.assertFalse(torch.equal(param, new_param), f"Parameter {n} has not changed.")
+
+    def test_training_no_scale_rewards(self):
+        dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
+
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            training_args = GRPOConfig(
+                output_dir=tmp_dir,
+                learning_rate=0.1,  # increase the learning rate to speed up the test
+                per_device_train_batch_size=3,  # reduce the batch size to reduce memory usage
+                num_generations=3,  # reduce the number of generations to reduce memory usage
+                max_completion_length=32,  # reduce the completion length to reduce memory usage
+                scale_rewards=False,
+                report_to="none",
+            )
+            trainer = GRPOTrainer(
+                model="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
+                reward_funcs="trl-internal-testing/tiny-Qwen2ForSequenceClassification-2.5",
+                args=training_args,
+                train_dataset=dataset,
+            )
+
+            previous_trainable_params = {n: param.clone() for n, param in trainer.model.named_parameters()}
+
+            trainer.train()
+
+            self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
+
+            # Check that the params have changed
+            for n, param in previous_trainable_params.items():
+                new_param = trainer.model.get_parameter(n)
+                self.assertFalse(torch.equal(param, new_param), f"Parameter {n} has not changed.")
diff --git a/trl/__init__.py b/trl/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "0.16.0.dev0"
+__version__ = "0.17.0.dev0"
 
 from typing import TYPE_CHECKING
 
@@ -29,7 +29,9 @@
         "maybe_convert_to_chatml",
         "maybe_extract_prompt",
         "maybe_unpair_preference_dataset",
+        "pack_dataset",
         "pack_examples",
+        "truncate_dataset",
         "unpair_preference_dataset",
     ],
     "environment": ["TextEnvironment", "TextHistory"],
@@ -130,7 +132,9 @@
         maybe_convert_to_chatml,
         maybe_extract_prompt,
         maybe_unpair_preference_dataset,
+        pack_dataset,
         pack_examples,
+        truncate_dataset,
         unpair_preference_dataset,
     )
     from .environment import TextEnvironment, TextHistory
diff --git a/trl/data_utils.py b/trl/data_utils.py
@@ -12,8 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import functools
 from typing import Any, Callable, Optional, Sequence, TypeVar, Union
 
+import numpy as np
+import pyarrow as pa
+import pyarrow.compute as pc
+import pyarrow.types
 from datasets import Dataset, DatasetDict
 from transformers import PreTrainedTokenizerBase
 
@@ -466,6 +471,132 @@ def pack_examples(examples: dict[str, list[list]], seq_length: int) -> dict[str,
     return examples
 
 
+def pack_dataset(dataset: DatasetType, seq_length: int, map_kwargs: Optional[dict[str, Any]] = None) -> DatasetType:
+    r"""
+    Pack sequences in a dataset into chunks of size `seq_length`.
+
+    Args:
+        dataset (`Dataset` or `DatasetDict`):
+            Dataset to pack.
+        seq_length (`int`):
+            Target sequence length to pack to.
+        map_kwargs (`dict` or `None`, *optional*, defaults to `None`):
+            Additional keyword arguments to pass to the dataset's map method when packing examples.
+
+    Returns:
+        `Dataset` or `DatasetDict`: The dataset with packed sequences. The number of examples may
+        decrease as sequences are combined.
+
+    Example:
+    ```python
+    >>> from datasets import Dataset
+    >>> examples = {
+    ...     "input_ids": [[1, 2], [3, 4], [5, 6], [7]],
+    ...     "attention_mask": [[1, 1], [0, 1], [1, 1], [1]],
+    ... }
+    >>> dataset = Dataset.from_dict(examples)
+    >>> packed_dataset = pack_dataset(dataset, seq_length=4)
+    >>> packed_dataset[:]
+    {'input_ids': [[1, 2, 3, 4], [5, 6, 7]],
+     'attention_mask': [[1, 1, 0, 1], [1, 1, 1]]}
+    ```
+    """
+    if map_kwargs is None:
+        map_kwargs = {}
+    if isinstance(dataset, Dataset):
+        # Fast packing with pyarrow
+        def pack(examples):
+            packed_columns = []
+            for column in examples.columns:
+                if pyarrow.types.is_list(column.type) or pyarrow.types.is_large_list(column.type):
+                    if isinstance(column, pa.ChunkedArray):
+                        column = column.combine_chunks()
+                    offsets, values = column.offsets, column.values
+                    values = values[offsets[0].as_py() : offsets[-1].as_py()]
+                    num_elements = len(values)
+                    dtype = offsets.type.to_pandas_dtype()  # np.int32 or np.int64
+                    offsets = np.arange(0, num_elements, seq_length, dtype=dtype)
+                    offsets = np.concatenate((offsets, [num_elements]))
+                    column = type(column).from_arrays(offsets, values)
+                packed_columns.append(column)
+            return pa.Table.from_arrays(packed_columns, names=examples.column_names)
+
+        dataset = dataset.with_format("arrow")
+        dataset = dataset.map(pack, batched=True, **map_kwargs)
+        dataset = dataset.with_format(None)
+    else:
+        dataset = dataset.map(
+            functools.partial(pack_examples, seq_length=seq_length),
+            batched=True,
+            **map_kwargs,
+        )
+    return dataset
+
+
+def truncate_dataset(
+    dataset: DatasetType, max_length: int, map_kwargs: Optional[dict[str, Any]] = None
+) -> DatasetType:
+    r"""
+    Truncate sequences in a dataset to a specified `max_length`.
+
+    Args:
+        dataset (`Dataset` or `DatasetDict`):
+            Dataset to truncate.
+        max_length (`int`):
+            Maximum sequence length to truncate to.
+        map_kwargs (`dict` or `None`, *optional*, defaults to `None`):
+            Additional keyword arguments to pass to the dataset's map method when truncating examples.
+
+    Returns:
+        `Dataset` or `DatasetDict`: The dataset with truncated sequences.
+
+    Example:
+    ```python
+    >>> from datasets import Dataset
+    >>> examples = {
+    ...     "input_ids": [[1, 2, 3], [4, 5, 6, 7], [8]],
+    ...     "attention_mask": [[0, 1, 1], [0, 0, 1, 1], [1]],
+    ... }
+    >>> dataset = Dataset.from_dict(examples)
+    >>> truncated_dataset = truncate_dataset(dataset, max_length=2)
+    >>> truncated_dataset[:]
+    {'input_ids': [[1, 2], [4, 5], [8]],
+     'attention_mask': [[0, 1], [0, 0], [1]]}
+    ```
+    """
+    if map_kwargs is None:
+        map_kwargs = {}
+    if isinstance(dataset, Dataset):
+        # Fast truncation with pyarrow
+        def truncate(examples):
+            truncated_columns = []
+            for column in examples.columns:
+                if pyarrow.types.is_list(column.type) or pyarrow.types.is_large_list(column.type):
+                    column = pc.list_slice(column, 0, max_length)
+                truncated_columns.append(column)
+            return pa.Table.from_arrays(truncated_columns, names=examples.column_names)
+
+        dataset = dataset.with_format("arrow")
+        dataset = dataset.map(truncate, batched=True, **map_kwargs)
+        dataset = dataset.with_format(None)
+    else:
+
+        def truncate(examples):
+            truncated_examples = {}
+            for key, column in examples.items():
+                if column and isinstance(column[0], list):
+                    column = [val[:max_length] for val in column]
+                truncated_examples[key] = column
+            return truncated_examples
+
+        dataset = dataset.map(
+            truncate,
+            batched=True,
+            **map_kwargs,
+        )
+    return dataset
+
+
 def maybe_convert_to_chatml(example: dict[str, list]) -> dict[str, list]:
     """
     Convert a conversational dataset with fields `from` and `value` to ChatML format.
diff --git a/trl/trainer/grpo_config.py b/trl/trainer/grpo_config.py
diff --git a/trl/trainer/grpo_trainer.py b/trl/trainer/grpo_trainer.py
diff --git a/trl/trainer/sft_trainer.py b/trl/trainer/sft_trainer.py