
Commit da2d521

jhinpan authored and PrinsYin committed
Feature: Add SGLang support for GRPO Trainer
Squashed commit history:

- Turn to the online server API Usage
- add test and fix bugs in result parsing
- Pass First test with fixing _update_sglang_weights
- Remove checkpoints from tracking and add to .gitignore
- config to run on single gpu successfully
- Update code to align with vllm save model and update weight
- save model only main process
- A runnable update_from_tensor version
- fix performance issue
- resolve comment: help strings (×2)
- Update trl/trainer/grpo_config.py (×6)
- Update trl/trainer/grpo_trainer.py
- call raise_for_status
- remove duplicate doc string
- formatting
- add sglang to extras
- formatting
- import requests only when sglang is available
- formatting
- undo formatting
- undo formatting more
- undo last one!
- add initial docs
- add sglang
- last one now
- new line
- delete test scripts
- Update setup.cfg (×2)
- initial sglang-serve cli script
- Update trl/trainer/grpo_trainer.py
- remove dead code
- debug GRPO trainer
- change num_processes
- update how to run sglang

Co-authored-by: Kashif Rasul <[email protected]>
1 parent 559a99f commit da2d521

8 files changed (+498, -5 lines)


docs/source/grpo_trainer.md

Lines changed: 60 additions & 0 deletions
@@ -243,6 +243,66 @@ If the recommended value does not work in your environment, we suggest adding a

For more information, see [Speeding up training with vLLM](speeding_up_training#vllm-for-fast-generation-in-online-methods).

### Speed up training with SGLang-powered generation

Another alternative to vLLM is [SGLang](https://sglang.ai/), which also enables fast generation. To enable it, first install the package with:

```shell
git clone git@github.com:huggingface/trl.git
cd trl
python3 -m uv pip install -e ".[sglang]"

# start the SGLang server
python3 -m sglang.launch_server --model-path qwen/qwen2.5-7b-instruct

# export CUDA_VISIBLE_DEVICES to select the GPU(s) for training
# run the training script
python3 grpo_test.py
```
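Before starting training, it can be worth confirming that the server is reachable. A minimal sanity check, assuming the server listens on the default port 30000 and exposes SGLang's `/health` and `/get_model_info` endpoints:

```shell
# check that the SGLang server is up and which model it loaded
curl http://127.0.0.1:30000/health
curl http://127.0.0.1:30000/get_model_info
```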
Then, pass `use_sglang=True` in the training arguments and point to the SGLang server via `sglang_server_url`:

```python
import os

from datasets import load_dataset

from trl import GRPOConfig, GRPOTrainer


dataset = load_dataset("trl-lib/tldr", split="train[:10%]")

checkpoint_dir = os.path.join("/sgl-workspace/ryang/trl", "checkpoints/sgl")
os.makedirs(checkpoint_dir, exist_ok=True)


def reward_len(completions, **kwargs):
    return [-abs(20 - len(completion)) for completion in completions]


training_args = GRPOConfig(
    output_dir=os.path.join(checkpoint_dir, "Qwen2.5_output"),
    logging_steps=10,
    use_sglang=True,
    sglang_device="cuda:0",
    sglang_gpu_memory_utilization=0.9,
    sglang_server_url="http://127.0.0.1:30000",
)

trainer = GRPOTrainer(
    model="Qwen/Qwen2.5-7B-Instruct",
    reward_funcs=reward_len,
    args=training_args,
    train_dataset=dataset,
)

training_args.checkpoint_path = checkpoint_dir

trainer.train()
```
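How the script is launched depends on your GPU layout: the GPU serving SGLang is typically kept separate from the training GPUs. A minimal launch sketch, assuming the example above is saved as `grpo_test.py` and a single training GPU; the exact `CUDA_VISIBLE_DEVICES` value and `--num_processes` depend on your hardware (a plain `python3 grpo_test.py` run, as shown earlier, also works for quick tests):

```shell
# reserve a GPU for training; the SGLang server keeps the device it was launched on
export CUDA_VISIBLE_DEVICES=0
accelerate launch --num_processes 1 grpo_test.py
```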
### GRPO at scale: train a 70B+ Model on multiple nodes

When training large models like **Qwen2.5-72B**, you need several key optimizations to make the training efficient and scalable across multiple GPUs and nodes. These include:

setup.cfg

Lines changed: 5 additions & 0 deletions
@@ -74,6 +74,10 @@ vllm =
     requests; python_version < "3.13"
     uvicorn; python_version < "3.13"
 
+sglang =
+    sglang>=0.4.6post2
+    requests
+
 vlm =
     Pillow
 dev =
@@ -87,6 +91,7 @@ dev =
     %(scikit)s
     %(test)s
     %(vlm)s
+    %(sglang)s
 
 [options.entry_points]
 console_scripts =

trl/cli.py

Lines changed: 6 additions & 0 deletions
@@ -24,6 +24,8 @@
 from .scripts.grpo import make_parser as make_grpo_parser
 from .scripts.kto import make_parser as make_kto_parser
 from .scripts.sft import make_parser as make_sft_parser
+from .scripts.sglang_serve import main as sglang_serve_main
+from .scripts.sglang_serve import make_parser as make_sglang_serve_parser
 from .scripts.utils import TrlParser
 from .scripts.vllm_serve import main as vllm_serve_main
 from .scripts.vllm_serve import make_parser as make_vllm_serve_parser
@@ -42,6 +44,7 @@ def main():
     make_kto_parser(subparsers)
     make_sft_parser(subparsers)
     make_vllm_serve_parser(subparsers)
+    make_sglang_serve_parser(subparsers)
 
     # Parse the arguments; the remaining ones (`launch_args`) are passed to the 'accelerate launch' subparser.
     # Duplicates may occur if the same argument is provided in both the config file and CLI.
@@ -131,6 +134,9 @@ def main():
             )
 
         vllm_serve_main(script_args)
+    elif args.command == "sglang-serve":
+        (script_args,) = parser.parse_args_and_config()
+        sglang_serve_main(script_args)
 
 
 if __name__ == "__main__":
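With the parser registered, the new subcommand becomes available from the `trl` CLI. A minimal sketch of how it would be invoked, assuming the `sglang-serve` script mirrors the existing `vllm-serve` interface; its actual flags are defined in `trl/scripts/sglang_serve.py`, which is not shown in this diff, so the `--model` flag here is illustrative:

```shell
# launch an SGLang generation server for GRPO training (flags are illustrative)
trl sglang-serve --model qwen/qwen2.5-7b-instruct
```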

trl/import_utils.py

Lines changed: 5 additions & 0 deletions
@@ -38,6 +38,8 @@
 _vllm_available = _is_package_available("vllm")
 _vllm_ascend_available = _is_package_available("vllm_ascend")
 _joblib_available = _is_package_available("joblib")
+_sglang_available = _is_package_available("sglang")
+
 
 
 def is_deepspeed_available() -> bool:
@@ -91,6 +93,9 @@ def is_vllm_ascend_available() -> bool:
 def is_joblib_available() -> bool:
     return _joblib_available
 
+def is_sglang_available() -> bool:
+    return _sglang_available
+
 
 class _LazyModule(ModuleType):
     """

trl/scripts/env.py

Lines changed: 2 additions & 0 deletions
@@ -27,6 +27,7 @@
     is_diffusers_available,
     is_liger_kernel_available,
     is_llm_blender_available,
+    is_sglang_available,
     is_vllm_available,
 )
 from .utils import get_git_commit_hash
@@ -74,6 +75,7 @@ def print_env():
         "OpenAI version": version("openai") if is_openai_available() else "not installed",
         "PEFT version": version("peft") if is_peft_available() else "not installed",
         "vLLM version": version("vllm") if is_vllm_available() else "not installed",
+        "SGLang version": version("sglang") if is_sglang_available() else "not installed",
     }
 
     info_str = "\n".join([f"- {prop}: {val}" for prop, val in info.items()])
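After this change, `trl env` reports the installed SGLang version alongside the other optional dependencies, using the `- {prop}: {val}` format built above. A sketch of the relevant output lines (the version number shown is illustrative):

```shell
$ trl env
# ...
# - vLLM version: not installed
# - SGLang version: 0.4.6.post2
# ...
```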
