Skip to content
22 changes: 12 additions & 10 deletions trl/trainer/bco_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,19 +83,16 @@ class BCOConfig(TrainingArguments):
logging_steps: float = field(
default=10,
metadata={
"help": (
"Log every X updates steps. Should be an integer or a float in range `[0,1)`. "
"If smaller than 1, will be interpreted as ratio of total training steps."
)
"help": "Log every X updates steps. Should be an integer or a float in range `[0,1)`. If smaller than 1, "
"will be interpreted as ratio of total training steps."
},
)
bf16: bool = field(
default=True,
bf16: Optional[bool] = field(
default=None,
metadata={
"help": (
"Whether to use bf16 (mixed) precision instead of 32-bit. Requires Ampere or higher NVIDIA "
"architecture or using CPU (use_cpu) or Ascend NPU. This is an experimental API and it may change."
)
"help": "Whether to use bf16 (mixed) precision instead of 32-bit. Requires Ampere or higher NVIDIA "
"architecture or Intel XPU or using CPU (use_cpu) or Ascend NPU. If not set, it defaults to `True` if "
"`fp16` is not set."
},
)

Expand Down Expand Up @@ -202,3 +199,8 @@ class BCOConfig(TrainingArguments):
default=10.0,
metadata={"help": "Maximum value of the density ratio. The estimated density ratio is clamped to this value."},
)

def __post_init__(self):
    """Fill in `bf16` when it was left unset, then run the parent post-init."""
    # An explicit user choice (True or False) is kept untouched; only the
    # `None` placeholder is resolved, to the opposite of `fp16`.
    if self.bf16 is None:
        self.bf16 = not self.fp16

    super().__post_init__()
22 changes: 12 additions & 10 deletions trl/trainer/cpo_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,19 +90,16 @@ class CPOConfig(TrainingArguments):
logging_steps: float = field(
default=10,
metadata={
"help": (
"Log every X updates steps. Should be an integer or a float in range `[0,1)`. "
"If smaller than 1, will be interpreted as ratio of total training steps."
)
"help": "Log every X updates steps. Should be an integer or a float in range `[0,1)`. If smaller than 1, "
"will be interpreted as ratio of total training steps."
},
)
bf16: bool = field(
default=True,
bf16: Optional[bool] = field(
default=None,
metadata={
"help": (
"Whether to use bf16 (mixed) precision instead of 32-bit. Requires Ampere or higher NVIDIA "
"architecture or using CPU (use_cpu) or Ascend NPU. This is an experimental API and it may change."
)
"help": "Whether to use bf16 (mixed) precision instead of 32-bit. Requires Ampere or higher NVIDIA "
"architecture or Intel XPU or using CPU (use_cpu) or Ascend NPU. If not set, it defaults to `True` if "
"`fp16` is not set."
},
)

Expand Down Expand Up @@ -188,3 +185,8 @@ class CPOConfig(TrainingArguments):
default=None,
metadata={"help": "Number of processes to use for processing the dataset."},
)

def __post_init__(self):
    """Fill in `bf16` when it was left unset, then run the parent post-init."""
    # An explicit user choice (True or False) is kept untouched; only the
    # `None` placeholder is resolved, to the opposite of `fp16`.
    if self.bf16 is None:
        self.bf16 = not self.fp16

    super().__post_init__()
22 changes: 12 additions & 10 deletions trl/trainer/dpo_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,19 +189,16 @@ class DPOConfig(TrainingArguments):
logging_steps: float = field(
default=10,
metadata={
"help": (
"Log every X updates steps. Should be an integer or a float in range `[0,1)`. "
"If smaller than 1, will be interpreted as ratio of total training steps."
)
"help": "Log every X updates steps. Should be an integer or a float in range `[0,1)`. If smaller than 1, "
"will be interpreted as ratio of total training steps."
},
)
bf16: bool = field(
default=True,
bf16: Optional[bool] = field(
default=None,
metadata={
"help": (
"Whether to use bf16 (mixed) precision instead of 32-bit. Requires Ampere or higher NVIDIA "
"architecture or using CPU (use_cpu) or Ascend NPU. This is an experimental API and it may change."
)
"help": "Whether to use bf16 (mixed) precision instead of 32-bit. Requires Ampere or higher NVIDIA "
"architecture or Intel XPU or using CPU (use_cpu) or Ascend NPU. If not set, it defaults to `True` if "
"`fp16` is not set."
},
)

Expand Down Expand Up @@ -439,3 +436,8 @@ class DPOConfig(TrainingArguments):
"Comet during evaluation."
},
)

def __post_init__(self):
    """Fill in `bf16` when it was left unset, then run the parent post-init."""
    # An explicit user choice (True or False) is kept untouched; only the
    # `None` placeholder is resolved, to the opposite of `fp16`.
    if self.bf16 is None:
        self.bf16 = not self.fp16

    super().__post_init__()
19 changes: 9 additions & 10 deletions trl/trainer/grpo_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,19 +212,16 @@ class GRPOConfig(TrainingArguments):
logging_steps: float = field(
default=10,
metadata={
"help": (
"Log every X updates steps. Should be an integer or a float in range `[0,1)`. "
"If smaller than 1, will be interpreted as ratio of total training steps."
)
"help": "Log every X updates steps. Should be an integer or a float in range `[0,1)`. If smaller than 1, "
"will be interpreted as ratio of total training steps."
},
)
bf16: bool = field(
default=True,
bf16: Optional[bool] = field(
default=None,
metadata={
"help": (
"Whether to use bf16 (mixed) precision instead of 32-bit. Requires Ampere or higher NVIDIA "
"architecture or using CPU (use_cpu) or Ascend NPU. This is an experimental API and it may change."
)
"help": "Whether to use bf16 (mixed) precision instead of 32-bit. Requires Ampere or higher NVIDIA "
"architecture or Intel XPU or using CPU (use_cpu) or Ascend NPU. If not set, it defaults to `True` if "
"`fp16` is not set."
},
)

Expand Down Expand Up @@ -530,6 +527,8 @@ class GRPOConfig(TrainingArguments):
)

def __post_init__(self):
self.bf16 = not (self.fp16) if self.bf16 is None else self.bf16

super().__post_init__()

num_processes = self.world_size
Expand Down
19 changes: 9 additions & 10 deletions trl/trainer/iterative_sft_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,19 +54,16 @@ class may differ from those in [`~transformers.TrainingArguments`].
logging_steps: float = field(
default=10,
metadata={
"help": (
"Log every X updates steps. Should be an integer or a float in range `[0,1)`. "
"If smaller than 1, will be interpreted as ratio of total training steps."
)
"help": "Log every X updates steps. Should be an integer or a float in range `[0,1)`. If smaller than 1, "
"will be interpreted as ratio of total training steps."
},
)
bf16: bool = field(
default=True,
bf16: Optional[bool] = field(
default=None,
metadata={
"help": (
"Whether to use bf16 (mixed) precision instead of 32-bit. Requires Ampere or higher NVIDIA "
"architecture or using CPU (use_cpu) or Ascend NPU. This is an experimental API and it may change."
)
"help": "Whether to use bf16 (mixed) precision instead of 32-bit. Requires Ampere or higher NVIDIA "
"architecture or Intel XPU or using CPU (use_cpu) or Ascend NPU. If not set, it defaults to `True` if "
"`fp16` is not set."
},
)

Expand Down Expand Up @@ -96,6 +93,8 @@ class may differ from those in [`~transformers.TrainingArguments`].
)

def __post_init__(self):
self.bf16 = not (self.fp16) if self.bf16 is None else self.bf16

super().__post_init__()

if self.truncation_mode not in ["keep_end", "keep_start"]:
Expand Down
22 changes: 12 additions & 10 deletions trl/trainer/kto_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,19 +97,16 @@ class KTOConfig(TrainingArguments):
logging_steps: float = field(
default=10,
metadata={
"help": (
"Log every X updates steps. Should be an integer or a float in range `[0,1)`. "
"If smaller than 1, will be interpreted as ratio of total training steps."
)
"help": "Log every X updates steps. Should be an integer or a float in range `[0,1)`. If smaller than 1, "
"will be interpreted as ratio of total training steps."
},
)
bf16: bool = field(
default=True,
bf16: Optional[bool] = field(
default=None,
metadata={
"help": (
"Whether to use bf16 (mixed) precision instead of 32-bit. Requires Ampere or higher NVIDIA "
"architecture or using CPU (use_cpu) or Ascend NPU. This is an experimental API and it may change."
)
"help": "Whether to use bf16 (mixed) precision instead of 32-bit. Requires Ampere or higher NVIDIA "
"architecture or Intel XPU or using CPU (use_cpu) or Ascend NPU. If not set, it defaults to `True` if "
"`fp16` is not set."
},
)

Expand Down Expand Up @@ -231,3 +228,8 @@ class KTOConfig(TrainingArguments):
"`use_liger_loss` is `True`."
},
)

def __post_init__(self):
    """Fill in `bf16` when it was left unset, then run the parent post-init."""
    # An explicit user choice (True or False) is kept untouched; only the
    # `None` placeholder is resolved, to the opposite of `fp16`.
    if self.bf16 is None:
        self.bf16 = not self.fp16

    super().__post_init__()
20 changes: 10 additions & 10 deletions trl/trainer/online_dpo_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,19 +81,16 @@ class may differ from those in [`~transformers.TrainingArguments`].
logging_steps: float = field(
default=10,
metadata={
"help": (
"Log every X updates steps. Should be an integer or a float in range `[0,1)`. "
"If smaller than 1, will be interpreted as ratio of total training steps."
)
"help": "Log every X updates steps. Should be an integer or a float in range `[0,1)`. If smaller than 1, "
"will be interpreted as ratio of total training steps."
},
)
bf16: bool = field(
default=True,
bf16: Optional[bool] = field(
default=None,
metadata={
"help": (
"Whether to use bf16 (mixed) precision instead of 32-bit. Requires Ampere or higher NVIDIA "
"architecture or using CPU (use_cpu) or Ascend NPU. This is an experimental API and it may change."
)
"help": "Whether to use bf16 (mixed) precision instead of 32-bit. Requires Ampere or higher NVIDIA "
"architecture or Intel XPU or using CPU (use_cpu) or Ascend NPU. If not set, it defaults to `True` if "
"`fp16` is not set."
},
)

Expand Down Expand Up @@ -180,6 +177,9 @@ class may differ from those in [`~transformers.TrainingArguments`].
)

def __post_init__(self):
    """Resolve an unset `bf16`, run the parent post-init, then unwrap a one-element `beta`."""
    # Only the `None` placeholder is resolved (to the opposite of `fp16`);
    # an explicit True/False is left as given.
    if self.bf16 is None:
        self.bf16 = not self.fp16

    super().__post_init__()

    # A sized `beta` holding exactly one entry is replaced by that entry.
    beta = self.beta
    if hasattr(beta, "__len__") and len(beta) == 1:
        self.beta = beta[0]
22 changes: 12 additions & 10 deletions trl/trainer/orpo_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,19 +75,16 @@ class ORPOConfig(TrainingArguments):
logging_steps: float = field(
default=10,
metadata={
"help": (
"Log every X updates steps. Should be an integer or a float in range `[0,1)`. "
"If smaller than 1, will be interpreted as ratio of total training steps."
)
"help": "Log every X updates steps. Should be an integer or a float in range `[0,1)`. If smaller than 1, "
"will be interpreted as ratio of total training steps."
},
)
bf16: bool = field(
default=True,
bf16: Optional[bool] = field(
default=None,
metadata={
"help": (
"Whether to use bf16 (mixed) precision instead of 32-bit. Requires Ampere or higher NVIDIA "
"architecture or using CPU (use_cpu) or Ascend NPU. This is an experimental API and it may change."
)
"help": "Whether to use bf16 (mixed) precision instead of 32-bit. Requires Ampere or higher NVIDIA "
"architecture or Intel XPU or using CPU (use_cpu) or Ascend NPU. If not set, it defaults to `True` if "
"`fp16` is not set."
},
)

Expand Down Expand Up @@ -159,3 +156,8 @@ class ORPOConfig(TrainingArguments):
default=None,
metadata={"help": "Number of processes to use for processing the dataset."},
)

def __post_init__(self):
    """Fill in `bf16` when it was left unset, then run the parent post-init."""
    # An explicit user choice (True or False) is kept untouched; only the
    # `None` placeholder is resolved, to the opposite of `fp16`.
    if self.bf16 is None:
        self.bf16 = not self.fp16

    super().__post_init__()
22 changes: 12 additions & 10 deletions trl/trainer/prm_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,19 +56,16 @@ class PRMConfig(TrainingArguments):
logging_steps: float = field(
default=10,
metadata={
"help": (
"Log every X updates steps. Should be an integer or a float in range `[0,1)`. "
"If smaller than 1, will be interpreted as ratio of total training steps."
)
"help": "Log every X updates steps. Should be an integer or a float in range `[0,1)`. If smaller than 1, "
"will be interpreted as ratio of total training steps."
},
)
bf16: bool = field(
default=True,
bf16: Optional[bool] = field(
default=None,
metadata={
"help": (
"Whether to use bf16 (mixed) precision instead of 32-bit. Requires Ampere or higher NVIDIA "
"architecture or using CPU (use_cpu) or Ascend NPU. This is an experimental API and it may change."
)
"help": "Whether to use bf16 (mixed) precision instead of 32-bit. Requires Ampere or higher NVIDIA "
"architecture or Intel XPU or using CPU (use_cpu) or Ascend NPU. If not set, it defaults to `True` if "
"`fp16` is not set."
},
)
average_tokens_across_devices: bool = field(
Expand Down Expand Up @@ -110,3 +107,8 @@ class PRMConfig(TrainingArguments):
default=None,
metadata={"help": "Number of processes to use for processing the dataset."},
)

def __post_init__(self):
    """Fill in `bf16` when it was left unset, then run the parent post-init."""
    # An explicit user choice (True or False) is kept untouched; only the
    # `None` placeholder is resolved, to the opposite of `fp16`.
    if self.bf16 is None:
        self.bf16 = not self.fp16

    super().__post_init__()
22 changes: 12 additions & 10 deletions trl/trainer/reward_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,19 +51,16 @@ class may differ from those in [`~transformers.TrainingArguments`].
logging_steps: float = field(
default=10,
metadata={
"help": (
"Log every X updates steps. Should be an integer or a float in range `[0,1)`. "
"If smaller than 1, will be interpreted as ratio of total training steps."
)
"help": "Log every X updates steps. Should be an integer or a float in range `[0,1)`. If smaller than 1, "
"will be interpreted as ratio of total training steps."
},
)
bf16: bool = field(
default=True,
bf16: Optional[bool] = field(
default=None,
metadata={
"help": (
"Whether to use bf16 (mixed) precision instead of 32-bit. Requires Ampere or higher NVIDIA "
"architecture or using CPU (use_cpu) or Ascend NPU. This is an experimental API and it may change."
)
"help": "Whether to use bf16 (mixed) precision instead of 32-bit. Requires Ampere or higher NVIDIA "
"architecture or Intel XPU or using CPU (use_cpu) or Ascend NPU. If not set, it defaults to `True` if "
"`fp16` is not set."
},
)
average_tokens_across_devices: bool = field(
Expand Down Expand Up @@ -103,3 +100,8 @@ class may differ from those in [`~transformers.TrainingArguments`].
"if the dataset is pretokenized."
},
)

def __post_init__(self):
    """Fill in `bf16` when it was left unset, then run the parent post-init."""
    # An explicit user choice (True or False) is kept untouched; only the
    # `None` placeholder is resolved, to the opposite of `fp16`.
    if self.bf16 is None:
        self.bf16 = not self.fp16

    super().__post_init__()
19 changes: 9 additions & 10 deletions trl/trainer/sft_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,19 +104,16 @@ class SFTConfig(TrainingArguments):
logging_steps: float = field(
default=10,
metadata={
"help": (
"Log every X updates steps. Should be an integer or a float in range `[0,1)`. "
"If smaller than 1, will be interpreted as ratio of total training steps."
)
"help": "Log every X updates steps. Should be an integer or a float in range `[0,1)`. If smaller than 1, "
"will be interpreted as ratio of total training steps."
},
)
bf16: bool = field(
default=True,
bf16: Optional[bool] = field(
default=None,
metadata={
"help": (
"Whether to use bf16 (mixed) precision instead of 32-bit. Requires Ampere or higher NVIDIA "
"architecture or using CPU (use_cpu) or Ascend NPU. This is an experimental API and it may change."
)
"help": "Whether to use bf16 (mixed) precision instead of 32-bit. Requires Ampere or higher NVIDIA "
"architecture or Intel XPU or using CPU (use_cpu) or Ascend NPU. If not set, it defaults to `True` if "
"`fp16` is not set."
},
)
average_tokens_across_devices: bool = field(
Expand Down Expand Up @@ -252,6 +249,8 @@ class SFTConfig(TrainingArguments):
)

def __post_init__(self):
self.bf16 = not (self.fp16) if self.bf16 is None else self.bf16

super().__post_init__()

if self.max_seq_length is not None:
Expand Down
Loading
Loading