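checkstyle: apply formatter line-wrapping to DPO loss, fused linear preference base, and tests (no functional changes)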

shivam15s · shivam15s · commit 116ee1a26629 · 2024-11-21T23:12:02.000Z
diff --git a/src/liger_kernel/chunked_loss/dpo_loss.py b/src/liger_kernel/chunked_loss/dpo_loss.py
@@ -9,7 +9,13 @@
 class LigerFusedLinearDPOFunction(LigerFusedLinearPreferenceBase):
 
     @staticmethod
-    def preference_loss_fn(chosen_logps, rejected_logps, ref_chosen_logps=None, ref_rejected_logps=None, beta=0.1):
+    def preference_loss_fn(
+        chosen_logps,
+        rejected_logps,
+        ref_chosen_logps=None,
+        ref_rejected_logps=None,
+        beta=0.1,
+    ):
         """
         Compute DPO loss (Direct Preference Optimization).
         Args:
@@ -102,7 +108,9 @@ def __init__(
         self.compiled = compiled
         self.use_ref_model = use_ref_model
 
-    def forward(self, lin_weight, _input, target, bias=None, ref_weight=None, ref_bias=None):
+    def forward(
+        self, lin_weight, _input, target, bias=None, ref_weight=None, ref_bias=None
+    ):
         return LigerFusedLinearDPOFunction.apply(
             _input,
             lin_weight,
diff --git a/src/liger_kernel/chunked_loss/fused_linear_preference.py b/src/liger_kernel/chunked_loss/fused_linear_preference.py
@@ -19,7 +19,9 @@ def preference_loss_fn(chosen_logps, rejected_logps, beta=0.1):
         raise NotImplementedError("Preference loss function must be implemented.")
 
     @staticmethod
-    def get_ref_logps(input_chunk, ref_weight, target_chunk, ref_bias=None, ignore_index=-100):
+    def get_ref_logps(
+        input_chunk, ref_weight, target_chunk, ref_bias=None, ignore_index=-100
+    ):
         with torch.no_grad():
             ref_logits_chunk = input_chunk @ ref_weight.t()
             if ref_bias is not None:
@@ -29,11 +31,15 @@ def get_ref_logps(input_chunk, ref_weight, target_chunk, ref_bias=None, ignore_i
             loss_mask = target_chunk != ignore_index
             label_chunk = torch.where(loss_mask, target_chunk, 0)
 
-            ref_per_token_logps = ref_log_probs_chunk.gather(-1, label_chunk.unsqueeze(-1)).squeeze(-1)
-            ref_average_log_prob = (ref_per_token_logps * loss_mask).sum(-1) / loss_mask.sum(-1)
+            ref_per_token_logps = ref_log_probs_chunk.gather(
+                -1, label_chunk.unsqueeze(-1)
+            ).squeeze(-1)
+            ref_average_log_prob = (ref_per_token_logps * loss_mask).sum(
+                -1
+            ) / loss_mask.sum(-1)
 
-            ref_chosen_logps = ref_average_log_prob[:input_chunk.shape[0] // 2]
-            ref_rejected_logps = ref_average_log_prob[input_chunk.shape[0] // 2:]
+            ref_chosen_logps = ref_average_log_prob[: input_chunk.shape[0] // 2]
+            ref_rejected_logps = ref_average_log_prob[input_chunk.shape[0] // 2 :]
         return ref_chosen_logps, ref_rejected_logps
 
     @staticmethod
@@ -242,8 +248,14 @@ def _compute_loss(
         rejected_logps = average_log_prob[len_chosen_chunk:]
 
         if use_ref_model:
-            ref_chosen_logps, ref_rejected_logps = LigerFusedLinearPreferenceBase.get_ref_logps(
-                input_chunk, ref_weight, target_chunk, ref_bias=ref_bias, ignore_index=ignore_index
+            ref_chosen_logps, ref_rejected_logps = (
+                LigerFusedLinearPreferenceBase.get_ref_logps(
+                    input_chunk,
+                    ref_weight,
+                    target_chunk,
+                    ref_bias=ref_bias,
+                    ignore_index=ignore_index,
+                )
             )
             loss_kwargs["ref_chosen_logps"] = ref_chosen_logps
             loss_kwargs["ref_rejected_logps"] = ref_rejected_logps
diff --git a/test/chunked_loss/test_dpo_loss.py b/test/chunked_loss/test_dpo_loss.py
@@ -19,8 +19,12 @@ class HFDPOLoss(HFAlignmentLoss):
     Reference: https://github.com/huggingface/trl/blob/main/trl/trainer/orpo_trainer.py
     """
 
-    def __init__(self, ignore_index: int = -100, beta: float = 0.1, use_ref_model: bool = True):
-        super().__init__(beta=beta, ignore_index=ignore_index, use_ref_model=use_ref_model)
+    def __init__(
+        self, ignore_index: int = -100, beta: float = 0.1, use_ref_model: bool = True
+    ):
+        super().__init__(
+            beta=beta, ignore_index=ignore_index, use_ref_model=use_ref_model
+        )
 
     def alignment_loss(
         self,
@@ -69,7 +73,9 @@ def __init__(
         ).get_batch_loss_metrics
 
     def forward(self, x, y):
-        return self.dpo_loss(self.lin.weight, x, y, self.lin.bias, self.ref_lin.weight, self.ref_lin.bias)
+        return self.dpo_loss(
+            self.lin.weight, x, y, self.lin.bias, self.ref_lin.weight, self.ref_lin.bias
+        )
 
 
 class LigerLMHeadDPO(torch.nn.Module):
@@ -90,10 +96,14 @@ def __init__(
         self.ref_lin = torch.nn.Linear(
             in_features=H, out_features=V, bias=ref_bias, dtype=dtype
         )
-        self.dpo_loss = LigerFusedLinearDPOLoss(ignore_index=ignore_index, beta=beta, use_ref_model=True)
+        self.dpo_loss = LigerFusedLinearDPOLoss(
+            ignore_index=ignore_index, beta=beta, use_ref_model=True
+        )
 
     def forward(self, x, y):
-        return self.dpo_loss(self.lin.weight, x, y, self.lin.bias, self.ref_lin.weight, self.ref_lin.bias)
+        return self.dpo_loss(
+            self.lin.weight, x, y, self.lin.bias, self.ref_lin.weight, self.ref_lin.bias
+        )
 
 
 @pytest.mark.parametrize(
@@ -113,7 +123,9 @@ def forward(self, x, y):
 @pytest.mark.parametrize("bias", [True, False])
 @pytest.mark.parametrize("ref_bias", [True, False])
 @pytest.mark.parametrize("ignore_index, beta", [(-100, 0.1), (42, 0.2)])
-def test_correctness(B, T, H, V, scalar, dtype, atol, rtol, bias, ref_bias, ignore_index, beta):
+def test_correctness(
+    B, T, H, V, scalar, dtype, atol, rtol, bias, ref_bias, ignore_index, beta
+):
     B = 2 * B  # dpo loss requires B to be even
 
     torch_lm_head_dpo = TorchLMHeadDPO(
@@ -138,17 +150,17 @@ def test_correctness(B, T, H, V, scalar, dtype, atol, rtol, bias, ref_bias, igno
     torch_lm_head_dpo.lin.weight.data = liger_lm_head_dpo.lin.weight.data = torch.randn(
         V, H, device="cuda", dtype=dtype
     )
-    torch_lm_head_dpo.ref_lin.weight.data = liger_lm_head_dpo.ref_lin.weight.data = torch.randn(
-        V, H, device="cuda", dtype=dtype
+    torch_lm_head_dpo.ref_lin.weight.data = liger_lm_head_dpo.ref_lin.weight.data = (
+        torch.randn(V, H, device="cuda", dtype=dtype)
     )
 
     if bias:
         torch_lm_head_dpo.lin.bias.data = liger_lm_head_dpo.lin.bias.data = torch.randn(
             V, device="cuda", dtype=dtype
         )
     if ref_bias:
-        torch_lm_head_dpo.ref_lin.bias.data = liger_lm_head_dpo.ref_lin.bias.data = torch.randn(
-            V, device="cuda", dtype=dtype
+        torch_lm_head_dpo.ref_lin.bias.data = liger_lm_head_dpo.ref_lin.bias.data = (
+            torch.randn(V, device="cuda", dtype=dtype)
         )
 
     _input = torch.randn(B, T, H, device="cuda", dtype=dtype) * scalar
@@ -244,8 +256,12 @@ def test_correctness_functional(B, T, H, V, scalar, dtype, atol, rtol, bias, ref
     ref_bias1 = _ref_bias.detach().clone().requires_grad_(True) if ref_bias else None
     ref_bias2 = _ref_bias.detach().clone().requires_grad_(True) if ref_bias else None
 
-    loss1 = LigerFusedLinearDPOFunction.apply(input1, weight1, target, bias1, ref_weight1, ref_bias1)
-    loss2 = liger_fused_linear_dpo(input2, weight2, target, bias2, ref_weight2, ref_bias2)
+    loss1 = LigerFusedLinearDPOFunction.apply(
+        input1, weight1, target, bias1, ref_weight1, ref_bias1
+    )
+    loss2 = liger_fused_linear_dpo(
+        input2, weight2, target, bias2, ref_weight2, ref_bias2
+    )
 
     assert_verbose_allclose(loss1, loss2, atol=atol, rtol=rtol)
 
diff --git a/test/utils.py b/test/utils.py
@@ -355,7 +355,13 @@ def revert_liger_kernel_to_phi3(model_config: MiniModelConfig):
 
 class HFAlignmentLoss:
 
-    def __init__(self, alpha: float = 1.0, beta: float = 0.1, ignore_index: int = -100, use_ref_model: bool = False):
+    def __init__(
+        self,
+        alpha: float = 1.0,
+        beta: float = 0.1,
+        ignore_index: int = -100,
+        use_ref_model: bool = False,
+    ):
         self.alpha = alpha
         self.beta = beta
         self.ignore_index = ignore_index
@@ -414,8 +420,13 @@ def get_ref_logps(
         ref_logits = _input @ ref_weight.t()
         if ref_bias is not None:
             ref_logits = ref_logits + ref_bias
-        ref_all_logps = self.get_batch_logps(ref_logits, target, average_log_prob=average_log_prob)
-        return ref_all_logps[:_input.shape[0] // 2], ref_all_logps[_input.shape[0] // 2:]
+        ref_all_logps = self.get_batch_logps(
+            ref_logits, target, average_log_prob=average_log_prob
+        )
+        return (
+            ref_all_logps[: _input.shape[0] // 2],
+            ref_all_logps[_input.shape[0] // 2 :],
+        )
 
     def concatenated_forward(
         self,
@@ -503,7 +514,9 @@ def get_batch_loss_metrics(
             )
             loss_kwargs["ref_chosen_logps"] = ref_chosen_logps
             loss_kwargs["ref_rejected_logps"] = ref_rejected_logps
-        losses = self.alignment_loss(policy_chosen_logps, policy_rejected_logps, **loss_kwargs)
+        losses = self.alignment_loss(
+            policy_chosen_logps, policy_rejected_logps, **loss_kwargs
+        )
         # full loss
         loss = policy_nll_loss * self.alpha - losses.mean()
         return loss