@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import torch
 from torch import Tensor, nn
 
 from sentence_transformers.cross_encoder.CrossEncoder import CrossEncoder
|
@@ -92,37 +93,65 @@ def compute_labels(batch):
                 f"but got a model with {self.model.num_labels} output labels."
             )
 
-    def forward(self, inputs: list[list[str]], labels: Tensor) -> Tensor:
-        if len(inputs) != 3:
+    def forward(self, inputs: list[list[str]], labels: Tensor | list[Tensor]) -> Tensor:
+        anchors = inputs[0]
+        positives = inputs[1]
+        negatives = inputs[2:]
+        batch_size = len(anchors)
+
+        # If there are multiple scores, `labels` is a list of tensors. We need to stack them into
+        # a single tensor of shape (batch_size, num_columns - 1)
+        if isinstance(labels, list):
+            labels = torch.stack(labels, dim=1).T
+
+        if labels.shape == (batch_size, len(negatives) + 1):
+            # If labels are given as raw scores (one for the positive and one per negative),
+            # convert them to differences between the positive score and each negative score
+            labels = labels[:, 0].unsqueeze(1) - labels[:, 1:]
+
+        # Ensure the shape is (batch_size, num_negatives)
+        if labels.shape == (batch_size,):
+            labels = labels.unsqueeze(1)
+
+        if labels.shape != (batch_size, len(negatives)):
             raise ValueError(
-                f"MSELoss expects a dataset with three non-label columns, but got a dataset with {len(inputs)} columns."
+                f"Labels shape {labels.shape} does not match expected shape {(batch_size, len(negatives))}. "
+                "Ensure that your dataset labels/scores are 1) lists of differences between positive scores and "
+                "negative scores (length `num_negatives`), or 2) lists of positive and negative scores "
+                "(length `num_negatives + 1`)."
             )
 
-        positive_pairs = list(zip(inputs[0], inputs[1]))
-        tokens = self.model.tokenizer(
-            positive_pairs,
-            padding=True,
-            truncation=True,
-            return_tensors="pt",
-        )
-        tokens.to(self.model.device)
-        positive_logits = self.model(**tokens)[0].view(-1)
-        positive_logits = self.activation_fn(positive_logits)
+        positive_pairs = list(zip(anchors, positives))
+        positive_logits = self.logits_from_pairs(positive_pairs)
+        negative_logits_list = []
+        for negative in negatives:
+            negative_pairs = list(zip(anchors, negative))
+            negative_logits_list.append(self.logits_from_pairs(negative_pairs))
 
-        negative_pairs = list(zip(inputs[0], inputs[2]))
+        margin_logits = [positive_logits - negative_logits for negative_logits in negative_logits_list]
+        margin_logits = torch.stack(margin_logits, dim=1)
+        loss = self.loss_fct(margin_logits, labels.float())
+        return loss
+
+    def logits_from_pairs(self, pairs: list[tuple[str, str]]) -> Tensor:
+        """
+        Computes the logits for a list of pairs using the model.
+
+        Args:
+            pairs (list[tuple[str, str]]): A list of pairs of strings (query, passage).
+
+        Returns:
+            Tensor: The logits for the pairs.
+        """
         tokens = self.model.tokenizer(
-            negative_pairs,
+            pairs,
             padding=True,
             truncation=True,
             return_tensors="pt",
         )
         tokens.to(self.model.device)
-        negative_logits = self.model(**tokens)[0].view(-1)
-        negative_logits = self.activation_fn(negative_logits)
-
-        margin_logits = positive_logits - negative_logits
-        loss = self.loss_fct(margin_logits, labels.float())
-        return loss
+        logits = self.model(**tokens)[0].view(-1)
+        return self.activation_fn(logits)
 
     def get_config_dict(self):
         return {
|
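A minimal end-to-end sketch, assuming this diff belongs to the CrossEncoder MarginMSELoss (the import path, model name, and dataset values here are assumptions for illustration, not part of the commit):

import torch

from sentence_transformers.cross_encoder import CrossEncoder
from sentence_transformers.cross_encoder.losses import MarginMSELoss

model = CrossEncoder("cross-encoder/ms-marco-MiniLM-L6-v2")  # any single-label cross-encoder
loss = MarginMSELoss(model)

# Input columns: anchor, positive, then any number of negative columns.
anchors = ["how to bake bread"]
positives = ["Knead the dough, let it rise, then bake at 230C."]
negatives_1 = ["Bread is a staple food in many cultures."]
negatives_2 = ["Cakes are usually baked at lower temperatures."]
inputs = [anchors, positives, negatives_1, negatives_2]

# Labels as teacher margins (positive score minus each negative score),
# shape (batch_size, num_negatives); raw scores of shape
# (batch_size, num_negatives + 1) would also be accepted, per the diff above.
labels = torch.tensor([[0.7, 0.9]])

loss_value = loss(inputs, labels)  # MSE between predicted and teacher margins

In practice this loss would normally be passed to the trainer rather than called directly; calling it by hand as above simply exercises the forward pass shown in the diff.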