
Commit 8f58439

ekagra-ranjan authored and Mu Huai committed
[Model] Ignore rotary embed load for Cohere model (vllm-project#17319)
Signed-off-by: Mu Huai <[email protected]>
1 parent 38b7166 commit 8f58439

File tree

1 file changed: +4 −0 lines changed

vllm/model_executor/models/commandr.py

Lines changed: 4 additions & 0 deletions
@@ -418,6 +418,10 @@ def load_weights(self, weights: Iterable[Tuple[str,
         loaded_params: Set[str] = set()
         for name, loaded_weight in weights:
 
+            # Skip loading rotary embeddings since vLLM has its own
+            if "rotary_emb.inv_freq" in name:
+                continue
+
             if (self.quant_config is not None and
                     (scale_name := self.quant_config.get_cache_scale(name))):
                 # Loading kv cache quantization scales
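For context, here is a minimal sketch of the loading loop this change touches, written as a simplified standalone function. The name `load_weights_sketch` and the bare `copy_` assignment are illustrative assumptions, not the actual commandr.py code, which also handles stacked-parameter mappings and the kv-cache quantization scales visible in the surrounding context lines. The skip itself follows the added comment: vLLM builds its own rotary embedding (recomputing `inv_freq` at initialization), so a `rotary_emb.inv_freq` buffer persisted in a checkpoint has no matching parameter in the module and would otherwise fail the lookup.

```python
from typing import Iterable, Set, Tuple

import torch
from torch import nn


def load_weights_sketch(model: nn.Module,
                        weights: Iterable[Tuple[str, torch.Tensor]]
                        ) -> Set[str]:
    """Simplified weight-loading loop showing the rotary-embedding skip.

    Hypothetical sketch: real vLLM loaders also handle stacked-parameter
    mappings and quantization scales, which are omitted here.
    """
    params_dict = dict(model.named_parameters())
    loaded_params: Set[str] = set()
    for name, loaded_weight in weights:
        # Skip loading rotary embeddings since vLLM has its own:
        # inv_freq is recomputed when the model is constructed, so a
        # checkpoint-persisted buffer has no entry in params_dict and
        # would raise a KeyError on the lookup below.
        if "rotary_emb.inv_freq" in name:
            continue
        param = params_dict[name]
        param.data.copy_(loaded_weight)
        loaded_params.add(name)
    return loaded_params
```

Skipping the key, rather than mapping it onto the module, is the natural choice here: the checkpoint value is redundant with what vLLM computes itself, so nothing is lost by ignoring it.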
