🐛 Revert tied lm_head logic to fix the build; this may introduce GGUF-related issues.

fialhocoelho · fialhocoelho · commit 1297cc8e60ec · 2024-10-08T20:08:11.000-03:00
Signed-off-by: Jefferson Fialho <jfialho@ibm.com>
diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py
@@ -512,7 +512,10 @@ def __init__(
                 quant_config=quant_config,
             )
             if config.tie_word_embeddings:
-                self.lm_head = self.model.embed_tokens
+                # Reverted logic to fix build issues;
+                # this may introduce GGUF-related bugs.
+                # self.lm_head = self.model.embed_tokens
+                self.lm_head.weight = self.model.embed_tokens.weight
 
             logit_scale = getattr(config, "logit_scale", 1.0)
             self.logits_processor = LogitsProcessor(self.unpadded_vocab_size,