Commit 90c6f1d

fix gpt with paddle.matmul (PaddlePaddle#3483)
1 parent f43cfd0 commit 90c6f1d

1 file changed: 2 additions, 3 deletions

paddlenlp/transformers/gpt/modeling.py (2 additions, 3 deletions)

@@ -198,10 +198,9 @@ def forward(self,
         q, k, v, cache = self._prepare_qkv(query, key, value, use_cache,
                                            cache)
         # scale dot product attention
-        product = layers.matmul(x=q,
+        product = paddle.matmul(x=q * (self.head_dim**-0.5),
                                 y=k,
-                                transpose_y=True,
-                                alpha=self.head_dim**-0.5)
+                                transpose_y=True)

         if attn_mask is not None:
             product = product + attn_mask
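The commit swaps the legacy fluid-style `layers.matmul(..., alpha=s)` for `paddle.matmul` on a pre-scaled query. The two forms are numerically equivalent because a scalar factor commutes with matrix multiplication: (s·q)·kᵀ = s·(q·kᵀ). A minimal sketch of that identity using NumPy (Paddle itself is not required; the names `q`, `k`, and `head_dim` mirror the diff but this is an illustration, not PaddleNLP code):

```python
import numpy as np

head_dim = 64
rng = np.random.default_rng(0)
q = rng.standard_normal((2, 8, head_dim))  # (batch, seq_len, head_dim)
k = rng.standard_normal((2, 8, head_dim))

scale = head_dim ** -0.5  # same factor as self.head_dim**-0.5 in the diff

# Old behaviour: scale applied to the product (what alpha= did).
old = scale * (q @ k.transpose(0, 2, 1))

# New behaviour: query scaled before the matmul, as in the commit.
new = (q * scale) @ k.transpose(0, 2, 1)

assert np.allclose(old, new)
```

Pre-scaling the query is a common way to express scaled dot-product attention when the matmul API offers no built-in scaling argument.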

0 commit comments
