Commit b8d23a1

fix npu nn.Pad2D() (#5167)
1 parent 86c8848 commit b8d23a1

paddlenlp/transformers/generation_utils.py

Lines changed: 8 additions & 2 deletions
@@ -409,8 +409,14 @@ def update_model_kwargs_for_generation(outputs, model_kwargs, is_encoder_decoder
             if convert_dtype(attention_mask.dtype) == "bool":
                 attention_mask = paddle.cast(attention_mask, "int64")
             if len(attention_mask.shape) == 4:
-                attention_mask = nn.Pad2D([0, 0, 0, 1], mode="replicate")(attention_mask)
-                attention_mask = nn.Pad2D([0, 1, 0, 0], value=-1e4)(attention_mask)
+                cur_device = paddle.get_device()
+                if cur_device.split(":")[0] == "npu":
+                    attention_mask = nn.Pad2D([0, 0, 0, 1], mode="constant")(attention_mask)
+                    attention_mask = nn.Pad2D([0, 1, 0, 0], value=0)(attention_mask)
+                else:
+                    attention_mask = nn.Pad2D([0, 0, 0, 1], mode="replicate")(attention_mask)
+                    attention_mask = nn.Pad2D([0, 1, 0, 0], value=-1e4)(attention_mask)
+
                 dtype = convert_dtype(attention_mask.dtype)
                 if "int" in dtype:
                     attention_mask[:, :, -1, -1] = 1
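
For reference, paddle.get_device() returns a string such as "cpu", "gpu:0", or "npu:0", so cur_device.split(":")[0] gives the device type that the new branch tests. The sketch below replays the patched logic on a toy 4-D attention mask outside of generation_utils.py; the tensor shape and values are illustrative only, and the reason given for the NPU branch (replicate-mode Pad2D misbehaving on NPU) is inferred from the commit title rather than spelled out in the diff.

import paddle
import paddle.nn as nn

# Toy 4-D attention mask [batch, num_heads, seq_len, seq_len]; values are illustrative.
# Pad2D padding is given as [pad_left, pad_right, pad_top, pad_bottom] for NCHW input.
attention_mask = paddle.ones([1, 1, 3, 3], dtype="float32")

cur_device = paddle.get_device()  # e.g. "cpu", "gpu:0", or "npu:0"
if cur_device.split(":")[0] == "npu":
    # NPU path from the commit: add the new bottom row with constant padding
    # (default value 0.0) and the new right column with value 0, presumably
    # because replicate-mode padding is not reliable on NPU.
    attention_mask = nn.Pad2D([0, 0, 0, 1], mode="constant")(attention_mask)
    attention_mask = nn.Pad2D([0, 1, 0, 0], value=0)(attention_mask)
else:
    # Original path: replicate the last row, then pad the new right column with
    # a large negative value so the appended key position stays masked.
    attention_mask = nn.Pad2D([0, 0, 0, 1], mode="replicate")(attention_mask)
    attention_mask = nn.Pad2D([0, 1, 0, 0], value=-1e4)(attention_mask)

print(attention_mask.shape)  # [1, 1, 4, 4]: one extra row and one extra column

In the surrounding function the padded corner is then reset (attention_mask[:, :, -1, -1] = 1 when the dtype is integer), so the newly generated position can attend to itself regardless of which padding path was taken.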
