Skip to content

Commit ac7c17f

Browse files
yuanlehome and Mangodadada
authored and committed
fix llama3 static run (PaddlePaddle#8849)
1 parent a2bf616 commit ac7c17f

File tree

1 file changed

+4
-0
lines changed

1 file changed

+4
-0
lines changed

llm/predict/predictor.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -179,6 +179,7 @@ def _preprocess(self, source):
179179
source,
180180
max_length=self.config.src_length,
181181
truncation=True,
182+
return_position_ids=True if not isinstance(self.tokenizer, ChatGLMTokenizer) else False,
182183
truncation_side="left",
183184
return_tensors=self.return_tensors,
184185
padding=True,
@@ -305,6 +306,9 @@ def __init__(self, config: PredictorArgument, tokenizer: PretrainedTokenizer = N
305306
inference_config.disable_gpu()
306307
inference_config.disable_glog_info()
307308
inference_config.enable_new_executor()
309+
# remove `gpu_cpu_map_matmul_v2_to_matmul_pass` to avoid mapping matmul_v2 -> matmul op
310+
if config.dtype == "bfloat16":
311+
inference_config.delete_pass("gpu_cpu_map_matmul_v2_to_matmul_pass")
308312
if in_pir_executor_mode():
309313
inference_config.enable_new_ir()
310314
if in_cinn_mode():

0 commit comments

Comments (0)