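Fix CPU custom-op build: correct setup.sh paths and save_with_output.cc source, guard rebuild_padding_v2 import to CUDA/custom devices, update CPU README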

bukejiyu · bukejiyu · commit 02988089d2fa · 2024-10-23T15:32:25.000+08:00
diff --git a/csrc/cpu/README.md b/csrc/cpu/README.md
@@ -1,15 +1,16 @@
 # cpu-custom-ops
 
 ## 快速开始
-
-### 1.环境准备
+### 1. 详细 cpu 推理教程
+[cpu](../../llm/docs/cpu_install.md)
+
+### 2.环境准备
 ```shell
-cmake >=3.18
 # 查询机器是否支持 avx512指令
 lscpu | grep avx512*
 ```
 
-### 2.安装 cpu 自定义算子和第三方库
+### 3.安装 cpu 自定义算子和第三方库
 ```shell
 #建议在 gcc 9.4.0 下安装第三方库
 bash setup.sh
diff --git a/csrc/cpu/setup.sh b/csrc/cpu/setup.sh
@@ -55,12 +55,12 @@ rm -rf build
 mkdir build && cd build
 cmake ..
 make -j
+cd ..
 
 #xft
 export XFT_HEADER_DIR=$PWD
 export XFT_LIB_DIR=$XFT_HEADER_DIR/build
 export LD_LIBRARY_PATH=$XFT_LIB_DIR:$LD_LIBRARY_PATH
-
 #setup cpu paddle_nlp ops
-cd ../../
-python ./src/setup_cpu.py install
+cd ..
+python ./src/setup_cpu.py install --user
diff --git a/csrc/cpu/src/setup_cpu.py b/csrc/cpu/src/setup_cpu.py
@@ -112,7 +112,7 @@ def check_avx512_bf16__support():
 
 custom_kernel_dot_module = CppExtension(
     sources=[
-        "../generation/save_with_output.cc",
+        "../gpu/save_with_output.cc",
         "./src/token_penalty_multi_scores.cc",
         "./src/stop_generation_multi_ends.cc",
         "./src/set_value_by_flags.cc",
@@ -129,6 +129,6 @@ def check_avx512_bf16__support():
 setup(
     name="paddlenlp_ops",
     version="1.0",
-    description="custom kernel fot compiling",
+    description="custom kernel for compiling",
     ext_modules=[custom_kernel_dot_module],
 )
diff --git a/paddlenlp/experimental/transformers/fused_transformer_layers.py b/paddlenlp/experimental/transformers/fused_transformer_layers.py
@@ -40,7 +40,10 @@
         "The paddlenlp_ops package is not installed. you can read the docs and install it by hand, "
         "you can refer to: https://github.com/PaddlePaddle/PaddleNLP/blob/develop/csrc/README.md"
     )
-from paddlenlp_ops import rebuild_padding_v2
+if (
+    paddle.device.get_all_custom_device_type() is not None and len(paddle.device.get_all_custom_device_type()) > 0
+) or core.is_compiled_with_cuda():
+    from paddlenlp_ops import rebuild_padding_v2
 
 if core.is_compiled_with_cuda():
     if os.getenv("FLAGS_CUTLASS_FP8_GEMM", "False") == "True":
diff --git a/paddlenlp/experimental/transformers/llama/modeling.py b/paddlenlp/experimental/transformers/llama/modeling.py
@@ -291,7 +291,6 @@ def forward(
     @paddle.no_grad()
     # avx
     def set_state_dict(self, state_dict):
-        self.transformer_block.init_weight()
         unfused_state_dict = {}
         head_size = self.hidden_size // self.num_attention_heads
         split_fn = split_param_func()