paddlenlp/generation/utils.py (2 additions, 0 deletions)
@@ -1211,6 +1211,8 @@ def sample(
                 probs = TopPProcess(probs, top_p, min_tokens_to_keep)
             if paddle.device.is_compiled_with_custom_device("gcu"):
                 probs = paddle.cast(probs, "float32")
+            if paddle.device.is_compiled_with_xpu():
+                probs = paddle.cast(probs, "float32")
 
             # multinomial already support fp16 and bf16 currently, fix issue: https://github.com/PaddlePaddle/Paddle/issues/51852
             next_tokens = paddle.multinomial(probs)
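
A minimal sketch (not part of the PR; the helper name sample_next_tokens is illustrative) of the combined guard after this change: probabilities are cast to float32 on devices whose multinomial kernel needs it, then sampled. It uses only the Paddle APIs that already appear in the hunk above.

import paddle

def sample_next_tokens(probs):
    # paddle.multinomial handles fp16/bf16 probabilities on GPU (see the issue
    # linked in the code comment above), but on XPU and the custom "gcu" device
    # the probabilities are cast to float32 before sampling.
    if paddle.device.is_compiled_with_xpu() or paddle.device.is_compiled_with_custom_device("gcu"):
        probs = paddle.cast(probs, "float32")
    return paddle.multinomial(probs)
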
paddlenlp/peft/lora/lora_model.py (5 additions, 2 deletions)
@@ -42,7 +42,7 @@
     load_state_dict,
 )
 from ...transformers.utils import get_checkpoint_shard_files, weight_name_suffix
-from ...utils.distributed import distributed_gather
+from ...utils.distributed import distributed_allgather, distributed_gather
 from ...utils.env import LORA_WEIGHTS_NAME, SAFE_PEFT_WEIGHTS_INDEX_NAME
 from ...utils.log import logger
 from ...utils.tools import get_env_device
Review comment (Contributor):
Can distributed_allgather and distributed_gather both be imported directly at the same time in the XPU and GPU code paths?

Reply (@houj04, Contributor Author, Jul 22, 2024):
The reference code is #8697.
The reason for this change is that XPU does not currently support gather.
gather and allgather give the same result on rank 0 and differ only on non-rank-0 ranks. Since the code path is already guarded by is_dst, whatever the non-rank-0 ranks receive is discarded anyway, so gather was simply replaced with allgather.
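
Below is a minimal sketch (not part of the PR) of the calling pattern the reply describes; the function name collect_full_tensor is illustrative, and the absolute import paths are inferred from the relative imports in the diff. On the destination rank both helpers return the same shards; the extra copies allgather produces on non-dst ranks are dropped by the is_dst guard.

from paddlenlp.utils.distributed import distributed_allgather, distributed_gather
from paddlenlp.utils.tools import get_env_device

def collect_full_tensor(tensor, mp_group, is_dst):
    # XPU has no gather kernel, so fall back to allgather there; the call
    # signatures match those used in _merge_trainable_tensor_parallel below.
    if get_env_device() == "xpu":
        ret = distributed_allgather(tensor, group=mp_group, offload=True)
    else:
        ret = distributed_gather(tensor, group=mp_group, offload=True)
    return ret if is_dst else None  # non-dst ranks discard the result anyway
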

@@ -329,7 +329,10 @@ def _merge_trainable_tensor_parallel(self, trainable_state_dict):
         for key in trainable_state_dict:
             tensor = trainable_state_dict[key]
             if key in trainable_name_action_mappings:
-                ret = distributed_gather(tensor, group=mp_group, offload=True)
+                if get_env_device() == "xpu":
+                    ret = distributed_allgather(tensor, group=mp_group, offload=True)
+                else:
+                    ret = distributed_gather(tensor, group=mp_group, offload=True)
                 action = trainable_name_action_mappings[key]
                 if key in self.lora_split_mapping and not self.lora_split_mapping[key] and "_scale" in key and is_dst:
                     ret = paddle.to_tensor(ret)