15 changes: 7 additions & 8 deletions paddlenlp/experimental/transformers/chatglm/modeling.py
@@ -27,6 +27,7 @@
from paddlenlp.experimental.transformers.generation_utils import (
GenerationInferenceModel,
)
from paddlenlp.experimental.transformers.utils import infererence_model_from_pretrained
from paddlenlp.transformers import ChatGLMConfig, ChatGLMPretrainedModel
from paddlenlp.transformers.model_outputs import (
BaseModelOutputWithPastAndCrossAttentions,
@@ -388,20 +389,20 @@ def set_state_dict(self, state_dict, use_structured_name=True):
head_dim = embed_dim // config.num_attention_heads

for k, v in state_dict.items():
if k.startswith("transformer.word_embeddings.weight"):
if k.startswith("chatglm.transformer.word_embeddings.weight"):
self.word_embeddings.weight.set_value(v.astype(dtype))
continue
elif k.startswith("transformer.final_layernorm.weight"):
elif k.startswith("chatglm.transformer.final_layernorm.weight"):
self.transformer_block.ffn_ln_scales[config.num_hidden_layers - 1].set_value(v.astype("float32"))
continue
elif k.startswith("transformer.final_layernorm.bias"):
elif k.startswith("chatglm.transformer.final_layernorm.bias"):
self.transformer_block.ffn_ln_biases[config.num_hidden_layers - 1].set_value(v.astype("float32"))
continue
elif k.startswith("lm_head.weight"):
continue
elif k.endswith("rotary_embeddings.inv_freq") or k.endswith("rotary_emb.inv_freq"):
continue
idx = int(k.split(".")[2])
idx = int(k.split(".")[3])
if k.endswith("input_layernorm.weight"):
if idx == 0:
self.input_layernorm.weight.set_value(v.astype(dtype))
@@ -583,9 +584,7 @@ def __init__(self, config: ChatGLMConfig):

@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
# TODO: Support safetensors loading.
kwargs["use_safetensors"] = False
return super().from_pretrained(pretrained_model_name_or_path, *args, **kwargs)
return infererence_model_from_pretrained(cls, pretrained_model_name_or_path, args, kwargs, return_numpy=False)

@classmethod
def get_cache_kvs_shape(
@@ -746,6 +745,6 @@ def forward(
@paddle.no_grad()
def set_state_dict(self, state_dict):
self.lm_head.weight.set_value(
state_dict["transformer.word_embeddings.weight"].astype(self.lm_head.weight.dtype)
state_dict["chatglm.transformer.word_embeddings.weight"].astype(self.lm_head.weight.dtype)
)
self.model.transformer.set_state_dict({k: state_dict[k] for k in state_dict.keys()})
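The two kinds of edits in the set_state_dict hunks above go together: once checkpoint keys gain the `chatglm.` prefix, every dot-separated component shifts right by one, so the per-layer index parsed from `k.split(".")` moves from position 2 to position 3. A minimal sketch (the key below is illustrative of the prefixed naming these hunks target):

# Hypothetical key in the new, prefixed checkpoint layout.
key = "chatglm.transformer.layers.14.input_layernorm.weight"
parts = key.split(".")
# parts == ["chatglm", "transformer", "layers", "14", "input_layernorm", "weight"]
# Old un-prefixed keys ("transformer.layers.14. ...") kept the layer index at
# parts[2]; the "chatglm." prefix pushes it to parts[3].
idx = int(parts[3])
assert idx == 14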
5 changes: 2 additions & 3 deletions paddlenlp/experimental/transformers/gpt/modeling.py
@@ -26,6 +26,7 @@
from paddlenlp.experimental.transformers.generation_utils import (
GenerationInferenceModel,
)
from paddlenlp.experimental.transformers.utils import infererence_model_from_pretrained
from paddlenlp.transformers import GPTConfig, GPTPretrainedModel
from paddlenlp.transformers.gpt.modeling import GPTEmbeddings, parallel_matmul
from paddlenlp.transformers.model_outputs import (
@@ -446,9 +447,7 @@ def __init__(self, config):

@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
# TODO: Support safetensors loading.
kwargs["use_safetensors"] = False
return super().from_pretrained(pretrained_model_name_or_path, *args, **kwargs)
return infererence_model_from_pretrained(cls, pretrained_model_name_or_path, args, kwargs)

@classmethod
def get_cache_kvs_shape(
59 changes: 4 additions & 55 deletions paddlenlp/experimental/transformers/llama/modeling.py
@@ -43,6 +43,7 @@
GenerationBlockInferenceModel,
GenerationInferenceModel,
)
from paddlenlp.experimental.transformers.utils import infererence_model_from_pretrained
from paddlenlp.transformers import LlamaConfig, LlamaPretrainedModel
from paddlenlp.transformers.conversion_utils import split_param_func
from paddlenlp.transformers.llama.modeling import LlamaLMHead
@@ -1139,9 +1140,7 @@ def __init__(self, config):

@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
# TODO: Support safetensors loading.
kwargs["use_safetensors"] = False
return super().from_pretrained(pretrained_model_name_or_path, *args, **kwargs)
return infererence_model_from_pretrained(cls, pretrained_model_name_or_path, args, kwargs)

@classmethod
def get_cache_kvs_shape(
@@ -1238,9 +1237,7 @@ def __init__(self, config):

@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
# TODO: Support safetensors loading.
kwargs["use_safetensors"] = False
return super().from_pretrained(pretrained_model_name_or_path, *args, **kwargs)
return infererence_model_from_pretrained(cls, pretrained_model_name_or_path, args, kwargs)

@classmethod
def get_cache_kvs_shape(
@@ -1477,55 +1474,7 @@ def get_tensor_parallel_split_mappings(num_layers):

@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
# TODO: Support safetensors loading.
kwargs["use_safetensors"] = False
from paddlenlp.transformers.utils import (
ContextManagers,
is_safetensors_available,
)

from_hf_hub = kwargs.pop("from_hf_hub", False)
config = kwargs.pop("config", None)
from_aistudio = kwargs.get("from_aistudio", False)
subfolder = kwargs.get("subfolder", None)
variant = kwargs.pop("variant", None)
use_safetensors = kwargs.pop("use_safetensors", None if is_safetensors_available() else False)
convert_from_torch = kwargs.pop("convert_from_torch", None)
cache_dir = kwargs.pop("cache_dir", None)

init_contexts = []
with ContextManagers(init_contexts):
model = cls(config)

if not config.single_card_ptq:
resolved_archive_file = pretrained_model_name_or_path
else:
resolved_archive_file = cls._resolve_model_file_path(
pretrained_model_name_or_path,
cache_dir=cache_dir,
subfolder=subfolder,
from_hf_hub=from_hf_hub,
from_aistudio=from_aistudio,
config=config,
convert_from_torch=convert_from_torch,
use_safetensors=use_safetensors,
variant=variant,
)[0]
logger.info(f"Load model form {resolved_archive_file}")

if config.tensor_parallel_degree > 1 and config.single_card_ptq:
logger.info(f"convert_tensor_parallel {config.tensor_parallel_degree}")
model.state_dict = model.convert_tensor_parallel(resolved_archive_file, config)
elif config.tensor_parallel_degree > 1:
resolved_archive_file = os.path.join(
resolved_archive_file, f"mp_{config.tensor_parallel_rank:0>2d}_sharding_00_pp_00", "model.pdparams"
)
model.state_dict = paddle.load(resolved_archive_file, return_numpy=True)
else:
model.state_dict = paddle.load(resolved_archive_file, return_numpy=True)
model.set_state_dict(model.state_dict)

return model
return infererence_model_from_pretrained(cls, pretrained_model_name_or_path, args, kwargs)

@classmethod
def get_cache_kvs_shape(
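The roughly fifty lines deleted above hand-rolled the whole loading path: resolve the archive file, run convert_tensor_parallel for single-card PTQ, or load a per-rank shard for multi-card runs. All of that now sits behind the shared infererence_model_from_pretrained helper (see utils.py below), which delegates shard handling to load_tp_checkpoint. For reference, a sketch of the per-rank shard path the deleted branch assembled by hand; the directory and rank are placeholders, the formatting mirrors the removed code:

import os

tensor_parallel_rank = 3                 # placeholder rank
resolved_archive_file = "/ckpts/llama"   # placeholder checkpoint directory
# The rank is zero-padded to two digits, so rank 3 selects "mp_03_sharding_00_pp_00".
shard = os.path.join(
    resolved_archive_file,
    f"mp_{tensor_parallel_rank:0>2d}_sharding_00_pp_00",
    "model.pdparams",
)
assert shard == "/ckpts/llama/mp_03_sharding_00_pp_00/model.pdparams"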
5 changes: 2 additions & 3 deletions paddlenlp/experimental/transformers/opt/modeling.py
@@ -26,6 +26,7 @@
from paddlenlp.experimental.transformers.generation_utils import (
GenerationInferenceModel,
)
from paddlenlp.experimental.transformers.utils import infererence_model_from_pretrained
from paddlenlp.transformers import OPTPretrainedModel
from paddlenlp.transformers.model_utils import (
dy2st_nocheck_guard_context,
@@ -329,9 +330,7 @@ def __init__(self, config: OPTConfig, **kwargs):

@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
# TODO: Support safetensors loading.
kwargs["use_safetensors"] = kwargs.get("use_safetensors", False)
return super().from_pretrained(pretrained_model_name_or_path, *args, **kwargs)
return infererence_model_from_pretrained(cls, pretrained_model_name_or_path, args, kwargs)

@classmethod
def get_cache_kvs_shape(
9 changes: 3 additions & 6 deletions paddlenlp/experimental/transformers/qwen/modeling.py
@@ -27,6 +27,7 @@
from paddlenlp.experimental.transformers.generation_utils import (
GenerationInferenceModel,
)
from paddlenlp.experimental.transformers.utils import infererence_model_from_pretrained
from paddlenlp.transformers import QWenConfig, QWenPretrainedModel
from paddlenlp.transformers.model_outputs import (
BaseModelOutputWithPast,
@@ -377,12 +378,8 @@ def set_output_embeddings(self, new_embeddings):
self.lm_head = new_embeddings

@classmethod
def from_pretrained(
cls, pretrained_model_name_or_path, from_hf_hub: bool = False, subfolder: str | None = None, *args, **kwargs
):
# TODO: Support safetensors loading.
kwargs["use_safetensors"] = False
return super().from_pretrained(pretrained_model_name_or_path, from_hf_hub, subfolder, *args, **kwargs)
def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
return infererence_model_from_pretrained(cls, pretrained_model_name_or_path, args, kwargs)

@classmethod
def get_cache_kvs_shape(
77 changes: 77 additions & 0 deletions paddlenlp/experimental/transformers/utils.py
@@ -0,0 +1,77 @@
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations

import os

import paddle

from paddlenlp.transformers.model_utils import (
dtype_guard,
load_tp_checkpoint,
no_init_weights,
)
from paddlenlp.transformers.utils import (
ContextManagers,
is_paddle_support_lazy_init,
is_safetensors_available,
)


def infererence_model_from_pretrained(cls, pretrained_model_name_or_path, args, kwargs, return_numpy=True):
r"""
Instantiate a pretrained model configuration from a pre-trained model name or path.
"""
config = kwargs.pop("config", None)
cache_dir = kwargs.pop("cache_dir", None)
dtype = kwargs.pop("dtype", None)
if dtype is None:
dtype = config.dtype
subfolder = kwargs.pop("subfolder", None)
if subfolder is None:
subfolder = ""
variant = kwargs.pop("variant", None)
use_safetensors = kwargs.pop("use_safetensors", None if is_safetensors_available() else False)
low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False)

init_contexts = []
if low_cpu_mem_usage or config.quantization_config.is_weight_quantize():
# Instantiate model.
init_contexts.append(no_init_weights(_enable=True))
if is_paddle_support_lazy_init():
init_contexts.append(paddle.LazyGuard())
if dtype:
init_contexts.append(dtype_guard(dtype))

# init the model
with ContextManagers(init_contexts):
model = cls(config)

resolved_archive_file, _, _, _ = cls._resolve_model_file_path(
pretrained_model_name_or_path,
cache_dir=cache_dir,
subfolder=subfolder,
from_hf_hub=False,
from_aistudio=False,
config=config,
convert_from_torch=False,
use_safetensors=use_safetensors,
variant=variant,
)

model_path = os.path.dirname(resolved_archive_file)
state_dict = load_tp_checkpoint(model_path, cls, config, return_numpy=return_numpy)
model.set_state_dict(state_dict)

return model
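A hedged usage sketch of the consolidated path, assuming LlamaForCausalLMInferenceModel is the llama inference class refactored above (its name sits outside this diff's context lines) and a placeholder local checkpoint:

from paddlenlp.experimental.transformers.llama.modeling import (
    LlamaForCausalLMInferenceModel,
)
from paddlenlp.transformers import LlamaConfig

config = LlamaConfig.from_pretrained("./my-llama-checkpoint")  # placeholder path
config.dtype = "float16"  # the helper falls back to config.dtype when no dtype kwarg is given

# from_pretrained now forwards (cls, path, args, kwargs) to
# infererence_model_from_pretrained, which resolves the weight file, loads
# per-rank tensor-parallel shards via load_tp_checkpoint, and calls
# model.set_state_dict before returning the model.
model = LlamaForCausalLMInferenceModel.from_pretrained(
    "./my-llama-checkpoint",
    config=config,
)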