
Commit fe19531

fix
1 parent d46655c commit fe19531

2 files changed: 6 additions & 5 deletions

paddlenlp/transformers/llama/tokenizer.py

Lines changed: 2 additions & 4 deletions
@@ -72,9 +72,7 @@ def __init__(
         self.add_bos_token = add_bos_token
         self.add_eos_token = add_eos_token
         self.decode_with_prefix_space = decode_with_prefix_space
-        # self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
-        self.sp_model = self.get_spm_processor(kwargs.pop("from_slow", False))
-        self.sp_model.Load(vocab_file)
+        self.sp_model = self.get_spm_processor(kwargs.pop("from_slow", True))
 
     @property
     def vocab_size(self):
@@ -101,7 +99,7 @@ def bos_token_id(self) -> Optional[int]:
     def eos_token_id(self) -> Optional[int]:
         return self.sp_model.eos_id()
 
-    def get_spm_processor(self, from_slow=False):
+    def get_spm_processor(self, from_slow=True):
         tokenizer = spm.SentencePieceProcessor(**self.sp_model_kwargs)
         if from_slow: # no dependency on protobuf
             tokenizer.Load(self.vocab_file)
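
This hunk makes get_spm_processor the single place where the SentencePiece model is loaded, with the slow (protobuf-free) path as the default. Below is a minimal standalone sketch of that path only; load_spm_processor is a hypothetical helper written for illustration, not the actual PaddleNLP method, and the non-from_slow branch is omitted.

# Hypothetical helper mirroring the from_slow=True branch shown above.
from typing import Optional

import sentencepiece as spm


def load_spm_processor(vocab_file: str,
                       sp_model_kwargs: Optional[dict] = None,
                       from_slow: bool = True) -> spm.SentencePieceProcessor:
    # Build an empty processor, then load the .model file directly.
    tokenizer = spm.SentencePieceProcessor(**(sp_model_kwargs or {}))
    if from_slow:  # no dependency on protobuf
        tokenizer.Load(vocab_file)
    return tokenizer


# Usage (hypothetical path):
# sp_model = load_spm_processor("llama/tokenizer.model")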

paddlenlp/transformers/tokenizer_utils_base.py

Lines changed: 4 additions & 1 deletion
@@ -1600,7 +1600,10 @@ def _from_pretrained(
         from_hf_hub=False,
         **kwargs,
     ):
-        from_slow = kwargs.get("from_slow", False)
+        if cls.__name__.endswith("Fast"):
+            from_slow = kwargs.get("from_slow", False)
+        else:
+            from_slow = kwargs.get("from_slow", True)
         has_tokenizer_file = resolved_vocab_files.get("tokenizer_file", None) is not None
         if (from_slow or not has_tokenizer_file) and cls.slow_tokenizer_class is not None:
             slow_tokenizer = (cls.slow_tokenizer_class)._from_pretrained(
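
This change makes the from_slow default depend on the tokenizer class: classes whose names end in "Fast" keep the old default of False, while all other tokenizers now default to True and so load from the original vocab file rather than a tokenizer_file. A minimal sketch of that default resolution follows; resolve_from_slow is a stand-in function for illustration, assuming only the names visible in the hunk above.

# Sketch of the new default resolution; `cls` and `kwargs` stand in for the
# corresponding arguments of the real _from_pretrained classmethod.
def resolve_from_slow(cls: type, kwargs: dict) -> bool:
    # Fast tokenizer classes keep the old default (False);
    # everything else now defaults to the slow path (True).
    if cls.__name__.endswith("Fast"):
        return kwargs.get("from_slow", False)
    return kwargs.get("from_slow", True)


# Example with placeholder classes standing in for real tokenizer types.
class DummyTokenizer:
    pass

class DummyTokenizerFast:
    pass

print(resolve_from_slow(DummyTokenizer, {}))                    # True
print(resolve_from_slow(DummyTokenizerFast, {}))                # False
print(resolve_from_slow(DummyTokenizer, {"from_slow": False}))  # False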
