2 files changed (+6, -5 lines changed)
Original file line number | Diff line number | Diff line change
@@ -72,9 +72,7 @@ def __init__(
7272 self .add_bos_token = add_bos_token
7373 self .add_eos_token = add_eos_token
7474 self .decode_with_prefix_space = decode_with_prefix_space
75- # self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
76- self .sp_model = self .get_spm_processor (kwargs .pop ("from_slow" , False ))
77- self .sp_model .Load (vocab_file )
75+ self .sp_model = self .get_spm_processor (kwargs .pop ("from_slow" , True ))
7876
7977 @property
8078 def vocab_size (self ):
@@ -101,7 +99,7 @@ def bos_token_id(self) -> Optional[int]:
10199 def eos_token_id (self ) -> Optional [int ]:
102100 return self .sp_model .eos_id ()
103101
104- def get_spm_processor (self , from_slow = False ):
102+ def get_spm_processor (self , from_slow = True ):
105103 tokenizer = spm .SentencePieceProcessor (** self .sp_model_kwargs )
106104 if from_slow : # no dependency on protobuf
107105 tokenizer .Load (self .vocab_file )
Original file line number | Diff line number | Diff line change
@@ -1600,7 +1600,10 @@ def _from_pretrained(
16001600 from_hf_hub = False ,
16011601 ** kwargs ,
16021602 ):
1603- from_slow = kwargs .get ("from_slow" , False )
1603+ if cls .__name__ .endswith ("Fast" ):
1604+ from_slow = kwargs .get ("from_slow" , False )
1605+ else :
1606+ from_slow = kwargs .get ("from_slow" , True )
16041607 has_tokenizer_file = resolved_vocab_files .get ("tokenizer_file" , None ) is not None
16051608 if (from_slow or not has_tokenizer_file ) and cls .slow_tokenizer_class is not None :
16061609 slow_tokenizer = (cls .slow_tokenizer_class )._from_pretrained (
You can’t perform that action at this time.
0 commit comments