-
Notifications
You must be signed in to change notification settings - Fork 30.2k
Description
def get_training_corpus(corpus=None, chunk_size=1000):
    """Yield text samples in fixed-size chunks for tokenizer training.

    Args:
        corpus: iterable of text samples. Defaults to ``data["msg"]``
            (the module-level DataFrame the original script relies on),
            so existing callers are unchanged.
        chunk_size: number of samples per yielded chunk (default 1000,
            matching the original hard-coded value).

    Yields:
        list: successive slices of at most ``chunk_size`` samples; the
        last chunk may be shorter. Yields nothing for an empty corpus.
    """
    # Materialize once so len()/slicing work on any iterable input.
    samples = list(data["msg"]) if corpus is None else list(corpus)
    for start_idx in range(0, len(samples), chunk_size):
        yield samples[start_idx:start_idx + chunk_size]
# Pretrain a DeBERTa masked-LM from scratch with a freshly trained tokenizer.
DOWNLOADED_MODEL_PATH = 'model'
MODEL_NAME = 'microsoft/deberta-base'

if DOWNLOADED_MODEL_PATH == 'model':
    # First run: train a new 10k-vocab tokenizer on the corpus, then save
    # it together with the base DeBERTa config under ./model.
    os.makedirs('model', exist_ok=True)
    old_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    training_corpus = get_training_corpus()
    BertTokenizer_tokenizer = old_tokenizer.train_new_from_iterator(training_corpus, 10000)
    BertTokenizer_tokenizer.save_pretrained('model')
    # Reload from disk so later runs see exactly the same tokenizer files.
    BertTokenizer_tokenizer = AutoTokenizer.from_pretrained('model')
    config = DebertaConfig.from_pretrained(MODEL_NAME)
    config.save_pretrained('model')
else:
    # Subsequent runs: reuse the tokenizer and config already on disk.
    BertTokenizer_tokenizer = AutoTokenizer.from_pretrained('model')
    config = DebertaConfig.from_pretrained('model')

# One training example per line of aa.txt, truncated/blocked at 512 tokens.
dataset = LineByLineTextDataset(
    tokenizer=BertTokenizer_tokenizer,
    file_path="aa.txt",
    block_size=512,
)
data_collator = DataCollatorForLanguageModeling(
    tokenizer=BertTokenizer_tokenizer, mlm=True, mlm_probability=0.15
)
persian_model = DebertaForMaskedLM(config=config)

batch_size = 15
training_args = TrainingArguments(
    output_dir='aaa_debert_model',
    overwrite_output_dir=True,
    num_train_epochs=3,
    learning_rate=5e-05,
    per_device_train_batch_size=batch_size,
    # BUG FIX: save_steps must be an int (a step count); the original
    # `len(dataset) / batch_size` produced a float. Use floor division,
    # and guard with max(1, ...) so a tiny dataset doesn't yield 0.
    save_steps=max(1, len(dataset) // batch_size),
    save_total_limit=2,
    prediction_loss_only=True,
)

# NOTE(review): the quoted TypeError ("_softmax_backward_data(): argument
# 'input_dtype' ... must be torch.dtype, not Tensor") is a known
# incompatibility between transformers 4.15's DeBERTa and torch >= 1.11,
# which changed that private autograd API. Upgrade transformers (>= 4.17)
# or pin torch < 1.11 — it is not caused by this script's logic.
trainer = Trainer(
    model=persian_model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=dataset,
)
trainer.train()
trainer.save_model('aaa_debert_model/pretrain')
BertTokenizer_tokenizer.save_pretrained('aaa_debert_model/aaa_model')
Error message (traceback from trainer.train()):
trainer.train()
File "C:\Users\HP.conda\envs\tensorflow26-py37\lib\site-packages\transformers\trainer.py", line 1332, in train
tr_loss_step = self.training_step(model, inputs)
File "C:\Users\HP.conda\envs\tensorflow26-py37\lib\site-packages\transformers\trainer.py", line 1909, in training_step
loss.backward()
File "C:\Users\HP.conda\envs\tensorflow26-py37\lib\site-packages\torch_tensor.py", line 363, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
File "C:\Users\HP.conda\envs\tensorflow26-py37\lib\site-packages\torch\autograd_init_.py", line 175, in backward
allow_unreachable=True, accumulate_grad=True) # Calls into the C++ engine to run the backward pass
File "C:\Users\HP.conda\envs\tensorflow26-py37\lib\site-packages\torch\autograd\function.py", line 253, in apply
return user_fn(self, *args)
File "C:\Users\HP.conda\envs\tensorflow26-py37\lib\site-packages\transformers\models\deberta\modeling_deberta.py", line 114, in backward
inputGrad = _softmax_backward_data(grad_output, output, self.dim, output)
TypeError: _softmax_backward_data(): argument 'input_dtype' (position 4) must be torch.dtype, not Tensor
The same script works fine with BERT and RoBERTa. Am I doing something wrong? Environment: Python 3.9, transformers 4.15.0, Windows 10, torch 1.11.0.