Skip to content

DeBERTa: TypeError: _softmax_backward_data(): argument 'input_dtype' (position 4) must be torch.dtype, not Tensor #16587

@AiIsBetter

Description

@AiIsBetter

def get_training_corpus(chunk_size=1000):
    """Yield successive chunks of the "msg" column for tokenizer training.

    Reads the module-level ``data`` object (presumably a pandas
    DataFrame — TODO confirm against the caller) and yields lists of at
    most *chunk_size* strings, as expected by
    ``tokenizer.train_new_from_iterator``.

    Args:
        chunk_size: Number of samples per yielded batch. Defaults to
            1000, matching the previously hard-coded value, so existing
            callers are unaffected.
    """
    corpus = list(data["msg"])
    for start in range(0, len(corpus), chunk_size):
        yield corpus[start:start + chunk_size]

DOWNLOADED_MODEL_PATH = 'model'
MODEL_NAME = 'microsoft/deberta-base'

# As pasted, the if/else bodies had lost their indentation (SyntaxError);
# structure restored from the obvious intent of the branch.
if DOWNLOADED_MODEL_PATH == 'model':
    # First run: train a fresh tokenizer from the corpus and persist the
    # tokenizer + base-model config under 'model' for later reuse.
    os.makedirs('model', exist_ok=True)
    old_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    training_corpus = get_training_corpus()
    BertTokenizer_tokenizer = old_tokenizer.train_new_from_iterator(training_corpus, 10000)
    BertTokenizer_tokenizer.save_pretrained('model')
    # Reload so the in-memory tokenizer is backed by the saved files.
    BertTokenizer_tokenizer = AutoTokenizer.from_pretrained('model')
    config = DebertaConfig.from_pretrained(MODEL_NAME)
    config.save_pretrained('model')
else:
    # Later runs: reuse the previously saved tokenizer and config.
    BertTokenizer_tokenizer = AutoTokenizer.from_pretrained('model')
    config = DebertaConfig.from_pretrained('model')
# MLM training set: one tokenized example per line of aa.txt,
# truncated/blocked at 512 tokens.
dataset = LineByLineTextDataset(
    tokenizer=BertTokenizer_tokenizer,
    file_path="aa.txt",
    block_size=512,
)

# Collator applies dynamic masking for masked-language modelling,
# masking 15% of tokens per batch.
data_collator = DataCollatorForLanguageModeling(
    tokenizer=BertTokenizer_tokenizer,
    mlm=True,
    mlm_probability=0.15,
)

# Randomly initialized DeBERTa, to be pre-trained from scratch with MLM.
persian_model = DebertaForMaskedLM(config=config)

batch_size = 15

training_args = TrainingArguments(
    output_dir='aaa_debert_model',
    overwrite_output_dir=True,
    num_train_epochs=3,
    learning_rate=5e-05,
    per_device_train_batch_size=batch_size,
    # BUG FIX: `len(dataset) / batch_size` is a float, but save_steps
    # must be an int (a checkpoint interval in optimizer steps). Use
    # floor division, and guard against 0 when the dataset is smaller
    # than one batch.
    save_steps=max(1, len(dataset) // batch_size),
    save_total_limit=2,
    prediction_loss_only=True,
)

# NOTE(review): the TypeError from `_softmax_backward_data` reported in
# this issue is a known incompatibility between transformers 4.15 and
# torch 1.11 — PyTorch 1.11 changed that private autograd function's
# signature (4th argument became a dtype). Upgrading transformers
# (>= 4.17) or downgrading torch (< 1.11) resolves it; confirm against
# the installed versions.
trainer = Trainer(
    model=persian_model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=dataset,
)

trainer.train()

# Persist the pre-trained weights and the tokenizer.
trainer.save_model('aaa_debert_model/pretrain')
BertTokenizer_tokenizer.save_pretrained('aaa_debert_model/aaa_model')

error message:
trainer.train()
File "C:\Users\HP.conda\envs\tensorflow26-py37\lib\site-packages\transformers\trainer.py", line 1332, in train
tr_loss_step = self.training_step(model, inputs)
File "C:\Users\HP.conda\envs\tensorflow26-py37\lib\site-packages\transformers\trainer.py", line 1909, in training_step
loss.backward()
File "C:\Users\HP.conda\envs\tensorflow26-py37\lib\site-packages\torch_tensor.py", line 363, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
File "C:\Users\HP.conda\envs\tensorflow26-py37\lib\site-packages\torch\autograd_init_.py", line 175, in backward
allow_unreachable=True, accumulate_grad=True) # Calls into the C++ engine to run the backward pass
File "C:\Users\HP.conda\envs\tensorflow26-py37\lib\site-packages\torch\autograd\function.py", line 253, in apply
return user_fn(self, *args)
File "C:\Users\HP.conda\envs\tensorflow26-py37\lib\site-packages\transformers\models\deberta\modeling_deberta.py", line 114, in backward
inputGrad = _softmax_backward_data(grad_output, output, self.dim, output)
TypeError: _softmax_backward_data(): argument 'input_dtype' (position 4) must be torch.dtype, not Tensor

BERT and RoBERTa train with no problem. Am I doing something wrong? Environment: Python 3.9, transformers 4.15.0, Windows 10, torch 1.11.0.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions