1 parent 556f1ac commit c67ba66
paddlenlp/trainer/trainer.py
@@ -992,6 +992,10 @@ def _inner_training_loop(
                 else:
                     tr_loss_step = self.training_step(model, inputs)
 
+                if not args.fp16:
+                    if not paddle.isfinite(tr_loss_step).all().item():
+                        raise ValueError(f"Loss contains inf or nan values at rank {paddle.distributed.get_rank()}")
+
                 tr_loss += tr_loss_step
 
                 def fused_allreduce_gradients_no_sync(paramlist, hcg):
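The diff adds a fail-fast guard: after each training step, the loss tensor is checked for inf/nan values and an error naming the offending rank is raised before the bad loss is accumulated. The check is skipped under fp16, presumably because the AMP loss scaler already handles non-finite losses there. Below is a minimal standalone sketch of that guard; the helper name check_loss_finite is hypothetical, as the commit inlines the check directly in the training loop.

# Minimal sketch (not part of the commit) of the non-finite loss guard above.
import paddle

def check_loss_finite(tr_loss_step):
    # paddle.isfinite is elementwise; .all() reduces to a single boolean
    # tensor and .item() converts it to a Python bool.
    if not paddle.isfinite(tr_loss_step).all().item():
        # get_rank() returns 0 when distributed training is not initialized.
        rank = paddle.distributed.get_rank()
        raise ValueError(f"Loss contains inf or nan values at rank {rank}")

check_loss_finite(paddle.to_tensor(1.25))          # passes silently
check_loss_finite(paddle.to_tensor(float("nan")))  # raises ValueError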