@@ -908,21 +908,21 @@ def set_output_embeddings(self, new_embeddings):
     @replace_return_docstrings(output_type=CausalLMOutputWithCrossAttentions, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        position_ids=None,
-        head_mask=None,
-        inputs_embeds=None,
-        encoder_hidden_states=None,
-        encoder_attention_mask=None,
-        labels=None,
-        past_key_values=None,
-        use_cache=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_ids: Optional[torch.LongTensor] = None,
+        attention_mask: Optional[torch.FloatTensor] = None,
+        token_type_ids: Optional[torch.LongTensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        head_mask: Optional[torch.FloatTensor] = None,
+        inputs_embeds: Optional[torch.FloatTensor] = None,
+        encoder_hidden_states: Optional[torch.FloatTensor] = None,
+        encoder_attention_mask: Optional[torch.FloatTensor] = None,
+        labels: Optional[torch.LongTensor] = None,
+        past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
+        use_cache: Optional[bool] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, CausalLMOutputWithCrossAttentions]:
     r"""
         encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
             Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if
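For reference, the new annotations assume that `Optional`, `Tuple`, and `Union` from `typing`, plus `torch` and the output dataclasses, are importable at module level; the corresponding import hunk is not shown in this excerpt. A minimal sketch of what those imports would look like (the `transformers.modeling_outputs` path is an assumption; in-tree the file uses a relative import):

from typing import Optional, Tuple, Union

import torch

# In the library itself this is a relative import (from ...modeling_outputs import ...).
from transformers.modeling_outputs import CausalLMOutputWithCrossAttentions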
@@ -1069,19 +1069,19 @@ def set_output_embeddings(self, new_embeddings):
     )
     def forward(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        position_ids=None,
-        head_mask=None,
-        inputs_embeds=None,
-        encoder_hidden_states=None,
-        encoder_attention_mask=None,
-        labels=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_ids: Optional[torch.LongTensor] = None,
+        attention_mask: Optional[torch.FloatTensor] = None,
+        token_type_ids: Optional[torch.LongTensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        head_mask: Optional[torch.FloatTensor] = None,
+        inputs_embeds: Optional[torch.FloatTensor] = None,
+        encoder_hidden_states: Optional[torch.FloatTensor] = None,
+        encoder_attention_mask: Optional[torch.FloatTensor] = None,
+        labels: Optional[torch.LongTensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, MaskedLMOutput]:
     r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
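The `-100` sentinel in the masked-LM docstring is the standard ignore index of PyTorch's cross-entropy loss: only positions carrying a real token id contribute to the loss. A hypothetical sketch of building such labels (the mask token id 103 is a BERT-style assumption):

import torch

input_ids = torch.tensor([[101, 2054, 103, 2003, 102]])  # 103 = [MASK] (assumed BERT-style vocab)
labels = input_ids.clone()
labels[input_ids != 103] = -100  # non-masked positions are ignored by the loss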
@@ -1183,17 +1183,17 @@ def __init__(self, config):
     )
     def forward(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        position_ids=None,
-        head_mask=None,
-        inputs_embeds=None,
-        labels=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_ids: Optional[torch.LongTensor] = None,
+        attention_mask: Optional[torch.FloatTensor] = None,
+        token_type_ids: Optional[torch.LongTensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        head_mask: Optional[torch.FloatTensor] = None,
+        inputs_embeds: Optional[torch.FloatTensor] = None,
+        labels: Optional[torch.LongTensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, SequenceClassifierOutput]:
     r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
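For the sequence-classification head, each label is a single class index per example; by the usual convention for these heads, `config.num_labels == 1` switches the loss to regression. A small sketch of the classification case:

import torch

# shape (batch_size,), each value in [0, config.num_labels - 1]
labels = torch.tensor([0, 2, 1])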
@@ -1282,17 +1282,17 @@ def __init__(self, config):
     )
     def forward(
         self,
-        input_ids=None,
-        token_type_ids=None,
-        attention_mask=None,
-        labels=None,
-        position_ids=None,
-        head_mask=None,
-        inputs_embeds=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_ids: Optional[torch.LongTensor] = None,
+        token_type_ids: Optional[torch.LongTensor] = None,
+        attention_mask: Optional[torch.FloatTensor] = None,
+        labels: Optional[torch.LongTensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        head_mask: Optional[torch.FloatTensor] = None,
+        inputs_embeds: Optional[torch.FloatTensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, MultipleChoiceModelOutput]:
     r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
             Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
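Multiple-choice heads expect inputs of shape `(batch_size, num_choices, sequence_length)`, with each label indexing the correct choice. A hypothetical sketch (the vocabulary size 30522 is an assumption):

import torch

num_choices = 4
input_ids = torch.randint(0, 30522, (2, num_choices, 16))
labels = torch.tensor([1, 3])  # each in [0, num_choices - 1]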
@@ -1380,17 +1380,17 @@ def __init__(self, config):
     )
     def forward(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        position_ids=None,
-        head_mask=None,
-        inputs_embeds=None,
-        labels=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_ids: Optional[torch.LongTensor] = None,
+        attention_mask: Optional[torch.FloatTensor] = None,
+        token_type_ids: Optional[torch.LongTensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        head_mask: Optional[torch.FloatTensor] = None,
+        inputs_embeds: Optional[torch.FloatTensor] = None,
+        labels: Optional[torch.LongTensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, TokenClassifierOutput]:
     r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
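Token classification takes one label per token. A minimal sketch matching the documented shape:

import torch

# shape (batch_size, sequence_length), each value in [0, config.num_labels - 1]
labels = torch.tensor([[0, 1, 1, 0, 2]])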
@@ -1484,18 +1484,18 @@ def __init__(self, config):
     )
     def forward(
         self,
-        input_ids=None,
-        attention_mask=None,
-        token_type_ids=None,
-        position_ids=None,
-        head_mask=None,
-        inputs_embeds=None,
-        start_positions=None,
-        end_positions=None,
-        output_attentions=None,
-        output_hidden_states=None,
-        return_dict=None,
-    ):
+        input_ids: Optional[torch.LongTensor] = None,
+        attention_mask: Optional[torch.FloatTensor] = None,
+        token_type_ids: Optional[torch.LongTensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        head_mask: Optional[torch.FloatTensor] = None,
+        inputs_embeds: Optional[torch.FloatTensor] = None,
+        start_positions: Optional[torch.LongTensor] = None,
+        end_positions: Optional[torch.LongTensor] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple, QuestionAnsweringModelOutput]:
     r"""
         start_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
             Labels for position (index) of the start of the labelled span for computing the token classification loss.
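For the question-answering head, `start_positions` and `end_positions` mark the answer span as token indices, one pair per example. A minimal sketch:

import torch

start_positions = torch.tensor([3])  # index of the first answer token
end_positions = torch.tensor([7])    # index of the last answer token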