
Commit e56de4f

hiromu166 and elusenji authored and committed
Add Doc Tests for Reformer PyTorch (huggingface#16565)
* start working
* fix: ReformerForQA doctest
* fix: ReformerModelWithLMHead doctest
* fix: ReformerModelForSC doctest
* fix: ReformerModelForMLM doctest
* add: documentation_tests.txt
* make fixup
* change: ReformerModelForSC doctest
* change: checkpoint
1 parent cdf36d4 commit e56de4f

File tree

2 files changed: 118 additions, 13 deletions


src/transformers/models/reformer/modeling_reformer.py

Lines changed: 117 additions & 13 deletions
@@ -40,6 +40,7 @@
    add_start_docstrings,
    add_start_docstrings_to_model_forward,
    logging,
+   replace_return_docstrings,
)
from .configuration_reformer import ReformerConfig

@@ -2311,12 +2312,7 @@ def set_output_embeddings(self, new_embeddings):
        self.lm_head.decoder = new_embeddings

    @add_start_docstrings_to_model_forward(REFORMER_INPUTS_DOCSTRING)
-    @add_code_sample_docstrings(
-        processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint=_CHECKPOINT_FOR_DOC,
-        output_type=MaskedLMOutput,
-        config_class=_CONFIG_FOR_DOC,
-    )
+    @replace_return_docstrings(output_type=MaskedLMOutput, config_class=_CONFIG_FOR_DOC)
    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
@@ -2335,6 +2331,44 @@ def forward(
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked),
            the loss is only computed for the tokens with labels
+
+        Returns:
+
+        Example:
+
+        ```python
+        >>> import torch
+        >>> from transformers import ReformerTokenizer, ReformerForMaskedLM
+
+        >>> tokenizer = ReformerTokenizer.from_pretrained("hf-internal-testing/tiny-random-reformer")
+        >>> model = ReformerForMaskedLM.from_pretrained("hf-internal-testing/tiny-random-reformer")
+
+        >>> # add mask_token
+        >>> tokenizer.add_special_tokens({"mask_token": "[MASK]"})  # doctest: +IGNORE_RESULT
+        >>> inputs = tokenizer("The capital of France is [MASK].", return_tensors="pt")
+
+        >>> with torch.no_grad():
+        ...     logits = model(**inputs).logits
+
+        >>> # retrieve index of [MASK]
+        >>> mask_token_index = (inputs.input_ids == tokenizer.mask_token_id)[0].nonzero(as_tuple=True)[0]
+
+        >>> predicted_token_id = logits[0, mask_token_index].argmax(axis=-1)
+        >>> tokenizer.decode(predicted_token_id)
+        'it'
+        ```
+
+        ```python
+        >>> labels = tokenizer("The capital of France is Paris.", return_tensors="pt")["input_ids"]
+        >>> # mask labels of non-[MASK] tokens
+        >>> labels = torch.where(
+        ...     inputs.input_ids == tokenizer.mask_token_id, labels[:, : inputs["input_ids"].shape[-1]], -100
+        ... )
+
+        >>> outputs = model(**inputs, labels=labels)
+        >>> round(outputs.loss.item(), 2)
+        7.09
+        ```
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

@@ -2393,12 +2427,7 @@ def __init__(self, config):
        self.post_init()

    @add_start_docstrings_to_model_forward(REFORMER_INPUTS_DOCSTRING)
-    @add_code_sample_docstrings(
-        processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint=_CHECKPOINT_FOR_DOC,
-        output_type=SequenceClassifierOutput,
-        config_class=_CONFIG_FOR_DOC,
-    )
+    @replace_return_docstrings(output_type=SequenceClassifierOutput, config_class=_CONFIG_FOR_DOC)
    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
@@ -2417,6 +2446,79 @@ def forward(
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
+
+        Returns:
+
+        Example of single-label classification:
+
+        ```python
+        >>> import torch
+        >>> from transformers import ReformerTokenizer, ReformerForSequenceClassification
+
+        >>> tokenizer = ReformerTokenizer.from_pretrained("hf-internal-testing/tiny-random-reformer")
+        >>> model = ReformerForSequenceClassification.from_pretrained("hf-internal-testing/tiny-random-reformer")
+
+        >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
+
+        >>> with torch.no_grad():
+        ...     logits = model(**inputs).logits
+
+        >>> predicted_class_id = logits.argmax().item()
+        >>> model.config.id2label[predicted_class_id]
+        'LABEL_1'
+        ```
+
+        ```python
+        >>> # To train a model on `num_labels` classes, you can pass `num_labels=num_labels` to `.from_pretrained(...)`
+        >>> num_labels = len(model.config.id2label)
+        >>> model = ReformerForSequenceClassification.from_pretrained(
+        ...     "hf-internal-testing/tiny-random-reformer", num_labels=num_labels
+        ... )
+
+        >>> labels = torch.tensor(1)
+        >>> loss = model(**inputs, labels=labels).loss
+        >>> round(loss.item(), 2)
+        0.69
+        ```
+
+        Example of multi-label classification:
+
+        ```python
+        >>> import torch
+        >>> from transformers import ReformerTokenizer, ReformerForSequenceClassification
+
+        >>> tokenizer = ReformerTokenizer.from_pretrained("hf-internal-testing/tiny-random-reformer")
+        >>> model = ReformerForSequenceClassification.from_pretrained(
+        ...     "hf-internal-testing/tiny-random-reformer", problem_type="multi_label_classification"
+        ... )
+
+        >>> # add pad_token
+        >>> tokenizer.add_special_tokens({"pad_token": "[PAD]"})  # doctest: +IGNORE_RESULT
+        >>> inputs = tokenizer("Hello, my dog is cute", max_length=100, padding="max_length", return_tensors="pt")
+
+        >>> with torch.no_grad():
+        ...     logits = model(**inputs).logits
+
+        >>> predicted_class_id = logits.argmax().item()
+        >>> model.config.id2label[predicted_class_id]
+        'LABEL_1'
+        ```
+
+        ```python
+        >>> # To train a model on `num_labels` classes, you can pass `num_labels=num_labels` to `.from_pretrained(...)`
+        >>> num_labels = len(model.config.id2label)
+        >>> model = ReformerForSequenceClassification.from_pretrained(
+        ...     "hf-internal-testing/tiny-random-reformer", num_labels=num_labels
+        ... )
+        >>> model.train()  # doctest: +IGNORE_RESULT
+
+        >>> num_labels = len(model.config.id2label)
+        >>> labels = torch.nn.functional.one_hot(torch.tensor([predicted_class_id]), num_classes=num_labels).to(
+        ...     torch.float
+        ... )
+        >>> loss = model(**inputs, labels=labels).loss
+        >>> loss.backward()  # doctest: +IGNORE_RESULT
+        ```
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

@@ -2514,9 +2616,11 @@ def __init__(self, config):
    @add_start_docstrings_to_model_forward(REFORMER_INPUTS_DOCSTRING)
    @add_code_sample_docstrings(
        processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint=_CHECKPOINT_FOR_DOC,
+        checkpoint="hf-internal-testing/tiny-random-reformer",
        output_type=QuestionAnsweringModelOutput,
        config_class=_CONFIG_FOR_DOC,
+        expected_output="''",
+        expected_loss=3.28,
    )
    def forward(
        self,
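For the question-answering head, the commit keeps `add_code_sample_docstrings` and only swaps the checkpoint to the tiny test model and pins the expected values. That decorator fills a standard question-answering usage template with the given parameters; the sketch below is a hand-written approximation of what the injected sample does, not the exact generated text, and the question/answer pair and target positions are illustrative assumptions.

```python
# Approximate equivalent of the sample that `add_code_sample_docstrings`
# injects for ReformerForQuestionAnswering. The checkpoint matches the diff;
# the example texts and the target start/end positions below are illustrative
# assumptions, so the printed values will not necessarily reproduce
# expected_output="''" or expected_loss=3.28.
import torch
from transformers import ReformerForQuestionAnswering, ReformerTokenizer

checkpoint = "hf-internal-testing/tiny-random-reformer"
tokenizer = ReformerTokenizer.from_pretrained(checkpoint)
model = ReformerForQuestionAnswering.from_pretrained(checkpoint)

question, text = "Who was Jim Henson?", "Jim Henson was a nice puppet"
inputs = tokenizer(question, text, return_tensors="pt")

with torch.no_grad():
    outputs = model(**inputs)

# Decode the highest-scoring answer span; for an untrained tiny checkpoint the
# doctest expects the empty string (expected_output="''").
start_index = outputs.start_logits.argmax()
end_index = outputs.end_logits.argmax()
print(repr(tokenizer.decode(inputs.input_ids[0, start_index : end_index + 1])))

# Supplying start/end positions makes the model return a loss, which the
# doctest rounds and compares against expected_loss.
start_positions, end_positions = torch.tensor([1]), torch.tensor([3])
loss = model(**inputs, start_positions=start_positions, end_positions=end_positions).loss
print(round(loss.item(), 2))
```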

utils/documentation_tests.txt

Lines changed: 1 addition & 0 deletions
@@ -24,6 +24,7 @@ src/transformers/models/mobilebert/modeling_tf_mobilebert.py
src/transformers/models/pegasus/modeling_pegasus.py
src/transformers/models/plbart/modeling_plbart.py
src/transformers/models/poolformer/modeling_poolformer.py
+src/transformers/models/reformer/modeling_reformer.py
src/transformers/models/resnet/modeling_resnet.py
src/transformers/models/roberta/modeling_roberta.py
src/transformers/models/roberta/modeling_tf_roberta.py
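Adding `src/transformers/models/reformer/modeling_reformer.py` to `utils/documentation_tests.txt` opts the file's docstring examples into the repository's doc-test run. The sketch below is a minimal local approximation using only the standard library; it assumes the custom `IGNORE_RESULT` doctest option seen in the examples must be registered before parsing (the project's own runner does this and also gives the flag its output-ignoring behavior), and running it downloads the tiny test checkpoints from the Hugging Face Hub.

```python
# Minimal sketch: exercise the Reformer docstring examples with plain doctest.
# Assumption: registering IGNORE_RESULT here only lets doctest parse the
# `# doctest: +IGNORE_RESULT` directives; without a custom OutputChecker the
# flag does not actually suppress output comparison, so some examples may
# still be reported as failures outside the project's own doctest runner.
import doctest

doctest.register_optionflag("IGNORE_RESULT")

from transformers.models.reformer import modeling_reformer

# Run every `>>>` example found in the module's docstrings.
results = doctest.testmod(modeling_reformer, verbose=False)
print(f"{results.attempted} examples attempted, {results.failed} failed")
```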

0 commit comments
