
Commit f1590b2

Muennighoff authored and amyeroberts committed
Update BLOOM parameter counts (huggingface#18531)
* Update BLOOM parameter counts
1 parent 089ad23 commit f1590b2

File tree

6 files changed (+39, -39 lines)

docs/source/en/model_doc/bloom.mdx

Lines changed: 5 additions & 5 deletions
@@ -18,11 +18,11 @@ The BLOOM model has been proposed with its various versions through the [BigScie
 The architecture of BLOOM is essentially similar to GPT3 (auto-regressive model for next token prediction), but has been trained on 46 different languages and 13 programming languages.
 Several smaller versions of the models have been trained on the same dataset. BLOOM is available in the following versions:

-- [bloom-350m](https://huggingface.co/bigscience/bloom-350m)
-- [bloom-760m](https://huggingface.co/bigscience/bloom-760m)
-- [bloom-1b3](https://huggingface.co/bigscience/bloom-1b3)
-- [bloom-2b5](https://huggingface.co/bigscience/bloom-2b5)
-- [bloom-6b3](https://huggingface.co/bigscience/bloom-6b3)
+- [bloom-560m](https://huggingface.co/bigscience/bloom-560m)
+- [bloom-1b1](https://huggingface.co/bigscience/bloom-1b1)
+- [bloom-1b7](https://huggingface.co/bigscience/bloom-1b7)
+- [bloom-3b](https://huggingface.co/bigscience/bloom-3b)
+- [bloom-7b1](https://huggingface.co/bigscience/bloom-7b1)
 - [bloom](https://huggingface.co/bigscience/bloom) (176B parameters)

src/transformers/models/bloom/configuration_bloom.py

Lines changed: 5 additions & 5 deletions
@@ -31,11 +31,11 @@
 BLOOM_PRETRAINED_CONFIG_ARCHIVE_MAP = {
     "bigscience/bloom": "https://huggingface.co/bigscience/bloom/resolve/main/config.json",
-    "bigscience/bloom-350m": "https://huggingface.co/bigscience/bloom-350m/blob/main/config.json",
-    "bigscience/bloom-760m": "https://huggingface.co/bigscience/bloom-760m/blob/main/config.json",
-    "bigscience/bloom-1b3": "https://huggingface.co/bigscience/bloom-1b3/blob/main/config.json",
-    "bigscience/bloom-2b5": "https://huggingface.co/bigscience/bloom-2b5/blob/main/config.json",
-    "bigscience/bloom-6b3": "https://huggingface.co/bigscience/bloom-6b3/blob/main/config.json",
+    "bigscience/bloom-560m": "https://huggingface.co/bigscience/bloom-560m/blob/main/config.json",
+    "bigscience/bloom-1b1": "https://huggingface.co/bigscience/bloom-1b1/blob/main/config.json",
+    "bigscience/bloom-1b7": "https://huggingface.co/bigscience/bloom-1b7/blob/main/config.json",
+    "bigscience/bloom-3b": "https://huggingface.co/bigscience/bloom-3b/blob/main/config.json",
+    "bigscience/bloom-7b1": "https://huggingface.co/bigscience/bloom-7b1/blob/main/config.json",
 }
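For context, not part of this diff: the keys of this archive map double as Hub repo ids, so the renamed checkpoints resolve through the usual from_pretrained path. A minimal sketch (assumes network access to the bigscience organization on the Hub):

    from transformers import BloomConfig

    # The renamed repo id resolves exactly like the old one did
    config = BloomConfig.from_pretrained("bigscience/bloom-560m")
    print(config.hidden_size, config.n_layer, config.n_head)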

src/transformers/models/bloom/modeling_bloom.py

Lines changed: 6 additions & 6 deletions
@@ -38,17 +38,17 @@
 logger = logging.get_logger(__name__)

-_CHECKPOINT_FOR_DOC = "bigscience/bloom-350m"
+_CHECKPOINT_FOR_DOC = "bigscience/bloom-560m"
 _CONFIG_FOR_DOC = "BloomConfig"
 _TOKENIZER_FOR_DOC = "BloomTokenizerFast"

 BLOOM_PRETRAINED_MODEL_ARCHIVE_LIST = [
     "bigscience/bigscience-small-testing",
-    "bigscience/bloom-350m",
-    "bigscience/bloom-760m",
-    "bigscience/bloom-1b3",
-    "bigscience/bloom-2b5",
-    "bigscience/bloom-6b3",
+    "bigscience/bloom-560m",
+    "bigscience/bloom-1b1",
+    "bigscience/bloom-1b7",
+    "bigscience/bloom-3b",
+    "bigscience/bloom-7b1",
     "bigscience/bloom",
 ]
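As the commit title says, the new names track total parameter counts: the checkpoint formerly called bloom-350m holds roughly 559M parameters once embeddings are counted. A quick sketch to verify that, assuming the checkpoint fits in local memory (the ~559M figure is the expected total for bloom-560m):

    from transformers import BloomForCausalLM

    model = BloomForCausalLM.from_pretrained("bigscience/bloom-560m")
    # BLOOM ties input embeddings to the LM head, so they are not double-counted
    n_params = sum(p.numel() for p in model.parameters())
    print(f"{n_params / 1e6:.0f}M parameters")  # expected: ~559M, hence "560m"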

src/transformers/models/bloom/tokenization_bloom_fast.py

Lines changed: 5 additions & 5 deletions
@@ -36,11 +36,11 @@
 PRETRAINED_VOCAB_FILES_MAP = {
     "tokenizer_file": {
         "bigscience/tokenizer": "https://huggingface.co/bigscience/tokenizer/blob/main/tokenizer.json",
-        "bigscience/bloom-350m": "https://huggingface.co/bigscience/bloom-350m/blob/main/tokenizer.json",
-        "bigscience/bloom-760m": "https://huggingface.co/bigscience/bloom-760m/blob/main/tokenizer.json",
-        "bigscience/bloom-1b3": "https://huggingface.co/bigscience/bloom-1b3/blob/main/tokenizer.json",
-        "bigscience/bloom-2b5": "https://huggingface.co/bigscience/bloom-2b5/blob/main/tokenizer.json",
-        "bigscience/bloom-6b3": "https://huggingface.co/bigscience/bloom-2b5/blob/main/tokenizer.json",
+        "bigscience/bloom-560m": "https://huggingface.co/bigscience/bloom-560m/blob/main/tokenizer.json",
+        "bigscience/bloom-1b1": "https://huggingface.co/bigscience/bloom-1b1/blob/main/tokenizer.json",
+        "bigscience/bloom-1b7": "https://huggingface.co/bigscience/bloom-1b7/blob/main/tokenizer.json",
+        "bigscience/bloom-3b": "https://huggingface.co/bigscience/bloom-3b/blob/main/tokenizer.json",
+        "bigscience/bloom-7b1": "https://huggingface.co/bigscience/bloom-7b1/blob/main/tokenizer.json",
         "bigscience/bloom": "https://huggingface.co/bigscience/bloom/blob/main/tokenizer.json",
     },
 }
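Worth noting: the removed bigscience/bloom-6b3 entry pointed at the bloom-2b5 tokenizer URL, a copy-paste slip that the renamed map clears up. All BLOOM sizes share one tokenizer in any case; a minimal sketch of loading it by the new repo id:

    from transformers import BloomTokenizerFast

    # Any of the renamed repos serves the same shared BLOOM tokenizer
    tokenizer = BloomTokenizerFast.from_pretrained("bigscience/bloom-560m")
    print(tokenizer("I enjoy walking with my cute dog")["input_ids"])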

tests/models/bloom/test_modeling_bloom.py

Lines changed: 17 additions & 17 deletions
@@ -379,27 +379,27 @@ def test_model_from_pretrained(self):
     def test_simple_generation(self):
         # This test is a bit flaky. For some GPU architectures, pytorch sets by default allow_fp16_reduced_precision_reduction = True and some operations
         # do not give the same results under this configuration, especially torch.baddmm and torch.bmm. https://pytorch.org/docs/stable/notes/numerical_accuracy.html#fp16-on-mi200
-        # As we leave the default value (True) for allow_fp16_reduced_precision_reduction , the tests failed when running in half-precision with smaller models (350m)
+        # As we leave the default value (True) for allow_fp16_reduced_precision_reduction , the tests failed when running in half-precision with smaller models (560m)
         # Please see: https://pytorch.org/docs/stable/notes/cuda.html#reduced-precision-reduction-in-fp16-gemms
         # This discrepancy is observed only when using small models and seems to be stable for larger models.
         # Our conclusion is that these operations are flaky for small inputs but seems to be stable for larger inputs (for the functions `baddmm` and `bmm`), and therefore for larger models.

         # Here is a summary of an ablation study of our observations
         # EXPECTED_OUTPUT = "I enjoy walking with my cute dog, and I love to watch the kids play. I am a very active person, and I am a very good listener. I am a very good person, and I am a very good person. I am a"
-        # 350m + allow_fp16_reduced_precision_reduction = False + torch.bmm ==> PASS
-        # 350m + allow_fp16_reduced_precision_reduction = False + torch.baddm ==> PASS
-        # 350m + allow_fp16_reduced_precision_reduction = True + torch.baddm ==> PASS
-        # 350m + allow_fp16_reduced_precision_reduction = True + torch.bmm ==> FAIL
+        # 560m + allow_fp16_reduced_precision_reduction = False + torch.bmm ==> PASS
+        # 560m + allow_fp16_reduced_precision_reduction = False + torch.baddm ==> PASS
+        # 560m + allow_fp16_reduced_precision_reduction = True + torch.baddm ==> PASS
+        # 560m + allow_fp16_reduced_precision_reduction = True + torch.bmm ==> FAIL

         # EXPECTED_OUTPUT = "I enjoy walking with my cute dog, but I also enjoy hiking, biking, and swimming. I love to cook and bake. I love to cook and bake. I love to cook and bake. I love to cook and bake. I love"
-        # >=760m + allow_fp16_reduced_precision_reduction = True + torch.baddm ==> PASS (for use_cache=True and use_cache=False)
-        # >=760m + allow_fp16_reduced_precision_reduction = True + torch.bmm ==> PASS
-        # >=760m + allow_fp16_reduced_precision_reduction = False + torch.bmm ==> PASS
+        # >=1b1 + allow_fp16_reduced_precision_reduction = True + torch.baddm ==> PASS (for use_cache=True and use_cache=False)
+        # >=1b1 + allow_fp16_reduced_precision_reduction = True + torch.bmm ==> PASS
+        # >=1b1 + allow_fp16_reduced_precision_reduction = False + torch.bmm ==> PASS

-        path_350m = "bigscience/bloom-350m"
-        model = BloomForCausalLM.from_pretrained(path_350m, use_cache=True, revision="gs555750").cuda()
+        path_560m = "bigscience/bloom-560m"
+        model = BloomForCausalLM.from_pretrained(path_560m, use_cache=True, revision="gs555750").cuda()
         model = model.eval()
-        tokenizer = BloomTokenizerFast.from_pretrained(path_350m)
+        tokenizer = BloomTokenizerFast.from_pretrained(path_560m)

         input_sentence = "I enjoy walking with my cute dog"
         # This output has been obtained using fp32 model on the huggingface DGX workstation - NVIDIA A100 GPU
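For readers reproducing the ablation described in the comments above: the knob in question is a global PyTorch setting (available since PyTorch 1.12), not something this commit touches. A sketch of how the "= False" rows were presumably obtained; the tensor shapes are illustrative and a CUDA device is required:

    import torch

    # Forbid reduced-precision reductions in fp16 GEMMs; torch.bmm-style matmuls
    # then accumulate in fp32, matching the "False" rows of the ablation table
    torch.backends.cuda.matmul.allow_fp16_reduced_precision_reduction = False

    a = torch.randn(4, 128, 256, device="cuda", dtype=torch.float16)
    b = torch.randn(4, 256, 64, device="cuda", dtype=torch.float16)
    out = torch.bmm(a, b)  # reductions now accumulate in fp32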
@@ -416,10 +416,10 @@ def test_simple_generation(self):
     @slow
     @require_torch_gpu
     def test_batch_generation(self):
-        path_350m = "bigscience/bloom-350m"
-        model = BloomForCausalLM.from_pretrained(path_350m, use_cache=True, revision="gs555750").cuda()
+        path_560m = "bigscience/bloom-560m"
+        model = BloomForCausalLM.from_pretrained(path_560m, use_cache=True, revision="gs555750").cuda()
         model = model.eval()
-        tokenizer = BloomTokenizerFast.from_pretrained(path_350m, padding_side="left")
+        tokenizer = BloomTokenizerFast.from_pretrained(path_560m, padding_side="left")

         input_sentence = ["I enjoy walking with my cute dog", "I enjoy walking with my cute dog"]

@@ -437,10 +437,10 @@ def test_batch_generation(self):
     @require_torch_gpu
     def test_batch_generation_padd(self):

-        path_350m = "bigscience/bloom-350m"
-        model = BloomForCausalLM.from_pretrained(path_350m, use_cache=True, revision="gs555750").cuda()
+        path_560m = "bigscience/bloom-560m"
+        model = BloomForCausalLM.from_pretrained(path_560m, use_cache=True, revision="gs555750").cuda()
         model = model.eval()
-        tokenizer = BloomTokenizerFast.from_pretrained(path_350m, padding_side="left")
+        tokenizer = BloomTokenizerFast.from_pretrained(path_560m, padding_side="left")

         input_sentence = ["I enjoy walking with my cute dog", "Hello my name is"]
         input_sentence_without_pad = "Hello my name is"

tests/onnx/test_onnx_v2.py

Lines changed: 1 addition & 1 deletion
@@ -215,7 +215,7 @@ def test_values_override(self):
 }

 PYTORCH_EXPORT_WITH_PAST_MODELS = {
-    ("bloom", "bigscience/bloom-350m"),
+    ("bloom", "bigscience/bloom-560m"),
     ("gpt2", "gpt2"),
     ("gpt-neo", "EleutherAI/gpt-neo-125M"),
 }
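For context: this table drives the ONNX export tests, which resolve a checkpoint to its OnnxConfig via FeaturesManager. A standalone sketch of that same path for the renamed checkpoint, assuming the transformers.onnx API of this era; the output filename is illustrative:

    from pathlib import Path

    from transformers import AutoTokenizer, BloomForCausalLM
    from transformers.onnx import export
    from transformers.onnx.features import FeaturesManager

    checkpoint = "bigscience/bloom-560m"
    model = BloomForCausalLM.from_pretrained(checkpoint)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)

    # Resolve the ONNX config for the with-past causal-LM feature, as the test does
    _, onnx_config_cls = FeaturesManager.check_supported_model_or_raise(
        model, feature="causal-lm-with-past"
    )
    onnx_config = onnx_config_cls(model.config)

    export(tokenizer, model, onnx_config, onnx_config.default_onnx_opset, Path("bloom.onnx"))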
