Commit 0e724a4

Add save/load for pt2e example (#1927)
Signed-off-by: Kaihui-intel <[email protected]>
1 parent 50eb6fb commit 0e724a4

File tree: 12 files changed, +502 additions, −307 deletions

examples/3.x_api/pytorch/cv/static_quant/main.py

Lines changed: 236 additions & 270 deletions
Large diffs are not rendered by default.

examples/3.x_api/pytorch/cv/static_quant/run_benchmark.sh (new file; path inferred from the script contents and the run_quant.sh alongside it)

Lines changed: 103 additions & 0 deletions
@@ -0,0 +1,103 @@
+#!/bin/bash
+set -x
+
+function main {
+
+  init_params "$@"
+  run_benchmark
+
+}
+
+# init params
+function init_params {
+  iters=100
+  batch_size=16
+  tuned_checkpoint=saved_results
+  echo ${max_eval_samples}
+  for var in "$@"
+  do
+    case $var in
+      --topology=*)
+          topology=$(echo $var |cut -f2 -d=)
+      ;;
+      --dataset_location=*)
+          dataset_location=$(echo $var |cut -f2 -d=)
+      ;;
+      --input_model=*)
+          input_model=$(echo $var |cut -f2 -d=)
+      ;;
+      --mode=*)
+          mode=$(echo $var |cut -f2 -d=)
+      ;;
+      --batch_size=*)
+          batch_size=$(echo $var |cut -f2 -d=)
+      ;;
+      --iters=*)
+          iters=$(echo ${var} |cut -f2 -d=)
+      ;;
+      --int8=*)
+          int8=$(echo ${var} |cut -f2 -d=)
+      ;;
+      --config=*)
+          tuned_checkpoint=$(echo $var |cut -f2 -d=)
+      ;;
+      *)
+          echo "Error: No such parameter: ${var}"
+          exit 1
+      ;;
+    esac
+  done
+
+}
+
+
+# run_benchmark
+function run_benchmark {
+    extra_cmd=''
+
+    if [[ ${mode} == "accuracy" ]]; then
+        mode_cmd=" --accuracy "
+    elif [[ ${mode} == "performance" ]]; then
+        mode_cmd=" --performance --iters "${iters}
+    else
+        echo "Error: No such mode: ${mode}"
+        exit 1
+    fi
+    if [[ ${int8} == "true" ]]; then
+        extra_cmd=$extra_cmd" --int8"
+    fi
+    echo $extra_cmd
+
+
+    echo $extra_cmd
+
+    if [ "${topology}" = "resnet18_pt2e_static" ]; then
+        model_name_or_path="resnet18"
+    fi
+
+    if [[ ${mode} == "accuracy" ]]; then
+        python main.py \
+            --pretrained \
+            -a resnet18 \
+            -b 30 \
+            --tuned_checkpoint ${tuned_checkpoint} \
+            ${dataset_location} \
+            ${extra_cmd} \
+            ${mode_cmd}
+    elif [[ ${mode} == "performance" ]]; then
+        incbench --num_cores_per_instance 4 \
+            main.py \
+            --pretrained \
+            -a resnet18 \
+            -b 30 \
+            --tuned_checkpoint ${tuned_checkpoint} \
+            ${dataset_location} \
+            ${extra_cmd} \
+            ${mode_cmd}
+    else
+        echo "Error: No such mode: ${mode}"
+        exit 1
+    fi
+}
+
+main "$@"

examples/3.x_api/pytorch/cv/static_quant/run_quant.sh

Lines changed: 8 additions & 1 deletion
@@ -10,6 +10,7 @@ function main {
 
 # init params
 function init_params {
+  tuned_checkpoint="saved_results"
   for var in "$@"
   do
     case $var in
@@ -39,7 +40,13 @@ function run_tuning {
     if [ "${topology}" = "resnet18_pt2e_static" ]; then
         model_name_or_path="resnet18"
     fi
-    python main.py -a ${model_name_or_path} ${dataset_location} -q -e
+    python main.py \
+        --pretrained \
+        -t \
+        -a resnet18 \
+        -b 30 \
+        --tuned_checkpoint ${tuned_checkpoint} \
+        ${dataset_location}
 }
 
 main "$@"

examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_benchmark.sh (new file; path inferred from the run_quant.sh alongside it)

Lines changed: 99 additions & 0 deletions
@@ -0,0 +1,99 @@
+#!/bin/bash
+set -x
+
+function main {
+
+  init_params "$@"
+  run_benchmark
+
+}
+
+# init params
+function init_params {
+  iters=100
+  batch_size=16
+  tuned_checkpoint=saved_results
+  task=lambada_openai
+  echo ${max_eval_samples}
+  for var in "$@"
+  do
+    case $var in
+      --topology=*)
+          topology=$(echo $var |cut -f2 -d=)
+      ;;
+      --dataset_location=*)
+          dataset_location=$(echo $var |cut -f2 -d=)
+      ;;
+      --input_model=*)
+          input_model=$(echo $var |cut -f2 -d=)
+      ;;
+      --mode=*)
+          mode=$(echo $var |cut -f2 -d=)
+      ;;
+      --batch_size=*)
+          batch_size=$(echo $var |cut -f2 -d=)
+      ;;
+      --iters=*)
+          iters=$(echo ${var} |cut -f2 -d=)
+      ;;
+      --int8=*)
+          int8=$(echo ${var} |cut -f2 -d=)
+      ;;
+      --config=*)
+          tuned_checkpoint=$(echo $var |cut -f2 -d=)
+      ;;
+      *)
+          echo "Error: No such parameter: ${var}"
+          exit 1
+      ;;
+    esac
+  done
+
+}
+
+
+# run_benchmark
+function run_benchmark {
+    extra_cmd=''
+
+    if [[ ${mode} == "accuracy" ]]; then
+        mode_cmd=" --accuracy "
+        extra_cmd=$extra_cmd
+    elif [[ ${mode} == "performance" ]]; then
+        mode_cmd=" --performance --iters "${iters}
+        extra_cmd=$extra_cmd
+    else
+        echo "Error: No such mode: ${mode}"
+        exit 1
+    fi
+
+    if [[ ${int8} == "true" ]]; then
+        extra_cmd=$extra_cmd" --int8"
+    fi
+    echo $extra_cmd
+
+    echo $extra_cmd
+
+    if [ "${topology}" = "opt_125m_pt2e_static" ]; then
+        model_name_or_path="facebook/opt-125m"
+    fi
+    if [[ ${mode} == "accuracy" ]]; then
+        python -u run_clm_no_trainer.py \
+            --model ${model_name_or_path} \
+            --output_dir ${tuned_checkpoint} \
+            --task ${task} \
+            --batch_size ${batch_size} \
+            ${extra_cmd} ${mode_cmd}
+    elif [[ ${mode} == "performance" ]]; then
+        incbench --num_cores_per_instance 4 run_clm_no_trainer.py \
+            --model ${model_name_or_path} \
+            --batch_size ${batch_size} \
+            --output_dir ${tuned_checkpoint} \
+            ${extra_cmd} ${mode_cmd}
+    else
+        echo "Error: No such mode: ${mode}"
+        exit 1
+    fi
+}
+
+main "$@"

examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_clm_no_trainer.py

Lines changed: 39 additions & 32 deletions
@@ -14,7 +14,7 @@
     "--revision", default=None,
     help="Transformers parameter: set the model hub commit number")
 parser.add_argument("--dataset", nargs="?", default="NeelNanda/pile-10k", const="NeelNanda/pile-10k")
-parser.add_argument("--output_dir", nargs="?", default="./saved_results")
+parser.add_argument("--output_dir", nargs="?", default="")
 parser.add_argument("--quantize", action="store_true")
 parser.add_argument("--approach", type=str, default='static',
     help="Select from ['dynamic', 'static', 'weight-only']")
@@ -80,7 +80,7 @@ def get_example_inputs(tokenizer):
     dynamic_shapes = {"input_ids": (batch, seq_len)}
     example_inputs = get_example_inputs(tokenizer)
     exported_model = export(user_model, example_inputs=example_inputs, dynamic_shapes=dynamic_shapes)
-
+
     quant_config = get_default_static_config()
     # prepare
     prepare_model = prepare(exported_model, quant_config)
@@ -90,17 +90,32 @@ def get_example_inputs(tokenizer):
     prepare_model(*example_inputs)
     # convert
     converted_model = convert(prepare_model)
-    # inference
-    from torch._inductor import config
+
+    # save
+    if args.output_dir:
+        converted_model.save(example_inputs=example_inputs, output_dir = args.output_dir)
+
+
+
+if args.int8:
+    if args.output_dir:
+        print("Load int8 model.")
+        from neural_compressor.torch.quantization import load
+        model = load(args.output_dir)
 
-    config.freezing = True
-    opt_model = torch.compile(converted_model)
+        model.config = user_model.config # for lm eval
+
+    # Compile the quantized model and replace the Q/DQ pattern with Q-operator
+    from torch._inductor import config
 
-    opt_model.config = user_model.config # for lm eval
-    user_model = opt_model
+    config.freezing = True
+    opt_model = torch.compile(model)
 
+    opt_model.config = user_model.config # for lm eval
+    user_model = opt_model
 
 if args.accuracy:
+
     from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
     eval_args = LMEvalParser(
         model="hf",
@@ -120,29 +135,21 @@ def get_example_inputs(tokenizer):
     print('Batch size = %d' % args.batch_size)
 
 if args.performance:
-    # user_model.eval()
-    from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+    batch_size, input_leng = args.batch_size, 512
+    example_inputs = torch.ones((batch_size, input_leng), dtype=torch.long)
+    print("Batch size = {:d}".format(batch_size))
+    print("The length of input tokens = {:d}".format(input_leng))
     import time
 
-    samples = args.iters * args.batch_size
-    eval_args = LMEvalParser(
-        model="hf",
-        user_model=user_model,
-        tokenizer=tokenizer,
-        batch_size=args.batch_size,
-        tasks=args.tasks,
-        limit=samples,
-        device="cpu",
-    )
-    start = time.time()
-    results = evaluate(eval_args)
-    end = time.time()
-    for task_name in args.tasks.split(","):
-        if task_name == "wikitext":
-            acc = results["results"][task_name]["word_perplexity,none"]
-        else:
-            acc = results["results"][task_name]["acc,none"]
-    print("Accuracy: %.5f" % acc)
-    print('Throughput: %.3f samples/sec' % (samples / (end - start)))
-    print('Latency: %.3f ms' % ((end - start) * 1000 / samples))
-    print('Batch size = %d' % args.batch_size)
+    total_iters = args.iters
+    warmup_iters = 5
+    with torch.no_grad():
+        for i in range(total_iters):
+            if i == warmup_iters:
+                start = time.time()
+            user_model(example_inputs)
+    end = time.time()
+    latency = (end - start) / ((total_iters - warmup_iters) * args.batch_size)
+    throughput = ((total_iters - warmup_iters) * args.batch_size) / (end - start)
+    print("Latency: {:.3f} ms".format(latency * 10**3))
+    print("Throughput: {:.3f} samples/sec".format(throughput))

examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_quant.sh

Lines changed: 2 additions & 1 deletion
@@ -39,8 +39,9 @@ function run_tuning {
 
     if [ "${topology}" = "opt_125m_pt2e_static" ]; then
        model_name_or_path="facebook/opt-125m"
+        output_dir="saved_results"
     fi
-    python run_clm_no_trainer.py --model ${model_name_or_path} --quantize --accuracy --tasks "lambada_openai"
+    python run_clm_no_trainer.py --model ${model_name_or_path} --quantize --output_dir ${output_dir} --tasks "lambada_openai"
 }
 
 main "$@"

neural_compressor/common/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -15,6 +15,7 @@
 
 from neural_compressor.common.utils import (
     level,
+    level_name,
     logger,
     Logger,
     TuningLogger,
@@ -31,6 +32,7 @@
 __all__ = [
     "options",
     "level",
+    "level_name",
     "logger",
     "Logger",
     "TuningLogger",

neural_compressor/common/utils/logger.py

Lines changed: 2 additions & 0 deletions
@@ -24,6 +24,7 @@
 
 __all__ = [
     "level",
+    "level_name",
     "Logger",  # TODO: not expose it
     "logger",
     "TuningLogger",
@@ -138,6 +139,7 @@ def warning(msg, *args, **kwargs):
 
 
 level = Logger().get_logger().level
+level_name = logging.getLevelName(level)
 
 logger = Logger
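
The new level_name simply exposes the configured logging level by name, so callers can gate expensive, verbose output on a readable string instead of comparing numeric levels; the half_precision_rewriter change below is the first such caller. A minimal sketch of the intended use:

    import logging
    from neural_compressor.common import level, level_name

    # level_name is logging.getLevelName(level), re-exported for convenience
    assert level_name == logging.getLevelName(level)

    if level_name == "DEBUG":
        # only produce costly diagnostics (e.g. printing a whole graph) in debug runs
        print("debug-only diagnostics go here")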

neural_compressor/torch/algorithms/pt2e_quant/half_precision_rewriter.py

Lines changed: 2 additions & 1 deletion
@@ -185,7 +185,8 @@ def transformation(gm: torch.fx.GraphModule, node_candidate_list: List[str], tar
     for pattern_pair in HALF_PRECISION_PATTERN_REGISTRY[target_dtype].values():
         apply_single_pattern_pair(gm, pattern_pair, node_candidate_list)
     utils.logger.info("Half precision conversion is done:")
-    gm.print_readable(True)
+    if utils.level_name == "DEBUG":  # pragma: no cover
+        gm.print_readable(True)
 
 
 # =============================================================================

neural_compressor/torch/algorithms/pt2e_quant/save_load.py

Lines changed: 2 additions & 1 deletion
@@ -34,7 +34,8 @@ def save(model, example_inputs, output_dir="./saved_results"):
     os.makedirs(output_dir, exist_ok=True)
     qmodel_file_path = os.path.join(os.path.abspath(os.path.expanduser(output_dir)), WEIGHT_NAME)
     qconfig_file_path = os.path.join(os.path.abspath(os.path.expanduser(output_dir)), QCONFIG_NAME)
-    quantized_ep = torch.export.export(model, example_inputs)
+    dynamic_shapes = model.dynamic_shapes
+    quantized_ep = torch.export.export(model, example_inputs, dynamic_shapes=dynamic_shapes)
     torch.export.save(quantized_ep, qmodel_file_path)
     for key, op_config in model.qconfig.items():
         model.qconfig[key] = op_config.to_dict()
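
Re-exporting with the dynamic shapes recorded on the quantized model keeps symbolic dimensions (e.g. a variable sequence length) intact through serialization; a plain torch.export.export(model, example_inputs) would specialize the program to the concrete sizes of example_inputs. A minimal sketch of the round trip using public torch.export APIs (the file name is illustrative, standing in for the module's WEIGHT_NAME constant):

    import torch

    def save_exported(model, example_inputs, path="saved_results/quantized_model.pt2"):
        # Mirror the patched save(): re-export with the model's recorded
        # dynamic shapes, then serialize the ExportedProgram.
        ep = torch.export.export(model, example_inputs, dynamic_shapes=model.dynamic_shapes)
        torch.export.save(ep, path)

    def load_exported(path="saved_results/quantized_model.pt2"):
        # torch.export.load restores the ExportedProgram; .module() returns a runnable nn.Module.
        return torch.export.load(path).module()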
