@@ -1,9 +1,17 @@
+import copy
+
 import pytest
 import torch
 import transformers
 
 from neural_compressor.torch.algorithms.weight_only.autoround import AutoRoundQuantizer, get_autoround_default_run_fn
-from neural_compressor.torch.quantization import AutoRoundConfig, quantize
+from neural_compressor.torch.quantization import (
+    AutoRoundConfig,
+    convert,
+    get_default_AutoRound_config,
+    prepare,
+    quantize,
+)
 from neural_compressor.torch.utils import logger
 
 try:
@@ -14,8 +22,7 @@
     auto_round_installed = False
 
 
-@pytest.fixture(scope="module")
-def gpt_j():
+def get_gpt_j():
     tiny_gptj = transformers.AutoModelForCausalLM.from_pretrained(
         "hf-internal-testing/tiny-random-GPTJForCausalLM",
         torchscript=True,
@@ -25,17 +32,15 @@ def gpt_j():
 
 @pytest.mark.skipif(not auto_round_installed, reason="auto_round module is not installed")
 class TestAutoRound:
-    @staticmethod
-    @pytest.fixture(scope="class", autouse=True)
-    def gpt_j_model(gpt_j):
-        yield gpt_j
+    def setup_class(self):
+        self.gptj = get_gpt_j()
 
     def setup_method(self, method):
         logger.info(f"Running TestAutoRound test: {method.__name__}")
 
-    def test_autoround(self, gpt_j_model):
+    def test_autoround(self):
         inp = torch.ones([1, 10], dtype=torch.long)
-
+        gpt_j_model = copy.deepcopy(self.gptj)
         tokenizer = transformers.AutoTokenizer.from_pretrained(
             "hf-internal-testing/tiny-random-GPTJForCausalLM", trust_remote_code=True
         )
@@ -73,9 +78,9 @@ def test_autoround(self, gpt_j_model):
         assert "scale" in q_model.autoround_config["transformer.h.0.attn.k_proj"].keys()
         assert torch.float32 == q_model.autoround_config["transformer.h.0.attn.k_proj"]["scale_dtype"]
 
-    def test_new_api(self, gpt_j_model):
+    def test_quantizer(self):
         inp = torch.ones([1, 10], dtype=torch.long)
-
+        gpt_j_model = copy.deepcopy(self.gptj)
         tokenizer = transformers.AutoTokenizer.from_pretrained(
             "hf-internal-testing/tiny-random-GPTJForCausalLM", trust_remote_code=True
         )
@@ -110,3 +115,34 @@ def test_new_api(self, gpt_j_model):
         assert "transformer.h.0.attn.k_proj" in q_model.autoround_config.keys()
         assert "scale" in q_model.autoround_config["transformer.h.0.attn.k_proj"].keys()
         assert torch.float32 == q_model.autoround_config["transformer.h.0.attn.k_proj"]["scale_dtype"]
+
+    def test_prepare_and_convert_api(self):
+        inp = torch.ones([1, 10], dtype=torch.long)
+        gpt_j_model = copy.deepcopy(self.gptj)
+        tokenizer = transformers.AutoTokenizer.from_pretrained(
+            "hf-internal-testing/tiny-random-GPTJForCausalLM", trust_remote_code=True
+        )
+
+        out1 = gpt_j_model(inp)
+        quant_config = get_default_AutoRound_config()
+        logger.info(f"Test AutoRound with config {quant_config}")
+
+        run_fn = get_autoround_default_run_fn
+        run_args = (
+            tokenizer,
+            "NeelNanda/pile-10k",
+            20,
+            10,
+        )
+        fp32_model = gpt_j_model
+
+        # quantizer execution: prepare -> calibrate via run_fn -> convert
+        model = prepare(model=fp32_model, quant_config=quant_config)
+        run_fn(model, *run_args)
+        q_model = convert(model)
+
+        out2 = q_model(inp)
+        assert torch.allclose(out1[0], out2[0], atol=1e-1)
+        assert "transformer.h.0.attn.k_proj" in q_model.autoround_config.keys()
+        assert "scale" in q_model.autoround_config["transformer.h.0.attn.k_proj"].keys()
+        assert torch.float32 == q_model.autoround_config["transformer.h.0.attn.k_proj"]["scale_dtype"]
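For reference, the new `test_prepare_and_convert_api` exercises the three-step `prepare` -> calibrate -> `convert` flow instead of the one-shot `quantize` entry point. Below is a minimal standalone sketch of that flow, assuming `auto_round` is installed; it reuses the same tiny GPT-J checkpoint and the same positional calibration arguments that the test passes to `get_autoround_default_run_fn`:

```python
import torch
import transformers

from neural_compressor.torch.algorithms.weight_only.autoround import get_autoround_default_run_fn
from neural_compressor.torch.quantization import convert, get_default_AutoRound_config, prepare

# Load the tiny fp32 model and its tokenizer (same checkpoint as the test).
model = transformers.AutoModelForCausalLM.from_pretrained(
    "hf-internal-testing/tiny-random-GPTJForCausalLM", torchscript=True
)
tokenizer = transformers.AutoTokenizer.from_pretrained(
    "hf-internal-testing/tiny-random-GPTJForCausalLM", trust_remote_code=True
)

# Step 1: prepare the fp32 model for AutoRound calibration.
quant_config = get_default_AutoRound_config()
model = prepare(model=model, quant_config=quant_config)

# Step 2: calibrate with the default AutoRound run_fn, feeding it the same
# positional args as the test (tokenizer, dataset name, and two small
# test-sized integers).
get_autoround_default_run_fn(model, tokenizer, "NeelNanda/pile-10k", 20, 10)

# Step 3: convert the calibrated model into its weight-only quantized form;
# per-layer metadata then lands on q_model.autoround_config.
q_model = convert(model)

out = q_model(torch.ones([1, 10], dtype=torch.long))
```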