
Commit b4e37b7

Fix bugs for gptq exporting with static_groups (#1614)
Signed-off-by: YIYANGCAI <[email protected]>
Parent: f812e67

File tree

1 file changed: +7 −1 lines

  • neural_compressor/adaptor/torch_utils/gptq.py


neural_compressor/adaptor/torch_utils/gptq.py

Lines changed: 7 additions & 1 deletion
@@ -668,7 +668,8 @@ def tmp(_, inp, out):
             gptq_config[self.get_full_layer_name(layer_name, block_idx)] = {"scale": scale}
             if not weight_config_this_layer["sym"]:
                 gptq_config[self.get_full_layer_name(layer_name, block_idx)]["zero"] = zp
-            if weight_config_this_layer["act_order"]:  # save perm for restoring the weights
+            if weight_config_this_layer["act_order"] and not weight_config_this_layer["static_groups"]:
+                # save perm for restoring the weights, but only when static_groups is not enabled.
                 gptq_config[self.get_full_layer_name(layer_name, block_idx)]["perm"] = gptq_for_this_block[
                     layer_name
                 ].perm
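For context, a minimal sketch of why `perm` only matters without static_groups. With act_order, GPTQ quantizes columns in permuted order, so the exporter must undo that permutation; with static_groups, the group-wise quantizers are fixed on the original column order before any permutation, so nothing needs restoring. The restore step below is an assumption-based illustration, not the exporter's actual code; `weight` is a toy tensor.

    import torch

    # Hypothetical illustration: undoing an act_order permutation at export
    # time using the saved `perm` from gptq_config (see the hunk above).
    weight = torch.randn(16, 8)             # toy [out_features, in_features] weight
    perm = torch.randperm(weight.shape[1])  # column order used during quantization

    quantized_order = weight[:, perm]       # columns as they were quantized
    invperm = torch.argsort(perm)           # inverse permutation
    restored = quantized_order[:, invperm]  # back to the original column order

    assert torch.equal(restored, weight)
    # With static_groups, quantizer parameters are precomputed on the original
    # column order, so saving `perm` (and this restore step) is unnecessary.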
@@ -828,6 +829,11 @@ def fasterquant(self, W, blocksize=128, percdamp=0.01, groupsize=-1, act_order=F
                         zero.append(self.quantizer.zero)
                 else:
                     idx = i1 + i
+                    if (i1 + i) % groupsize == 0:
+                        # load the pre-calculated quantization parameters in groups
+                        static_quantizer = groups[(i1 + i) // groupsize]
+                        scale.append(static_quantizer.scale)
+                        zero.append(static_quantizer.zero)
                     if act_order:
                         idx = perm[idx]
                     self.quantizer = groups[idx // groupsize]
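A minimal sketch of the group bookkeeping the added lines rely on, under the assumption (matching the upstream GPTQ approach) that static_groups precomputes one quantizer per groupsize-wide slice of the original, unpermuted columns; the fix then appends each group's scale and zero exactly once, when the running column index crosses a group boundary. The `Quantizer` class here is a hypothetical stand-in for the quantizer object held in `self.quantizer`; `groups`, `scale`, `zero`, and `static_quantizer` mirror the names in the diff.

    import torch

    class Quantizer:
        """Hypothetical stand-in: holds per-group quantization parameters."""
        def find_params(self, w):
            # symmetric min-max scaling, just for illustration (e.g. 4-bit signed)
            self.scale = w.abs().max() / 7.0
            self.zero = torch.tensor(0.0)

    W = torch.randn(16, 256)
    groupsize = 128

    # static_groups: one quantizer per contiguous column group, computed on the
    # *original* (unpermuted) columns, like the `groups` list in the diff.
    groups = []
    for start in range(0, W.shape[1], groupsize):
        q = Quantizer()
        q.find_params(W[:, start : start + groupsize])
        groups.append(q)

    # The added hunk collects each group's parameters exactly once, when the
    # column index crosses a group boundary.
    scale, zero = [], []
    for idx in range(W.shape[1]):
        if idx % groupsize == 0:
            static_quantizer = groups[idx // groupsize]
            scale.append(static_quantizer.scale)
            zero.append(static_quantizer.zero)

    assert len(scale) == W.shape[1] // groupsize  # one entry per group

Before this fix, the else branch never populated `scale` and `zero`, so exporting a model quantized with static_groups produced empty parameter lists.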
