tests/llmcompressor/transformers/gptq/test_oneshot.py (19 changes: 10 additions & 9 deletions)
@@ -20,14 +20,15 @@
type: "int"
symmetric: true
strategy: "channel"
targets: ["Linear"]
targets: ["re:.*model.layers.2.self_attn.q_proj$"]
"""

recipe_modifier_full = GPTQModifier(
ignore=["lm_head"],
config_groups={
"group_0": QuantizationScheme(
targets=["Linear"], weights=QuantizationArgs(num_bits=4, strategy="channel")
targets=["re:.*model.layers.2.self_attn.q_proj$"],
weights=QuantizationArgs(num_bits=4, strategy="channel"),
)
},
)
@@ -36,18 +37,18 @@
ignore=["lm_head"],
config_groups={
"group_0": QuantizationScheme(
targets=["Linear"],
targets=["re:.*model.layers.2.self_attn.q_proj$"],
weights=QuantizationArgs(num_bits=4, strategy="group", group_size=128),
)
},
)

recipe_modifier_shorthand_a = GPTQModifier(
ignore=["lm_head"], targets="Linear", scheme="W4A16"
ignore=["lm_head"], targets="re:.*model.layers.2.self_attn.q_proj$", scheme="W4A16"
)

recipe_modifier_shorthand_b = GPTQModifier(
ignore=["lm_head"], scheme={"W4A16": ["Linear"]}
ignore=["lm_head"], scheme={"W4A16": ["re:.*model.layers.2.self_attn.q_proj$"]}
)


@@ -65,7 +66,7 @@ def setUp(self):
import torch

self.output = "./oneshot_output"
self.model = "Xenova/llama2.c-stories110M"
self.model = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
self.dataset = "open_platypus"
self.device = "cuda:0" if torch.cuda.is_available() else "cpu"

@@ -95,17 +96,17 @@ def test_oneshot_application(self):
assert quantization_config is not None

# check config is set properly
-assert quantization_config.ignore == ["lm_head"]
+assert "lm_head" in quantization_config.ignore
assert len(quantization_config.config_groups) == 1
quant_scheme = quantization_config.config_groups["group_0"]
assert isinstance(quant_scheme, QuantizationScheme)
-assert quant_scheme.targets == ["Linear"]
+assert quant_scheme.targets == ["re:.*model.layers.2.self_attn.q_proj$"]
weight_args = quantization_config.config_groups["group_0"].weights
assert isinstance(weight_args, QuantizationArgs)
assert weight_args.num_bits == 4

# Check a specific layer is quantized
-targetted_linear_layer = model_loaded.model.layers[0].self_attn.k_proj
+targetted_linear_layer = model_loaded.model.layers[2].self_attn.q_proj
assert hasattr(targetted_linear_layer, "quantization_scheme")

# Check lm-head is not quantized
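
For context, a minimal sketch of how a recipe like the ones updated in this diff might be applied outside the test harness. It reuses the regex target, model, and dataset names that appear above; the oneshot import path and keyword names (e.g. output_dir) are assumptions that can differ between llmcompressor versions, so treat this as illustrative rather than the test's exact code.

# Sketch only: import paths and oneshot kwargs assumed from recent
# llmcompressor releases; adjust to the installed version.
from llmcompressor.modifiers.quantization import GPTQModifier
from llmcompressor.transformers import oneshot  # some versions expose `from llmcompressor import oneshot`

# Quantize only q_proj in decoder layer 2, mirroring the regex target used
# by the updated test; lm_head stays unquantized.
recipe = GPTQModifier(
    ignore=["lm_head"],
    targets="re:.*model.layers.2.self_attn.q_proj$",
    scheme="W4A16",
)

oneshot(
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # model used in the updated test
    dataset="open_platypus",                     # calibration dataset from the test
    recipe=recipe,
    output_dir="./oneshot_output",               # assumed kwarg name
)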