Skip to content

Commit 07c1fd7

Browse files
Merge pull request #32 from vllm-project/sa/revert_naive_compressor
Revert naive compression format
2 parents e4cf25d + e1a47a3 commit 07c1fd7

File tree

2 files changed

+11
-2
lines changed

2 files changed

+11
-2
lines changed

src/llmcompressor/transformers/compression/quantization_format.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,15 @@ def infer_quantization_format(
             return CompressionFormat.marlin_24
         return CompressionFormat.pack_quantized
     else:  # w8a8 float and int
+        if len(weight_args) == 1:
+            if (
+                weight_args[0].type == QuantizationType.FLOAT.value
+                and weight_args[0].num_bits == 8
+            ):
+                return CompressionFormat.float_quantized
+            if weight_args[0].type == QuantizationType.INT.value:
+                return CompressionFormat.int_quantized
+
         return CompressionFormat.naive_quantized
 else:
     # format will be inferred from config

tests/llmcompressor/transformers/compression/test_infer_quant_format.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,12 @@
 @pytest.mark.parametrize(
     "preset,sparsity_structure,expected_format",
     [
-        ["W8A8", "unstructured", "naive-quantized"],
+        ["W8A8", "unstructured", "int-quantized"],
         ["W8A16", "unstructured", "pack-quantized"],
         ["W8A16", "2:4", "marlin-24"],
         ["W4A16", "unstructured", "pack-quantized"],
         ["W4A16", "2:4", "marlin-24"],
-        ["FP8", "unstructured", "naive-quantized"],
+        ["FP8", "unstructured", "float-quantized"],
     ],
 )
 def test_infer_quant_format(preset, sparsity_structure, expected_format):

0 commit comments

Comments
 (0)