
Commit b6dc7d4

Fix examples style, Fix noqa comment (#123)

1 parent: 8e43aaa

4 files changed (+21, -11 lines)


examples/quantization_kv_cache/llama3_fp8_kv_example.py (9 additions, 1 deletion)

@@ -25,9 +25,17 @@
 ds = load_dataset(DATASET_ID, split=DATASET_SPLIT)
 ds = ds.shuffle(seed=42).select(range(NUM_CALIBRATION_SAMPLES))
 
+
 def process_and_tokenize(example):
     text = tokenizer.apply_chat_template(example["messages"], tokenize=False)
-    return tokenizer(text, padding=False, max_length=MAX_SEQUENCE_LENGTH, truncation=True, add_special_tokens=False)
+    return tokenizer(
+        text,
+        padding=False,
+        max_length=MAX_SEQUENCE_LENGTH,
+        truncation=True,
+        add_special_tokens=False,
+    )
+
 
 ds = ds.map(process_and_tokenize, remove_columns=ds.column_names)
 
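The reformatted call above is the shape black produces once a call exceeds the line-length limit: one argument per line with a trailing comma. For context, a minimal, self-contained sketch of the calibration preprocessing this example performs; the dataset, tokenizer checkpoint, and constant values here are placeholder assumptions, not taken from this commit:

from datasets import load_dataset
from transformers import AutoTokenizer

# Placeholder values; the real example defines its own constants.
DATASET_ID = "HuggingFaceH4/ultrachat_200k"
DATASET_SPLIT = "train_sft"
NUM_CALIBRATION_SAMPLES = 512
MAX_SEQUENCE_LENGTH = 2048

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")

ds = load_dataset(DATASET_ID, split=DATASET_SPLIT)
ds = ds.shuffle(seed=42).select(range(NUM_CALIBRATION_SAMPLES))


def process_and_tokenize(example):
    # Render the chat messages to one string, then tokenize with truncation.
    text = tokenizer.apply_chat_template(example["messages"], tokenize=False)
    return tokenizer(
        text,
        padding=False,
        max_length=MAX_SEQUENCE_LENGTH,
        truncation=True,
        add_special_tokens=False,
    )


ds = ds.map(process_and_tokenize, remove_columns=ds.column_names)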

examples/quantization_w8a8_fp8/gemma2_example.py (5 additions, 6 deletions)

@@ -7,22 +7,21 @@
 
 # 1) Load model.
 model = SparseAutoModelForCausalLM.from_pretrained(
-    MODEL_ID, device_map="auto", torch_dtype="auto")
+    MODEL_ID, device_map="auto", torch_dtype="auto"
+)
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 
 # 2) Configure the quantization algorithm and scheme.
 # In this case, we:
 # * quantize the weights to fp8 with per channel via ptq
 # * quantize the activations to fp8 with dynamic per token
 recipe = QuantizationModifier(
-    targets="Linear", scheme="FP8_DYNAMIC", ignore=["lm_head"])
+    targets="Linear", scheme="FP8_DYNAMIC", ignore=["lm_head"]
+)
 
 # 3) Apply quantization and save in compressed-tensors format.
 OUTPUT_DIR = MODEL_ID.split("/")[1] + "-FP8-Dynamic"
-oneshot(model=model,
-        recipe=recipe,
-        output_dir=OUTPUT_DIR,
-        tokenizer=tokenizer)
+oneshot(model=model, recipe=recipe, output_dir=OUTPUT_DIR, tokenizer=tokenizer)
 
 # Confirm generations of the quantized model look sane.
 print("========== SAMPLE GENERATION ==============")
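The file's final step (the "SAMPLE GENERATION" print) is a quick smoke test that the FP8-quantized model still produces coherent text. A minimal sketch of what such a check typically looks like; the prompt and generation settings are assumptions, since the actual check is not shown in this diff:

# Run a short greedy generation and print it for manual inspection.
input_ids = tokenizer("Hello my name is", return_tensors="pt").input_ids.to(model.device)
output = model.generate(input_ids, max_new_tokens=20)
print(tokenizer.decode(output[0]))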

examples/quantization_w8a8_int8/gemma2_example.py (5 additions, 2 deletions)

@@ -7,7 +7,10 @@
 # 1) Select model and load it.
 MODEL_ID = "google/gemma-2-2b-it"
 model = SparseAutoModelForCausalLM.from_pretrained(
-    MODEL_ID, device_map="auto", torch_dtype="auto",)
+    MODEL_ID,
+    device_map="auto",
+    torch_dtype="auto",
+)
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 
 # 2) Prepare calibration dataset.

@@ -62,7 +65,7 @@ def tokenize(sample):
     recipe=recipe,
     max_seq_length=MAX_SEQUENCE_LENGTH,
     num_calibration_samples=NUM_CALIBRATION_SAMPLES,
-    output_dir=MODEL_ID.split("/")[1] + "-INT8"
+    output_dir=MODEL_ID.split("/")[1] + "-INT8",
 )
 
 # Confirm generations of the quantized model look sane.
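The trailing comma added in the second hunk follows black's "magic trailing comma" convention: once a call is formatted one argument per line, every argument, including the last, ends with a comma, so appending a new argument later touches a single line in the diff. For illustration, the full call shape after this change would look like the sketch below; the opening lines are an assumption, since the diff hunk starts at recipe=recipe:

oneshot(
    model=model,
    recipe=recipe,
    max_seq_length=MAX_SEQUENCE_LENGTH,
    num_calibration_samples=NUM_CALIBRATION_SAMPLES,
    output_dir=MODEL_ID.split("/")[1] + "-INT8",
)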

tests/e2e/vLLM/test_vllm.py (2 additions, 2 deletions)

@@ -41,11 +41,11 @@ class TestvLLM(unittest.TestCase):
     run on a cadence defined by the `cadence` field. Each config defines the model
     to quantize. Optionally, a dataset id and split can be provided for calibration.
     Finally, all config files must list a scheme. The scheme can be a preset scheme
-    from https://github.com/neuralmagic/compressed-tensors/blob/main/src/compressed_tensors/quantization/quant_scheme.py # noqa: E501
+    from https://github.com/neuralmagic/compressed-tensors/blob/main/src/compressed_tensors/quantization/quant_scheme.py
     or another identifier which can be used for the particular test case. If a recipe
     is not provided, it is assumed that the scheme provided is a preset scheme and will
     be used for quantization. Otherwise, the recipe will always be used if given.
-    """
+    """  # noqa: E501
 
     model = None
     scheme = None
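Moving the # noqa: E501 marker after the closing """ keeps it out of the docstring text itself, so it no longer appears in help() output or generated docs. flake8 still honors it: as of flake8 3.x, a noqa comment found on any physical line of a multiline string, including the line with the closing quotes, is applied to every line of that string, so the single marker covers the long URL line too.

For illustration, a test config of the shape this docstring describes might look like the following sketch; the field names beyond cadence, model, scheme, dataset id/split, and recipe are assumptions, not taken from the repository:

# Hypothetical test config mirroring the fields the docstring names.
config = {
    "cadence": "nightly",                          # how often the test runs
    "model": "google/gemma-2-2b-it",               # model to quantize
    "scheme": "FP8_DYNAMIC",                       # preset scheme or custom identifier
    "dataset_id": "HuggingFaceH4/ultrachat_200k",  # optional, for calibration
    "dataset_split": "train_sft",                  # optional
    # "recipe": "recipe.yaml",                     # if given, always used over the preset
}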
