6
6
3. Saves compressed model to destination (local or S3)
7
7
8
8
Usage:
9
- python compress_checkpoint.py <source_path> <destination_path>
9
+ python compress_checkpoint.py <source_path> <destination_path> --recipe <recipe_path>
10
10
11
11
source_path: Path to checkpoint (local or S3)
12
12
destination_path: Where to save compressed checkpoint (local or S3)
13
+ recipe_path: Path to quantization recipe YAML file (required, passed via --recipe)
13
14
"""
14
15
15
16
import argparse
22
23
import boto3
23
24
import torch
24
25
from llmcompressor import oneshot
25
- from llmcompressor .modifiers .quantization import QuantizationModifier
26
26
from transformers import AutoTokenizer , Qwen2VLForConditionalGeneration , Qwen2_5_VLForConditionalGeneration
27
27
28
28
from olmocr .s3_utils import parse_s3_path
@@ -150,7 +150,7 @@ def copy_additional_files(source_path: str, dest_path: str, temp_source_dir: Opt
150
150
shutil .copy2 (source_file , dest_file )
151
151
152
152
153
- def compress_checkpoint (source_path : str , dest_path : str ) -> None :
153
+ def compress_checkpoint (source_path : str , dest_path : str , recipe_path : str ) -> None :
154
154
"""Compress OlmOCR checkpoint using FP8 quantization."""
155
155
# Load model and tokenizer
156
156
model , tokenizer , temp_source_dir = load_model_and_tokenizer (source_path )
@@ -162,16 +162,9 @@ def compress_checkpoint(source_path: str, dest_path: str) -> None:
162
162
print (f"{ name } : shape={ list (param .shape )} , dtype={ param .dtype } " )
163
163
print ("=========================\n " )
164
164
165
- # Configure FP8 dynamic quantization
166
- print ("\n Applying FP8 dynamic quantization..." )
167
- recipe = QuantizationModifier (
168
- targets = "Linear" ,
169
- scheme = "FP8_DYNAMIC" ,
170
- ignore = ["re:.*lm_head" , "re:visual.*" ],
171
- )
172
-
173
- # Apply the quantization
174
- oneshot (model = model , recipe = recipe )
165
+ # Apply quantization using provided recipe
166
+ print (f"\n Applying quantization using recipe: { recipe_path } " )
167
+ oneshot (model = model , recipe = recipe_path )
175
168
print ("✓ Quantization completed successfully" )
176
169
177
170
# Save the compressed model
@@ -218,25 +211,26 @@ def main():
218
211
epilog = """
219
212
Examples:
220
213
# Local to local
221
- python compress_checkpoint.py /path/to/checkpoint /path/to/compressed
214
+ python compress_checkpoint.py /path/to/checkpoint /path/to/compressed --recipe train/quantization_configs/qwen2_5vl_w8a8_fp8.yaml
222
215
223
216
# S3 to S3
224
- python compress_checkpoint.py s3://bucket/checkpoint s3://bucket/compressed
217
+ python compress_checkpoint.py s3://bucket/checkpoint s3://bucket/compressed --recipe train/quantization_configs/qwen2vl_w8a8_fp8.yaml
225
218
226
219
# S3 to local
227
- python compress_checkpoint.py s3://bucket/checkpoint /path/to/compressed
220
+ python compress_checkpoint.py s3://bucket/checkpoint /path/to/compressed --recipe train/quantization_configs/qwen2_5vl_w8a8_fp8.yaml
228
221
229
222
# Local to S3
230
- python compress_checkpoint.py /path/to/checkpoint s3://bucket/compressed
223
+ python compress_checkpoint.py /path/to/checkpoint s3://bucket/compressed --recipe train/quantization_configs/qwen2vl_w8a8_fp8.yaml
231
224
"""
232
225
)
233
226
parser .add_argument ("source" , help = "Source checkpoint path (local or S3)" )
234
227
parser .add_argument ("destination" , help = "Destination path for compressed checkpoint (local or S3)" )
228
+ parser .add_argument ("--recipe" , required = True , help = "Path to quantization recipe YAML file" )
235
229
236
230
args = parser .parse_args ()
237
231
238
232
try :
239
- compress_checkpoint (args .source , args .destination )
233
+ compress_checkpoint (args .source , args .destination , args . recipe )
240
234
except Exception as e :
241
235
print (f"\n ❌ Error: { e } " )
242
236
return 1
0 commit comments