|
11 | 11 | source_path: Path to checkpoint (local or S3)
|
12 | 12 | destination_path: Where to save compressed checkpoint (local or S3)
|
13 | 13 | recipe_path: Path to quantization config YAML file
|
14 |
| - num_calibration_samples: Number of calibration samples to use (default: 100, set to 0 to disable) |
| 14 | + num_calibration_samples: Number of calibration samples to use (default: 256, set to 0 to disable) |
15 | 15 | calibration_pdfs: Glob pattern for PDF paths to use for calibration (required when num_calibration_samples > 0)
|
16 | 16 | """
|
17 | 17 |
|
@@ -253,7 +253,7 @@ def data_collator(batch):
|
253 | 253 | return {key: torch.tensor(value) for key, value in batch[0].items()}
|
254 | 254 |
|
255 | 255 |
|
256 |
| -def compress_checkpoint(source_path: str, dest_path: str, recipe_path: str, num_calibration_samples: int = 100, calibration_pdfs: Optional[List[str]] = None) -> None: |
| 256 | +def compress_checkpoint(source_path: str, dest_path: str, recipe_path: str, num_calibration_samples: int = 256, calibration_pdfs: Optional[List[str]] = None) -> None: |
257 | 257 | """Compress OlmOCR checkpoint using FP8 quantization."""
|
258 | 258 | # Load model and tokenizer
|
259 | 259 | model, tokenizer, temp_source_dir = load_model_and_tokenizer(source_path)
|
@@ -366,8 +366,8 @@ def main():
|
366 | 366 | parser.add_argument("source", help="Source checkpoint path (local or S3)")
|
367 | 367 | parser.add_argument("destination", help="Destination path for compressed checkpoint (local or S3)")
|
368 | 368 | parser.add_argument("--recipe", required=True, help="Path to quantization recipe YAML file")
|
369 |
| - parser.add_argument("--num-calibration-samples", type=int, default=100, |
370 |
| - help="Number of calibration samples to use (default: 100, set to 0 to disable)") |
| 369 | + parser.add_argument("--num-calibration-samples", type=int, default=256, |
| 370 | + help="Number of calibration samples to use (default: 256, set to 0 to disable)") |
371 | 371 | parser.add_argument("--calibration-pdfs", type=str, default=None,
|
372 | 372 | help="Glob pattern for calibration PDF paths (e.g., '/path/to/pdfs/*.pdf' or '/data/**/*.pdf'). Required when num-calibration-samples > 0.")
|
373 | 373 |
|
|
0 commit comments