1 parent b06c4cf commit f326cd4
examples/quantization_2of4_sparse_w4a16/llama7b_sparse_w4a16.py
@@ -54,8 +54,8 @@
     warmup_ratio=warmup_ratio,
 )
 logger.info(
-    "Note: llcompressor does not currently support running ",
-    "compressed models in the marlin-24 format. The model ",
-    "produced from this example can be run on vLLM with ",
-    "dtype=torch.float16",
+    "llmcompressor does not currently support running compressed models in the marlin24 format."  # noqa
+)
+logger.info(
+    "The model produced from this example can be run on vLLM with dtype=torch.float16"
 )
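Per the updated log message, the model produced by this example can be served with vLLM using dtype=torch.float16. A minimal sketch of doing so, assuming the example saves its output to ./output_llama7b_2of4_w4a16 (a hypothetical path; substitute the actual save directory):

import torch
from vllm import LLM, SamplingParams

# Hypothetical path to the compressed model saved by this example.
model_path = "./output_llama7b_2of4_w4a16"

# Load with float16, as the log message recommends for this format.
llm = LLM(model=model_path, dtype=torch.float16)
params = SamplingParams(max_tokens=32)
print(llm.generate(["Hello, my name is"], params)[0].outputs[0].text)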