Skip to content

Commit dfaacea

Browse files
committed
Merge branch 'hz-fix-example-quantization-2of4' of https://github.com/vllm-project/llm-compressor into hz-fix-example-quantization-2of4
2 parents ad3f2db + f1744d7 commit dfaacea

File tree

3 files changed

+11
-4
lines changed

3 files changed

+11
-4
lines changed

src/llmcompressor/transformers/finetune/session_mixin.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -270,7 +270,7 @@ def compute_loss(
270270
model: Module,
271271
inputs: Dict[str, Any],
272272
return_outputs: bool = False,
273-
num_items_in_batch: Optional[int] = None,
273+
num_items_in_batch: Optional[torch.Tensor] = None,
274274
) -> Union[torch.Tensor, Tuple[torch.Tensor, Any]]:
275275
"""
276276
Override for the compute_loss to factor trigger callbacks and filter columns
@@ -279,6 +279,7 @@ def compute_loss(
279279
:param inputs: the inputs to pass through the model for calculating the loss
280280
:param return_outputs: True to return the outputs with the loss,
281281
False otherwise
282+
:param num_items_in_batch: the number of items which contribute to loss
282283
:return: the resulting loss if not return_outputs, otherwise a tuple
283284
containing the loss and the model's outputs
284285
"""

src/llmcompressor/utils/dev.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -126,11 +126,17 @@ def dispatch_for_generation(model: PreTrainedModel) -> PreTrainedModel:
126126
"""
127127
remove_dispatch(model)
128128

129+
no_split_module_classes = model._get_no_split_modules("auto")
129130
max_memory = get_balanced_memory(
130131
model,
131132
dtype=model.dtype,
132-
no_split_module_classes=model._get_no_split_modules("auto"),
133+
no_split_module_classes=no_split_module_classes,
134+
)
135+
device_map = infer_auto_device_map(
136+
model,
137+
dtype=model.dtype,
138+
max_memory=max_memory,
139+
no_split_module_classes=no_split_module_classes,
133140
)
134-
device_map = infer_auto_device_map(model, dtype=model.dtype, max_memory=max_memory)
135141

136142
return dispatch_model(model, device_map=device_map)

tests/llmcompressor/transformers/finetune/test_oneshot_and_finetune_with_tokenizer.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ def test_oneshot_and_finetune_with_tokenizer(self):
3636
self.model,
3737
)
3838
model_loaded = AutoModelForCausalLM.from_pretrained(
39-
self.model, device_map="cuda:0", torch_dtype="auto"
39+
self.model, torch_dtype="auto"
4040
)
4141

4242
dataset_loaded = load_dataset(

0 commit comments

Comments
 (0)