
Commit 3ee99bd

Author: Sara Adkins

DeepSeek: Fix Hessian Estimation (#157)

* loop through all decoder layers to figure out hessians
* remove print

1 parent f8e0ef6 · commit 3ee99bd
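For scale, here is the quantity this estimate protects against (a back-of-the-envelope sketch; the 4096 column width is an illustrative assumption, not a value from this commit). GPTQ keeps one float32 Hessian per quantized Linear layer, sized by the square of that layer's input dimension:

```python
# Hypothetical sizing example: Hessian bytes for a single Linear layer
# with a 4096-wide input (an assumed value, not from this commit).
column_size = 4096
bytes_per_weight = 32 // 8  # hessians are float32, per the diff below
hessian_bytes = column_size * column_size * bytes_per_weight
print(f"{hessian_bytes / 2**20:.0f} MiB")  # -> 64 MiB for one Hessian
```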

File tree

2 files changed: +19 −15 lines

  • src/llmcompressor/transformers/compression
  • tests/llmcompressor/transformers/compression/run_compressed_configs

src/llmcompressor/transformers/compression/helpers.py

Lines changed: 18 additions & 14 deletions

```diff
@@ -107,21 +107,25 @@ def hessian_memory_requirements(model: torch.nn.Module) -> int:
     :return: number of bytes required to reserve for GPTQ on a single layer
     """
     transformer_layers = get_layers(get_no_split_params(model), model)
-    single_layer = transformer_layers[list(transformer_layers.keys())[0]]
-    total_hessian_elems = 0
-    max_column_size = 0
-    for _, module in single_layer.named_modules():
-        if isinstance(module, Linear):
-            for param in module.parameters():
-                column_size = param.shape[1]
-                total_hessian_elems += column_size * column_size
-                if column_size > max_column_size:
-                    # max extra memory for inverse calculation
-                    max_column_size = column_size
-
+    total_hessian_elems = {}
+    max_column_size = {}
+    for no_split_name, no_split_layer in transformer_layers.items():
+        total_hessian_elems[no_split_name] = 0
+        max_column_size[no_split_name] = 0
+        for name, module in no_split_layer.named_modules():
+            if isinstance(module, Linear):
+                for param in module.parameters():
+                    column_size = param.shape[1]
+                    total_hessian_elems[no_split_name] += column_size * column_size
+                    if column_size > max_column_size[no_split_name]:
+                        # max extra memory for inverse calculation
+                        max_column_size[no_split_name] = column_size
+
+    max_total_hessian_elems = max(total_hessian_elems.values())
+    overall_max_column_size = max(max_column_size.values())
     bytes_per_weight = 32 // 8  # hessians are float32
-    inverse_reserved = max_column_size * max_column_size
-    return (total_hessian_elems + inverse_reserved) * bytes_per_weight
+    inverse_reserved = overall_max_column_size * overall_max_column_size
+    return (max_total_hessian_elems + inverse_reserved) * bytes_per_weight
 
 
 def quantization_memory_requirement(model: torch.nn.Module) -> int:
```
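The substance of the fix: the previous code measured only the first decoder layer and assumed every other layer matched it, which misestimates memory for models whose decoder layers differ in shape (the commit title points at DeepSeek, presumably its MoE variants, where expert-bearing layers are not shaped like the first layer). The new code scans every no-split layer and reserves for the worst case. A minimal standalone sketch of that logic, using toy modules in place of llmcompressor's `get_layers` helpers; `estimate_hessian_bytes` and the layer names below are illustrative stand-ins, not llmcompressor APIs:

```python
# Minimal sketch of the fixed estimation; not the llmcompressor helper itself.
import torch
from torch.nn import Linear


def estimate_hessian_bytes(transformer_layers: dict) -> int:
    total_hessian_elems = {}
    max_column_size = {}
    for name, layer in transformer_layers.items():
        total_hessian_elems[name] = 0
        max_column_size[name] = 0
        for _, module in layer.named_modules():
            if isinstance(module, Linear):
                for param in module.parameters():
                    column_size = param.shape[1]
                    total_hessian_elems[name] += column_size * column_size
                    max_column_size[name] = max(max_column_size[name], column_size)
    bytes_per_weight = 32 // 8  # hessians are float32
    inverse_reserved = max(max_column_size.values()) ** 2
    # reserve for the largest layer, not whichever layer happens to be first
    return (max(total_hessian_elems.values()) + inverse_reserved) * bytes_per_weight


# Two differently sized "decoder layers"; bias=False keeps param.shape[1]
# well-defined, since a 1-D bias parameter has no second dimension.
layers = {
    "layer_0": torch.nn.Sequential(Linear(512, 512, bias=False)),
    "layer_1": torch.nn.Sequential(Linear(2048, 2048, bias=False)),
}
print(estimate_hessian_bytes(layers))  # 33554432 bytes, driven by layer_1
```

Sampling only `layer_0` here would have reserved 2 MiB where 32 MiB is needed.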
tests/llmcompressor/transformers/compression/run_compressed_configs (test config; filename not shown in this view)

Lines changed: 1 addition & 1 deletion

```diff
@@ -1,3 +1,3 @@
 cadence: "commit"
 test_type: "regression"
-model_stub: "nm-testing/tinyllama-w4a16-compressed"
+model_stub: "nm-testing/tinyllama-w8a8-compressed"
```
