Skip to content

Commit d509a44

Browse files
committed
[aot] Fix aot quantization for weight only quantization (#2079)
(cherry picked from commit 00f7412)
1 parent 5d1477a commit d509a44

File tree

1 file changed

+5
-1
lines changed

1 file changed

+5
-1
lines changed

serving/docker/partition/partition.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,8 @@
35 35

36 36
ALLOW_PATTERNS = ["*.json", "*.pt", "*.bin", "*.txt"]
37 37

38+
WEIGHT_ONLY_QUANTIZATION_TYPES = ["static_int8"]
39+
38 40

39 41
class PartitionService(object):
40 42

@@ -326,7 +328,9 @@ def main():
326 328
extract_python_jar(PYTHON_CACHE_DIR)
327 329

328 330
service = PartitionService(properties_manager)
329-
if properties_manager.properties.get('option.quantize'):
331+
if properties_manager.properties.get(
332+
'option.quantize') and properties_manager.properties.get(
333+
'option.quantize') not in WEIGHT_ONLY_QUANTIZATION_TYPES:
330 334
service.run_quantization()
331 335
else:
332 336
service.run_partition()

0 commit comments

Comments
 (0)