Merged
Changes from 4 commits
91 changes: 67 additions & 24 deletions neural_compressor/common/utils/utility.py
@@ -39,6 +39,7 @@
"CpuInfo",
"default_tuning_logger",
"call_counter",
"cpu_info",
]


@@ -89,7 +90,7 @@ def __call__(self, *args, **kwargs):

@singleton
class CpuInfo(object):
"""CPU info collection."""
"""Get CPU Info."""

def __init__(self):
"""Get whether the cpu numerical format is bf16, the number of sockets, cores and cores per socket."""
@@ -110,14 +111,28 @@ def __init__(self):
b"\xB8\x07\x00\x00\x00" b"\x0f\xa2" b"\xC3", # mov eax, 7 # cpuid # ret
)
self._bf16 = bool(eax & (1 << 5))
# TODO: The implementation will be refined in the future.
# https://github.com/intel/neural-compressor/tree/detect_sockets
if "arch" in info and "ARM" in info["arch"]: # pragma: no cover
self._sockets = 1
else:
self._sockets = self.get_number_of_sockets()
self._cores = psutil.cpu_count(logical=False)
self._cores_per_socket = int(self._cores / self._sockets)
self._info = info
# Detect the info below lazily, only when it is first needed
self._cores = None
self._sockets = None
self._cores_per_socket = None

@staticmethod
def _detect_cores():
physical_cores = psutil.cpu_count(logical=False)
return physical_cores

@property
def cores(self):
"""Get the number of cores in platform."""
if self._cores is None:
self._cores = self._detect_cores()
return self._cores

@cores.setter
def cores(self, num_of_cores):
"""Set the number of cores in platform."""
self._cores = num_of_cores

@property
def bf16(self):
@@ -130,30 +145,58 @@ def vnni(self):
return self._vnni

@property
def cores_per_socket(self):
def cores_per_socket(self) -> int:
"""Get the cores per socket."""
if self._cores_per_socket is None:
self._cores_per_socket = self.cores // self.sockets
return self._cores_per_socket

def get_number_of_sockets(self) -> int:
"""Get number of sockets in platform."""
@property
def sockets(self):
"""Get the number of sockets in platform."""
if self._sockets is None:
self._sockets = self._get_number_of_sockets()
return self._sockets

@sockets.setter
def sockets(self, num_of_sockets):
"""Set the number of sockets in platform."""
self._sockets = num_of_sockets

def _get_number_of_sockets(self) -> int:
if "arch" in self._info and "ARM" in self._info["arch"]: # pragma: no cover
return 1

num_sockets = None
cmd = "cat /proc/cpuinfo | grep 'physical id' | sort -u | wc -l"
if psutil.WINDOWS:
cmd = r'wmic cpu get DeviceID | C:\Windows\System32\find.exe /C "CPU"'
elif psutil.MACOS: # pragma: no cover
cmd = "sysctl -n machdep.cpu.core_count"

with subprocess.Popen(
args=cmd,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
universal_newlines=False,
) as proc:
proc.wait()
if proc.stdout:
for line in proc.stdout:
return int(line.decode("utf-8", errors="ignore").strip())
return 0
num_sockets = None
try:
with subprocess.Popen(
args=cmd,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
universal_newlines=False,
) as proc:
proc.wait()
if proc.stdout:
for line in proc.stdout:
num_sockets = int(line.decode("utf-8", errors="ignore").strip())
except Exception as e:
logger.error("Failed to get number of sockets: %s" % e)
if isinstance(num_sockets, int) and num_sockets >= 1:
return num_sockets
else:
logger.warning("Failed to get number of sockets, return 1 as default.")
return 1


cpu_info = CpuInfo()


def dump_elapsed_time(customized_msg=""):
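
For context, a minimal usage sketch of the lazily detected CPU info above; the import path follows the `neural_compressor.common.utils` exports touched in this diff:

from neural_compressor.common.utils import cpu_info

# Socket and core counts are no longer detected at construction time;
# each property runs its detection on first access and caches the result.
print(cpu_info.sockets)           # parsed from /proc/cpuinfo (or wmic/sysctl), falls back to 1
print(cpu_info.cores)             # psutil.cpu_count(logical=False)
print(cpu_info.cores_per_socket)  # cores // sockets, also computed lazily

# The setters let callers (and the new unit tests, via monkeypatch) override detected values.
cpu_info.sockets = 1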
31 changes: 19 additions & 12 deletions neural_compressor/torch/quantization/config.py
@@ -23,6 +23,7 @@

import torch

import neural_compressor.torch.utils as torch_utils
from neural_compressor.common.base_config import (
BaseConfig,
config_registry,
@@ -219,14 +220,17 @@ def get_config_set_for_tuning(cls) -> Union[None, "RTNConfig", List["RTNConfig"]
dtype=["int4", "nf4"], use_sym=[True, False], group_size=[32, 128], use_mse_search=[False, True]
)

@classmethod
def get_predefined_configs(cls) -> Dict[torch_utils.ProcessorType, "RTNConfig"]:
pre_defined_configs: Dict[torch_utils.ProcessorType, RTNConfig] = {}
pre_defined_configs[torch_utils.ProcessorType.Client] = cls(use_layer_wise=True)
pre_defined_configs[torch_utils.ProcessorType.Server] = cls()
return pre_defined_configs

def get_default_rtn_config() -> RTNConfig:
"""Generate the default rtn config.

Returns:
the default rtn config.
"""
return RTNConfig()
def get_default_rtn_config(processor_type: Optional[Union[str, torch_utils.ProcessorType]] = None) -> RTNConfig:
"""Get the default RTN config for the given (or auto-detected) processor type."""
process_type = torch_utils.get_processor_type_from_user_config(processor_type)
return RTNConfig.get_predefined_configs()[process_type]


def get_default_double_quant_config(type="BNB_NF4"):
@@ -378,14 +382,17 @@ def get_config_set_for_tuning(cls) -> Union[None, "GPTQConfig", List["GPTQConfig
# TODO fwk owner needs to update it.
return GPTQConfig(act_order=[True, False], use_sym=[False, True])

@classmethod
def get_predefined_configs(cls) -> Dict[torch_utils.ProcessorType, "GPTQConfig"]:
pre_defined_configs: Dict[torch_utils.ProcessorType, GPTQConfig] = {}
pre_defined_configs[torch_utils.ProcessorType.Client] = cls(use_layer_wise=True)
pre_defined_configs[torch_utils.ProcessorType.Server] = cls()
return pre_defined_configs

def get_default_gptq_config() -> GPTQConfig:
"""Generate the default gptq config.

Returns:
the default gptq config.
"""
return GPTQConfig()
def get_default_gptq_config(processor_type: Optional[Union[str, torch_utils.ProcessorType]] = None) -> GPTQConfig:
"""Get the default GPTQ config for the given (or auto-detected) processor type."""
process_type = torch_utils.get_processor_type_from_user_config(processor_type)
return GPTQConfig.get_predefined_configs()[process_type]


######################## AWQ Config ###############################
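
A short sketch of how the processor-type-aware defaults above are meant to be called; the string forms "client"/"server" rely on the normalization done in `get_processor_type_from_user_config`:

from neural_compressor.torch.quantization import get_default_gptq_config, get_default_rtn_config

# With no argument, the processor type is auto-detected from the hardware.
rtn_config = get_default_rtn_config()

# An explicit client processor selects the layer-wise variant of the predefined configs.
client_rtn = get_default_rtn_config(processor_type="client")
assert client_rtn.use_layer_wise

server_gptq = get_default_gptq_config(processor_type="server")
assert not server_gptq.use_layer_wise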
64 changes: 62 additions & 2 deletions neural_compressor/torch/utils/utility.py
@@ -13,13 +13,15 @@
# limitations under the License.


from typing import Callable, Dict, List, Tuple, Union
import enum
from typing import Callable, Dict, List, Optional, Tuple, Union

import psutil
import torch
from prettytable import PrettyTable
from typing_extensions import TypeAlias

from neural_compressor.common.utils import LazyImport, Mode, logger
from neural_compressor.common.utils import LazyImport, Mode, cpu_info, logger

OP_NAME_AND_TYPE_TUPLE_TYPE: TypeAlias = Tuple[str, Union[torch.nn.Module, Callable]]

@@ -278,3 +280,61 @@ def get_model_device(model: torch.nn.Module):
"""
for n, p in model.named_parameters():
return p.data.device.type # p.data.device == device(type='cpu')


class ProcessorType(enum.Enum):
"""The type of processor: Client or Server."""
Client = "Client"
Server = "Server"


def detect_processor_type_based_on_hw():
"""Detects the processor type based on the hardware configuration.

Returns:
ProcessorType: The detected processor type (Server or Client).
"""
# Detect the processor type based on the conditions below:
# 1. If there is more than one socket, it is a server.
# 2. If the memory size is greater than 64GB, it is a server.
log_msg = "Processor type detected as {processor_type} due to {reason}."
if cpu_info.sockets > 1:
logger.info(log_msg.format(processor_type=ProcessorType.Server.value, reason="there is more than one socket"))
return ProcessorType.Server
elif psutil.virtual_memory().total / (1024**3) > 64:
logger.info(
log_msg.format(processor_type=ProcessorType.Server.value, reason="the memory size is greater than 64GB")
)
return ProcessorType.Server
else:
logger.info(
f"Processor type detected as {ProcessorType.Client.value}, pass `processor_type='server'` to override it if needed."
)
return ProcessorType.Client


def get_processor_type_from_user_config(user_processor_type: Optional[Union[str, ProcessorType]] = None):
"""Get the processor type.

Get the processor type based on the user configuration or automatically detect it based on the hardware.

Args:
user_processor_type (Optional[Union[str, ProcessorType]]): The user-specified processor type. Defaults to None.

Returns:
ProcessorType: The detected or user-specified processor type.

Raises:
AssertionError: If the user-specified processor type is not supported.
NotImplementedError: If the processor type is not recognized.
"""
if user_processor_type is None:
processor_type = detect_processor_type_based_on_hw()
elif isinstance(user_processor_type, ProcessorType):
processor_type = user_processor_type
elif isinstance(user_processor_type, str):
user_processor_type = user_processor_type.lower().capitalize()
assert user_processor_type in ProcessorType.__members__, f"Unsupported processor type: {user_processor_type}"
processor_type = ProcessorType(user_processor_type)
else:
raise NotImplementedError(f"Unsupported processor type: {user_processor_type}")
return processor_type
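
A minimal sketch of the detection helpers above, using the same `torch_utils` alias as config.py; `detect_processor_type_based_on_hw` treats more than one socket, or more than 64GB of RAM, as a server:

import neural_compressor.torch.utils as torch_utils

# Auto-detect from the hardware (socket count first, then total memory).
p_type = torch_utils.detect_processor_type_based_on_hw()

# User overrides: strings are normalized, enum members pass through, anything else raises.
assert torch_utils.get_processor_type_from_user_config("server") is torch_utils.ProcessorType.Server
assert torch_utils.get_processor_type_from_user_config(torch_utils.ProcessorType.Client) is torch_utils.ProcessorType.Client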
67 changes: 55 additions & 12 deletions test/3x/torch/test_config.py
@@ -1,9 +1,11 @@
import copy
import unittest

import pytest
import torch
import transformers

import neural_compressor.torch.utils as torch_utils
from neural_compressor.torch.quantization import (
AutoRoundConfig,
AWQConfig,
@@ -13,6 +15,7 @@
SmoothQuantConfig,
StaticQuantConfig,
TEQConfig,
get_default_gptq_config,
get_default_hqq_config,
get_default_rtn_config,
quantize,
Expand Down Expand Up @@ -331,15 +334,55 @@ def test_hqq_config(self):
self.assertEqual(hqq_config.to_dict(), hqq_config2.to_dict())


class TestQuantConfigForAutotune(unittest.TestCase):
def test_expand_config(self):
# Test the expand functionality; the expansion is transparent to the user.

tune_config = RTNConfig(bits=[4, 6])
expand_config_list = RTNConfig.expand(tune_config)
self.assertEqual(expand_config_list[0].bits, 4)
self.assertEqual(expand_config_list[1].bits, 6)


if __name__ == "__main__":
unittest.main()
class TestQuantConfigBasedOnProcessorType:

@pytest.mark.parametrize("config_cls", [RTNConfig, GPTQConfig])
def test_get_config_based_on_processor_type(self, config_cls):
config_for_client = config_cls.get_predefined_configs()[torch_utils.ProcessorType.Client]
assert (
config_for_client.use_layer_wise
), f"Expect use_layer_wise to be True, got {config_for_client.use_layer_wise}"

config_for_server = config_cls.get_predefined_configs()[torch_utils.ProcessorType.Server]
assert (
config_for_server.use_layer_wise is False
), f"Expect use_layer_wise to be False, got {config_for_server.use_layer_wise}"

@pytest.fixture
def force_client(self, monkeypatch):
monkeypatch.setattr(torch_utils.utility.cpu_info, "sockets", 1)

# Force the RAM size detected by psutil to be <= 64GB.
class MockMemory:
def __init__(self, total):
self.total = total

# Patch the psutil.virtual_memory() method
monkeypatch.setattr(torch_utils.utility.psutil, "virtual_memory", lambda: MockMemory(16 * 1024**3))

def test_auto_detect_processor_type(self, force_client):
p_type = torch_utils.detect_processor_type_based_on_hw()
assert (
p_type == torch_utils.ProcessorType.Client
), f"Expect processor type to be {torch_utils.ProcessorType.Client}, got {p_type}"

@pytest.fixture
def force_server(self, monkeypatch):
monkeypatch.setattr(torch_utils.utility.cpu_info, "sockets", 2)

def test_get_default_config_force_server(self, force_server):
rtn_config = get_default_rtn_config()
assert not rtn_config.use_layer_wise, f"Expect use_layer_wise to be `False`, got {rtn_config.use_layer_wise}"
gptq_config = get_default_gptq_config()
assert not gptq_config.use_layer_wise, f"Expect use_layer_wise to be `False`, got {gptq_config.use_layer_wise}"

@pytest.mark.parametrize("p_type", [None, torch_utils.ProcessorType.Client, torch_utils.ProcessorType.Server])
def test_get_default_config(self, p_type):
rtn_config = get_default_rtn_config(processor_type=p_type)
assert rtn_config.use_layer_wise == (
p_type == torch_utils.ProcessorType.Client
), f"Expect use_layer_wise to be {p_type == torch_utils.ProcessorType.Client}, got {rtn_config.use_layer_wise}"
gptq_config = get_default_gptq_config(processor_type=p_type)
assert gptq_config.use_layer_wise == (
p_type == torch_utils.ProcessorType.Client
), f"Expect use_layer_wise to be {p_type == torch_utils.ProcessorType.Client}, got {gptq_config.use_layer_wise}"