Merged
Changes from 4 commits
91 changes: 67 additions & 24 deletions neural_compressor/common/utils/utility.py
@@ -39,6 +39,7 @@
"CpuInfo",
"default_tuning_logger",
"call_counter",
"cpu_info",
]


@@ -89,7 +90,7 @@ def __call__(self, *args, **kwargs):

@singleton
class CpuInfo(object):
"""CPU info collection."""
"""Get CPU Info."""

def __init__(self):
"""Get whether the cpu numerical format is bf16, the number of sockets, cores and cores per socket."""
@@ -110,14 +111,28 @@ def __init__(self):
b"\xB8\x07\x00\x00\x00" b"\x0f\xa2" b"\xC3", # mov eax, 7 # cpuid # ret
)
self._bf16 = bool(eax & (1 << 5))
# TODO: The implementation will be refined in the future.
# https://github.com/intel/neural-compressor/tree/detect_sockets
if "arch" in info and "ARM" in info["arch"]: # pragma: no cover
self._sockets = 1
else:
self._sockets = self.get_number_of_sockets()
self._cores = psutil.cpu_count(logical=False)
self._cores_per_socket = int(self._cores / self._sockets)
self._info = info
# Detect the info below lazily, only when it is first needed
self._cores = None
self._sockets = None
self._cores_per_socket = None

@staticmethod
def _detect_cores():
physical_cores = psutil.cpu_count(logical=False)
return physical_cores

@property
def cores(self):
"""Get the number of cores in platform."""
if self._cores is None:
self._cores = self._detect_cores()
return self._cores

@cores.setter
def cores(self, num_of_cores):
"""Set the number of cores in platform."""
self._cores = num_of_cores

@property
def bf16(self):
@@ -130,30 +145,58 @@ def vnni(self):
return self._vnni

@property
def cores_per_socket(self):
def cores_per_socket(self) -> int:
"""Get the cores per socket."""
if self._cores_per_socket is None:
self._cores_per_socket = self.cores // self.sockets
return self._cores_per_socket

def get_number_of_sockets(self) -> int:
"""Get number of sockets in platform."""
@property
def sockets(self):
"""Get the number of sockets in platform."""
if self._sockets is None:
self._sockets = self._get_number_of_sockets()
return self._sockets

@sockets.setter
def sockets(self, num_of_sockets):
"""Set the number of sockets in platform."""
self._sockets = num_of_sockets

def _get_number_of_sockets(self) -> int:
if "arch" in self._info and "ARM" in self._info["arch"]: # pragma: no cover
return 1

num_sockets = None
cmd = "cat /proc/cpuinfo | grep 'physical id' | sort -u | wc -l"
if psutil.WINDOWS:
cmd = r'wmic cpu get DeviceID | C:\Windows\System32\find.exe /C "CPU"'
elif psutil.MACOS: # pragma: no cover
cmd = "sysctl -n machdep.cpu.core_count"

with subprocess.Popen(
args=cmd,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
universal_newlines=False,
) as proc:
proc.wait()
if proc.stdout:
for line in proc.stdout:
return int(line.decode("utf-8", errors="ignore").strip())
return 0
num_sockets = None
try:
with subprocess.Popen(
args=cmd,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
universal_newlines=False,
) as proc:
proc.wait()
if proc.stdout:
for line in proc.stdout:
num_sockets = int(line.decode("utf-8", errors="ignore").strip())
except Exception as e:
logger.error("Failed to get number of sockets: %s" % e)
if isinstance(num_sockets, int) and num_sockets >= 1:
return num_sockets
else:
logger.warning("Failed to get number of sockets, return 1 as default.")
return 1


cpu_info = CpuInfo()


def dump_elapsed_time(customized_msg=""):
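
For context, a minimal usage sketch of the lazily detected CPU info above; the import path follows the `neural_compressor.common.utils` exports touched in this diff:

from neural_compressor.common.utils import cpu_info

# Socket and core counts are no longer detected at construction time;
# each property runs its detection on first access and caches the result.
print(cpu_info.sockets)           # parsed from /proc/cpuinfo (or wmic/sysctl), falls back to 1
print(cpu_info.cores)             # psutil.cpu_count(logical=False)
print(cpu_info.cores_per_socket)  # cores // sockets, also computed lazily

# The setters let callers (and the new unit tests, via monkeypatch) override detected values.
cpu_info.sockets = 1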
31 changes: 19 additions & 12 deletions neural_compressor/torch/quantization/config.py
@@ -23,6 +23,7 @@

import torch

import neural_compressor.torch.utils as torch_utils
from neural_compressor.common.base_config import (
BaseConfig,
config_registry,
@@ -219,14 +220,17 @@ def get_config_set_for_tuning(cls) -> Union[None, "RTNConfig", List["RTNConfig"]
dtype=["int4", "nf4"], use_sym=[True, False], group_size=[32, 128], use_mse_search=[False, True]
)

@classmethod
def get_predefined_configs(cls) -> Dict[torch_utils.ProcessorType, "RTNConfig"]:
pre_defined_configs: Dict[torch_utils.ProcessorType, RTNConfig] = {}
pre_defined_configs[torch_utils.ProcessorType.Client] = cls(use_layer_wise=True)
pre_defined_configs[torch_utils.ProcessorType.Server] = cls()
return pre_defined_configs

def get_default_rtn_config() -> RTNConfig:
"""Generate the default rtn config.

Returns:
the default rtn config.
"""
return RTNConfig()
def get_default_rtn_config(processor_type: Optional[Union[str, torch_utils.ProcessorType]] = None) -> RTNConfig:
"""Get the default RTN config for the given (or auto-detected) processor type."""
process_type = torch_utils.get_processor_type_from_user_config(processor_type)
return RTNConfig.get_predefined_configs()[process_type]


def get_default_double_quant_config(type="BNB_NF4"):
@@ -378,14 +382,17 @@ def get_config_set_for_tuning(cls) -> Union[None, "GPTQConfig", List["GPTQConfig
# TODO fwk owner needs to update it.
return GPTQConfig(act_order=[True, False], use_sym=[False, True])

@classmethod
def get_predefined_configs(cls) -> Dict[torch_utils.ProcessorType, "GPTQConfig"]:
pre_defined_configs: Dict[torch_utils.ProcessorType, GPTQConfig] = {}
pre_defined_configs[torch_utils.ProcessorType.Client] = cls(use_layer_wise=True)
pre_defined_configs[torch_utils.ProcessorType.Server] = cls()
return pre_defined_configs

def get_default_gptq_config() -> GPTQConfig:
"""Generate the default gptq config.

Returns:
the default gptq config.
"""
return GPTQConfig()
def get_default_gptq_config(processor_type: Optional[Union[str, torch_utils.ProcessorType]] = None) -> GPTQConfig:
"""Get the default GPTQ config for the given (or auto-detected) processor type."""
process_type = torch_utils.get_processor_type_from_user_config(processor_type)
return GPTQConfig.get_predefined_configs()[process_type]


######################## AWQ Config ###############################
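
A short sketch of how the processor-type-aware defaults above are meant to be called; the string forms "client"/"server" rely on the normalization done in `get_processor_type_from_user_config`:

from neural_compressor.torch.quantization import get_default_gptq_config, get_default_rtn_config

# With no argument, the processor type is auto-detected from the hardware.
rtn_config = get_default_rtn_config()

# An explicit client processor selects the layer-wise variant of the predefined configs.
client_rtn = get_default_rtn_config(processor_type="client")
assert client_rtn.use_layer_wise

server_gptq = get_default_gptq_config(processor_type="server")
assert not server_gptq.use_layer_wise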
64 changes: 62 additions & 2 deletions neural_compressor/torch/utils/utility.py
@@ -13,13 +13,15 @@
# limitations under the License.


from typing import Callable, Dict, List, Tuple, Union
import enum
from typing import Callable, Dict, List, Optional, Tuple, Union

import psutil
import torch
from prettytable import PrettyTable
from typing_extensions import TypeAlias

from neural_compressor.common.utils import LazyImport, Mode, logger
from neural_compressor.common.utils import LazyImport, Mode, cpu_info, logger

OP_NAME_AND_TYPE_TUPLE_TYPE: TypeAlias = Tuple[str, Union[torch.nn.Module, Callable]]

@@ -278,3 +280,61 @@ def get_model_device(model: torch.nn.Module):
"""
for n, p in model.named_parameters():
return p.data.device.type # p.data.device == device(type='cpu')


class ProcessorType(enum.Enum):
"""The type of processor: Client or Server."""
Client = "Client"
Server = "Server"


def detect_processor_type_based_on_hw():
"""Detects the processor type based on the hardware configuration.

Returns:
ProcessorType: The detected processor type (Server or Client).
"""
# Detect the processor type based on the conditions below:
# 1. If there is more than one socket, it is a server.
# 2. If the memory size is greater than 64GB, it is a server.
log_msg = "Processor type detected as {processor_type} due to {reason}."
if cpu_info.sockets > 1:
logger.info(log_msg.format(processor_type=ProcessorType.Server.value, reason="there is more than one socket"))
return ProcessorType.Server
elif psutil.virtual_memory().total / (1024**3) > 64:
logger.info(
log_msg.format(processor_type=ProcessorType.Server.value, reason="the memory size is greater than 64GB")
)
return ProcessorType.Server
else:
logger.info(
f"Processor type detected as {ProcessorType.Client.value}, pass `processor_type='server'` to override it if needed."
)
return ProcessorType.Client


def get_processor_type_from_user_config(user_processor_type: Optional[Union[str, ProcessorType]] = None):
"""Get the processor type.

Get the processor type based on the user configuration or automatically detect it based on the hardware.

Args:
user_processor_type (Optional[Union[str, ProcessorType]]): The user-specified processor type. Defaults to None.

Returns:
ProcessorType: The detected or user-specified processor type.

Raises:
AssertionError: If the user-specified processor type is not supported.
NotImplementedError: If the processor type is not recognized.
"""
if user_processor_type is None:
processor_type = detect_processor_type_based_on_hw()
elif isinstance(user_processor_type, ProcessorType):
processor_type = user_processor_type
elif isinstance(user_processor_type, str):
user_processor_type = user_processor_type.lower().capitalize()
assert user_processor_type in ProcessorType.__members__, f"Unsupported processor type: {user_processor_type}"
processor_type = ProcessorType(user_processor_type)
else:
raise NotImplementedError(f"Unsupported processor type: {user_processor_type}")
return processor_type
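
A minimal sketch of the detection helpers above, using the same `torch_utils` alias as config.py; `detect_processor_type_based_on_hw` treats more than one socket, or more than 64GB of RAM, as a server:

import neural_compressor.torch.utils as torch_utils

# Auto-detect from the hardware (socket count first, then total memory).
p_type = torch_utils.detect_processor_type_based_on_hw()

# User overrides: strings are normalized, enum members pass through, anything else raises.
assert torch_utils.get_processor_type_from_user_config("server") is torch_utils.ProcessorType.Server
assert torch_utils.get_processor_type_from_user_config(torch_utils.ProcessorType.Client) is torch_utils.ProcessorType.Client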
67 changes: 55 additions & 12 deletions test/3x/torch/test_config.py
@@ -1,9 +1,11 @@
import copy
import unittest

import pytest
import torch
import transformers

import neural_compressor.torch.utils as torch_utils
from neural_compressor.torch.quantization import (
AutoRoundConfig,
AWQConfig,
@@ -13,6 +15,7 @@
SmoothQuantConfig,
StaticQuantConfig,
TEQConfig,
get_default_gptq_config,
get_default_hqq_config,
get_default_rtn_config,
quantize,
Expand Down Expand Up @@ -331,15 +334,55 @@ def test_hqq_config(self):
self.assertEqual(hqq_config.to_dict(), hqq_config2.to_dict())


class TestQuantConfigForAutotune(unittest.TestCase):
def test_expand_config(self):
# Test the expand functionality; the expansion is transparent to the user.

tune_config = RTNConfig(bits=[4, 6])
expand_config_list = RTNConfig.expand(tune_config)
self.assertEqual(expand_config_list[0].bits, 4)
self.assertEqual(expand_config_list[1].bits, 6)


if __name__ == "__main__":
unittest.main()
class TestQuantConfigBasedOnProcessorType:

@pytest.mark.parametrize("config_cls", [RTNConfig, GPTQConfig])
def test_get_config_based_on_processor_type(self, config_cls):
config_for_client = config_cls.get_predefined_configs()[torch_utils.ProcessorType.Client]
assert (
config_for_client.use_layer_wise
), f"Expect use_layer_wise to be True, got {config_for_client.use_layer_wise}"

config_for_server = config_cls.get_predefined_configs()[torch_utils.ProcessorType.Server]
assert (
config_for_server.use_layer_wise is False
), f"Expect use_layer_wise to be False, got {config_for_server.use_layer_wise}"

@pytest.fixture
def force_client(self, monkeypatch):
monkeypatch.setattr(torch_utils.utility.cpu_info, "sockets", 1)

# Force the RAM size detected by psutil to be <= 64GB.
class MockMemory:
def __init__(self, total):
self.total = total

# Patch the psutil.virtual_memory() method
monkeypatch.setattr(torch_utils.utility.psutil, "virtual_memory", lambda: MockMemory(16 * 1024**3))

def test_auto_detect_processor_type(self, force_client):
p_type = torch_utils.detect_processor_type_based_on_hw()
assert (
p_type == torch_utils.ProcessorType.Client
), f"Expect processor type to be {torch_utils.ProcessorType.Client}, got {p_type}"

@pytest.fixture
def force_server(self, monkeypatch):
monkeypatch.setattr(torch_utils.utility.cpu_info, "sockets", 2)

def test_get_default_config_force_server(self, force_server):
rtn_config = get_default_rtn_config()
assert not rtn_config.use_layer_wise, f"Expect use_layer_wise to be `False`, got {rtn_config.use_layer_wise}"
gptq_config = get_default_gptq_config()
assert not gptq_config.use_layer_wise, f"Expect use_layer_wise to be `False`, got {gptq_config.use_layer_wise}"

@pytest.mark.parametrize("p_type", [None, torch_utils.ProcessorType.Client, torch_utils.ProcessorType.Server])
def test_get_default_config(self, p_type):
rtn_config = get_default_rtn_config(processor_type=p_type)
assert rtn_config.use_layer_wise == (
p_type == torch_utils.ProcessorType.Client
), f"Expect use_layer_wise to be {p_type == torch_utils.ProcessorType.Client}, got {rtn_config.use_layer_wise}"
gptq_config = get_default_gptq_config(processor_type=p_type)
assert gptq_config.use_layer_wise == (
p_type == torch_utils.ProcessorType.Client
), f"Expect use_layer_wise to be {p_type == torch_utils.ProcessorType.Client}, got {gptq_config.use_layer_wise}"