SwanHubX · Zeyi-Lin · Jun 6, 2025 · Jun 6, 2025 · Jun 6, 2025 · Jun 6, 2025
diff --git a/README.md b/README.md
@@ -400,11 +400,12 @@ SwanLab会对AI训练过程中所使用的**硬件信息**和**资源使用情
 | 苹果SOC | ✅ | ✅ | [apple.py](https://github.com/SwanHubX/SwanLab/blob/main/swanlab/data/run/metadata/hardware/soc/apple.py) |
 | 寒武纪MLU | ✅ | ✅ | [cambricon.py](https://github.com/SwanHubX/SwanLab/blob/main/swanlab/data/run/metadata/hardware/mlu/cambricon.py) |
 | 昆仑芯XPU | ✅ | ✅ | [kunlunxin.py](https://github.com/SwanHubX/SwanLab/blob/main/swanlab/data/run/metadata/hardware/xpu/kunlunxin.py) |
-| 摩尔线程GPU | ✅ | ✅ | [moorethread.py](https://github.com/SwanHubX/SwanLab/blob/main/swanlab/data/run/metadata/hardware/gpu/moorethread.py) |
+| 摩尔线程GPU | ✅ | ✅ | [moorethreads.py](https://github.com/SwanHubX/SwanLab/blob/main/swanlab/data/run/metadata/hardware/gpu/moorethreads.py) |
 | 沐曦GPU | ✅ | ✅ | [metax.py](https://github.com/SwanHubX/SwanLab/blob/main/swanlab/data/run/metadata/hardware/gpu/metax.py) |
-| CPU | ✅ | ✅ | [cpu.py](https://github.com/SwanHubX/SwanLab/blob/main/swanlab/data/run/metadata/hardware/cpu.py) |
-| 内存 | ✅ | ✅ | [memory.py](https://github.com/SwanHubX/SwanLab/blob/main/swanlab/data/run/metadata/hardware/memory.py) |
-| 硬盘 | ✅ | ✅ | [disk.py](https://github.com/SwanHubX/SwanLab/blob/main/swanlab/data/run/metadata/hardware/disk.py) |
+| 海光DCU     | ✅        | ✅        | [hygon.py](https://github.com/SwanHubX/SwanLab/blob/main/swanlab/data/run/metadata/hardware/dcu/hygon.py) |
+| CPU         | ✅        | ✅        | [cpu.py](https://github.com/SwanHubX/SwanLab/blob/main/swanlab/data/run/metadata/hardware/cpu.py) |
+| 内存        | ✅        | ✅        | [memory.py](https://github.com/SwanHubX/SwanLab/blob/main/swanlab/data/run/metadata/hardware/memory.py) |
+| 硬盘        | ✅        | ✅        | [disk.py](https://github.com/SwanHubX/SwanLab/blob/main/swanlab/data/run/metadata/hardware/disk.py) |
 | 网络 | ✅ | ✅ | [network.py](https://github.com/SwanHubX/SwanLab/blob/main/swanlab/data/run/metadata/hardware/network.py) |
 
 如果你希望记录其他硬件，欢迎提交Issue与PR！

diff --git a/README_EN.md b/README_EN.md
@@ -388,10 +388,13 @@ SwanLab records the **hardware information** and **resource usage** during AI tr
 | --- | --- | --- | --- |
 | Nvidia GPU | ✅ | ✅ | [nvidia.py](https://github.com/SwanHubX/SwanLab/blob/main/swanlab/data/run/metadata/hardware/gpu/nvidia.py) |
 | Ascend NPU | ✅ | ✅ | [ascend.py](https://github.com/SwanHubX/SwanLab/blob/main/swanlab/data/run/metadata/hardware/npu/ascend.py) |
-| Combricon MLU | ✅ | ✅ | [cambricon.py](https://github.com/SwanHubX/SwanLab/blob/main/swanlab/data/run/metadata/hardware/mlu/cambricon.py) |
-| Kunlunxin XPU | ✅ | ✅ | [kunlunxin.py](https://github.com/SwanHubX/SwanLab/blob/main/swanlab/data/run/metadata/hardware/xpu/kunlunxin.py) |
-| MooreThreads GPU | ✅ | ✅ | [moorethread.py](https://github.com/SwanHubX/SwanLab/blob/main/swanlab/data/run/metadata/hardware/gpu/moorethread.py) |
-| CPU | ✅ | ✅ | [cpu.py](https://github.com/SwanHubX/SwanLab/blob/main/swanlab/data/run/metadata/hardware/cpu.py) |
+| Apple SOC        | ✅                     | ✅                   | [apple.py](https://github.com/SwanHubX/SwanLab/blob/main/swanlab/data/run/metadata/hardware/soc/apple.py) |
+| Cambricon MLU    | ✅                     | ✅                   | [cambricon.py](https://github.com/SwanHubX/SwanLab/blob/main/swanlab/data/run/metadata/hardware/mlu/cambricon.py) |
+| Kunlunxin XPU    | ✅                     | ✅                   | [kunlunxin.py](https://github.com/SwanHubX/SwanLab/blob/main/swanlab/data/run/metadata/hardware/xpu/kunlunxin.py) |
+| MooreThreads GPU | ✅ | ✅ | [moorethreads.py](https://github.com/SwanHubX/SwanLab/blob/main/swanlab/data/run/metadata/hardware/gpu/moorethreads.py) |
+| MetaX GPU | ✅ | ✅ | [metax.py](https://github.com/SwanHubX/SwanLab/blob/main/swanlab/data/run/metadata/hardware/gpu/metax.py) |
+| Hygon DCU | ✅ | ✅ | [hygon.py](https://github.com/SwanHubX/SwanLab/blob/main/swanlab/data/run/metadata/hardware/dcu/hygon.py) |
+| CPU              | ✅                     | ✅                   | [cpu.py](https://github.com/SwanHubX/SwanLab/blob/main/swanlab/data/run/metadata/hardware/cpu.py) |
 | Memory | ✅ | ✅ | [memory.py](https://github.com/SwanHubX/SwanLab/blob/main/swanlab/data/run/metadata/hardware/memory.py) |
 | Disk | ✅ | ✅ | [disk.py](https://github.com/SwanHubX/SwanLab/blob/main/swanlab/data/run/metadata/hardware/disk.py) |
 | Network | ✅ | ✅ | [network.py](https://github.com/SwanHubX/SwanLab/blob/main/swanlab/data/run/metadata/hardware/network.py) |

diff --git a/swanlab/data/run/metadata/hardware/dcu/hygon.py b/swanlab/data/run/metadata/hardware/dcu/hygon.py
@@ -196,12 +196,12 @@ def get_memory_usage(self) -> dict:
 
     def get_temperature_usage(self) -> dict:
         """
-        获取指定DCU设备的温度(°C) (默认采集显存温度）
+        获取指定DCU设备的温度(°C) (采集 Junction 核心温度)
         {
           "card0": {
             "Temperature (Sensor edge) (C)": "52.0",
-            "Temperature (Sensor junction) (C)": "51.0",
-            "Temperature (Sensor mem) (C)": "52.0" // collected
+            "Temperature (Sensor junction) (C)": "51.0", // collected
+            "Temperature (Sensor mem) (C)": "52.0"
           }
         }
         """
@@ -221,7 +221,7 @@ def get_temperature_usage(self) -> dict:
                 "value": math.nan,
                 "config": self.per_temp_configs[f"DCU {dcu_id}"],
             }
-            dcu_temp = dcu_info["Temperature (Sensor mem) (C)"]
+            dcu_temp = dcu_info["Temperature (Sensor junction) (C)"]
             temp_infos[dcu_id]["value"] = float(dcu_temp)
         return temp_infos
 

diff --git a/swanlab/data/run/metadata/hardware/mlu/cambricon.py b/swanlab/data/run/metadata/hardware/mlu/cambricon.py
@@ -8,9 +8,10 @@
 import math
 import platform
 import subprocess
-from typing import Tuple, Optional
+from typing import Any, Dict, Optional, Tuple
 
-from ..type import HardwareFuncResult, HardwareInfoList, HardwareConfig, HardwareCollector as H
+from ..type import HardwareCollector as H
+from ..type import HardwareConfig, HardwareFuncResult, HardwareInfoList
 from ..utils import generate_key, random_index
 
 
@@ -22,7 +23,7 @@ def get_cambricon_mlu_info() -> HardwareFuncResult:
     if platform.system() != "Linux":
         return None, None
 
-    info = {"driver": None, "mlu": None}
+    info: Dict[str, Any] = {"driver": None, "mlu": None}
     collector = None
     try:
         driver, mlu_map = map_mlu()
@@ -151,23 +152,22 @@ def get_utilization_usage(self) -> dict:
             mlu_ids.append(mlu_id)
 
         index = 0
-        lines = output.split("\n")
+        lines = output.strip().split("\n")
         for line in lines:
+            line = line.strip()
             if "mlu average" in line.lower():
                 util_infos[mlu_ids[index]] = {
                     "key": self.util_key.format(mlu_index=mlu_ids[index]),
                     "name": f"MLU {mlu_ids[index]} Utilization (%)",
                     "value": math.nan,
                     "config": self.per_util_configs[f"MLU {mlu_ids[index]}"],
                 }
-                line = line.split(":")
                 # 获得此mlu的利用率数值
-                util = line[-1].replace("%", "").strip()
+                util = line.split(":")[-1].replace("%", "").strip()
                 if util.isdigit():
-                    util_infos[mlu_ids[index]]['value'] = float(util)
+                    util_infos[mlu_ids[index]]["value"] = float(util)
                 index += 1
                 continue
-
         return util_infos
 
     def get_memory_usage(self) -> dict:
@@ -188,30 +188,28 @@ def get_memory_usage(self) -> dict:
             mlu_ids.append(mlu_id)
 
         index = 0
-        lines = output.split("\n")
-        for line_index, line in enumerate(lines):
+        lines = output.strip().split("\n")
+        for line_idx, line in enumerate(lines):
             # 如果包含mlu average，则表示该行是mlu的利用率
             if "physical memory usage" in line.lower():
-                if "used" in lines[line_index + 2].lower():
-                    used_line = lines[line_index + 2]
+                if "used" in lines[line_idx + 2].lower():
+                    used_line = lines[line_idx + 2]
                     # 初始化mlu的利用率
                     memory_infos[mlu_ids[index]] = {
                         "key": self.memory_key.format(mlu_index=mlu_ids[index]),
                         "name": f"MLU {mlu_ids[index]} Memory Allocated (%)",
                         "value": math.nan,
                         "config": self.per_memory_configs[f"MLU {mlu_ids[index]}"],
                     }
-                    used_line = used_line.split(":")
                     # 获得此mlu的显存数值（MiB）
-                    memory = used_line[-1].replace("MiB", "").strip()
+                    memory = used_line.split(":")[-1].replace("MiB", "").strip()
                     if memory.isdigit():
                         # 计算mlu显存占用率
-                        memory_infos[mlu_ids[index]]['value'] = (
-                            float(memory) / (self.mlu_map[mlu_ids[index]]['memory'] * 1024) * 100
+                        memory_infos[mlu_ids[index]]["value"] = (
+                            float(memory) / (float(self.mlu_map[mlu_ids[index]]["memory"]) * 1024) * 100
                         )
                     index += 1
                     continue
-
         return memory_infos
 
     def get_temperature_usage(self) -> dict:
@@ -227,23 +225,23 @@ def get_temperature_usage(self) -> dict:
             mlu_ids.append(mlu_id)
 
         index = 0
-        lines = output.split("\n")
-        for line in lines:
-            if "chip" in line.lower():
-                temp_infos[mlu_ids[index]] = {
-                    "key": self.temp_key.format(mlu_index=mlu_ids[index]),
-                    "name": f"MLU {mlu_ids[index]} Temperature (°C)",
-                    "value": math.nan,
-                    "config": self.per_temp_configs[f"MLU {mlu_ids[index]}"],
-                }
-                line = line.split(":")
-                # 获得此mlu的温度数值
-                temp = line[-1].replace("C", "").strip()
-                if temp.isdigit():
-                    temp_infos[mlu_ids[index]]['value'] = float(temp)
-                index += 1
-                continue
-
+        lines = output.strip().split("\n")
+        for line_idx, line in enumerate(lines):
+            if "temperature" in line.lower():
+                if "chip" in lines[line_idx + 2].lower():
+                    temp_line = lines[line_idx + 2]
+                    temp_infos[mlu_ids[index]] = {
+                        "key": self.temp_key.format(mlu_index=mlu_ids[index]),
+                        "name": f"MLU {mlu_ids[index]} Temperature (°C)",
+                        "value": math.nan,
+                        "config": self.per_temp_configs[f"MLU {mlu_ids[index]}"],
+                    }
+                    # 获得此mlu的温度数值
+                    temp = temp_line.split(":")[-1].replace("C", "").strip()
+                    if temp.isdigit():
+                        temp_infos[mlu_ids[index]]["value"] = float(temp)
+                    index += 1
+                    continue
         return temp_infos
 
     def get_power_usage(self) -> dict:
@@ -259,21 +257,21 @@ def get_power_usage(self) -> dict:
             mlu_ids.append(mlu_id)
 
         index = 0
-        lines = output.split("\n")
-        for line in lines:
-            if "usage" in line.lower() and "mlu memory usage" not in line.lower():
-                power_infos[mlu_ids[index]] = {
-                    "key": self.power_key.format(mlu_index=mlu_ids[index]),
-                    "name": f"MLU {mlu_ids[index]} Power (W)",
-                    "value": math.nan,
-                    "config": self.per_power_configs[f"MLU {mlu_ids[index]}"],
-                }
-                line = line.split(":")
-                # 获得此mlu的功耗数值
-                power = line[-1].replace("W", "").strip()
-                if power.isdigit():
-                    power_infos[mlu_ids[index]]['value'] = float(power)
-                index += 1
-                continue
-
+        lines = output.strip().split("\n")
+        for line_idx, line in enumerate(lines):
+            if "power" in line.lower() and line_idx + 1 < len(lines):
+                if "usage" in lines[line_idx + 1].lower():
+                    power_line = lines[line_idx + 1]
+                    power_infos[mlu_ids[index]] = {
+                        "key": self.power_key.format(mlu_index=mlu_ids[index]),
+                        "name": f"MLU {mlu_ids[index]} Power (W)",
+                        "value": math.nan,
+                        "config": self.per_power_configs[f"MLU {mlu_ids[index]}"],
+                    }
+                    # 获得此mlu的功耗数值
+                    power = power_line.split(":")[-1].replace("W", "").strip()
+                    if power.isdigit():
+                        power_infos[mlu_ids[index]]["value"] = float(power)
+                    index += 1
+                    continue
         return power_infos