Skip to content

Commit 10d468e

Browse files
committed
Added support for safetensors to inspect command
Signed-off-by: Michael Engel <[email protected]>
1 parent 0947e11 commit 10d468e

File tree

9 files changed

+176
-58
lines changed

9 files changed

+176
-58
lines changed

ramalama/model.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,13 @@
2727
from ramalama.config import CONFIG, DEFAULT_PORT, DEFAULT_PORT_RANGE
2828
from ramalama.console import should_colorize
2929
from ramalama.engine import Engine, dry_run
30-
from ramalama.gguf_parser import GGUFInfoParser
3130
from ramalama.kube import Kube
3231
from ramalama.logger import logger
33-
from ramalama.model_inspect import GGUFModelInfo, ModelInfoBase
32+
from ramalama.model_inspect.base_info import ModelInfoBase
33+
from ramalama.model_inspect.gguf_info import GGUFModelInfo
34+
from ramalama.model_inspect.gguf_parser import GGUFInfoParser
35+
from ramalama.model_inspect.safetensor_info import SafetensorModelInfo
36+
from ramalama.model_inspect.safetensor_parser import SafetensorInfoParser
3437
from ramalama.model_store.global_store import GlobalModelStore
3538
from ramalama.model_store.store import ModelStore
3639
from ramalama.quadlet import Quadlet
@@ -838,6 +841,10 @@ def inspect(self, args):
838841
gguf_info: GGUFModelInfo = GGUFInfoParser.parse(model_name, model_registry, model_path)
839842
print(gguf_info.serialize(json=args.json, all=args.all))
840843
return
844+
if SafetensorInfoParser.is_model_safetensor(model_name):
845+
safetensor_info: SafetensorModelInfo = SafetensorInfoParser.parse(model_name, model_registry, model_path)
846+
print(safetensor_info.serialize(json=args.json, all=args.all))
847+
return
841848

842849
print(ModelInfoBase(model_name, model_registry, model_path).serialize(json=args.json))
843850

ramalama/model_inspect/base_info.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import json
2+
import shutil
3+
import sys
4+
from dataclasses import dataclass
5+
6+
7+
def get_terminal_width():
    """Return the terminal width in columns, or 80 when stdout is not a tty."""
    return shutil.get_terminal_size().columns if sys.stdout.isatty() else 80


def adjust_new_line(line: str) -> str:
    """Fit *line* to the terminal width.

    Lines wider than the terminal are truncated and suffixed with "...";
    a trailing newline, if present, is re-appended after the filler.
    Lines that fit are returned newline-terminated.
    """
    filler = "..."
    max_width = get_terminal_width()
    adjusted_length = max_width - len(filler)

    adjust_for_newline = 1 if line.endswith("\n") else 0
    if len(line) - adjust_for_newline > max_width:
        # BUG FIX: previously the trailing conditional bound to the whole
        # expression -- `truncated if adjust_for_newline == 1 else ""` -- so an
        # over-long line WITHOUT a trailing newline was replaced by the empty
        # string and its content lost. Only the newline suffix is conditional.
        suffix = "\n" if adjust_for_newline == 1 else ""
        return line[: adjusted_length - adjust_for_newline] + filler + suffix
    return line if line.endswith("\n") else line + "\n"
20+
21+
22+
@dataclass
class Tensor:
    """Metadata describing a single tensor entry read from a model file header."""

    # Tensor identifier as stored in the file.
    name: str
    # Number of dimensions (presumably equals len(dimensions) -- set by the parser).
    n_dimensions: int
    # Size of each dimension.
    dimensions: list[int]
    # Data-type label as parsed from the file (e.g. a GGUF type name).
    type: str
    # Byte offset of the tensor data within the file.
    offset: int
29+
30+
31+
@dataclass
class ModelInfoBase:
    """Common, serializable information shared by all inspected model files."""

    Name: str
    Registry: str
    Path: str

    def serialize(self, json: bool = False) -> str:
        """Render the model info as indented JSON or as terminal-width plain text."""
        if json:
            return self.to_json()

        parts = [
            adjust_new_line(f"{self.Name}\n"),
            adjust_new_line(f"   Path: {self.Path}\n"),
            adjust_new_line(f"   Registry: {self.Registry}"),
        ]
        return "".join(parts)

    def to_json(self) -> str:
        """Return the instance fields as a sorted, 4-space-indented JSON object."""
        return json.dumps(self.__dict__, sort_keys=True, indent=4)

ramalama/model_inspect/error.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Basic error when parsing model files
class ParseError(Exception):
    """Raised when a model file cannot be read or its header cannot be parsed."""

ramalama/model_inspect.py renamed to ramalama/model_inspect/gguf_info.py

Lines changed: 1 addition & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,54 +1,8 @@
11
import json
2-
import shutil
3-
import sys
4-
from dataclasses import dataclass
52
from typing import Any, Dict
63

74
from ramalama.endian import GGUFEndian
8-
9-
10-
def get_terminal_width():
11-
if sys.stdout.isatty():
12-
return shutil.get_terminal_size().columns
13-
return 80
14-
15-
16-
def adjust_new_line(line: str) -> str:
17-
filler = "..."
18-
max_width = get_terminal_width()
19-
adjusted_length = max_width - len(filler)
20-
21-
adjust_for_newline = 1 if line.endswith("\n") else 0
22-
if len(line) - adjust_for_newline > max_width:
23-
return line[: adjusted_length - adjust_for_newline] + filler + "\n" if adjust_for_newline == 1 else ""
24-
if not line.endswith("\n"):
25-
return line + "\n"
26-
return line
27-
28-
29-
@dataclass
30-
class Tensor:
31-
name: str
32-
n_dimensions: int
33-
dimensions: list[int]
34-
type: str
35-
offset: int
36-
37-
38-
@dataclass
39-
class ModelInfoBase:
40-
Name: str
41-
Registry: str
42-
Path: str
43-
44-
def serialize(self, json: bool = False) -> str:
45-
ret = adjust_new_line(f"{self.Name}\n")
46-
ret = ret + adjust_new_line(f" Path: {self.Path}\n")
47-
ret = ret + adjust_new_line(f" Registry: {self.Registry}")
48-
return ret
49-
50-
def to_json(self) -> str:
51-
return json.dumps(self, sort_keys=True, indent=4)
5+
from ramalama.model_inspect.base_info import ModelInfoBase, Tensor, adjust_new_line
526

537

548
class GGUFModelInfo(ModelInfoBase):

ramalama/gguf_parser.py renamed to ramalama/model_inspect/gguf_parser.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
from enum import IntEnum
44
from typing import Any, Dict
55

6-
import ramalama.console as console
76
from ramalama.endian import GGUFEndian
8-
from ramalama.model_inspect import GGUFModelInfo, Tensor
7+
from ramalama.model_inspect.error import ParseError
8+
from ramalama.model_inspect.gguf_info import GGUFModelInfo, Tensor
99

1010

1111
# Based on ggml_type in
@@ -99,19 +99,14 @@ class GGUFValueType(IntEnum):
9999
]
100100

101101

102-
class ParseError(Exception):
103-
pass
104-
105-
106102
class GGUFInfoParser:
    """Parses GGUF model files."""

    @staticmethod
    def is_model_gguf(model_path: str) -> bool:
        """Return True if the file at model_path begins with the GGUF magic number.

        Reads the first 4 bytes (little-endian) and compares them against
        GGUFModelInfo.MAGIC_NUMBER.
        """
        try:
            with open(model_path, "rb") as model_file:
                magic_number = GGUFInfoParser.read_string(model_file, GGUFEndian.LITTLE, 4)
                return magic_number == GGUFModelInfo.MAGIC_NUMBER
        except Exception:
            # Missing, unreadable, or too-short files are simply not GGUF models;
            # treat any read failure as "not GGUF" rather than raising.
            return False
116111

117112
@staticmethod
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import json
2+
from typing import Any, Dict
3+
4+
from ramalama.model_inspect.base_info import ModelInfoBase, adjust_new_line
5+
6+
7+
class SafetensorModelInfo(ModelInfoBase):
    """Model information for a safetensors file, including its parsed JSON header."""

    def __init__(
        self,
        Name: str,
        Registry: str,
        Path: str,
        header_data: Dict[str, Any],
    ):
        super().__init__(Name, Registry, Path)

        # Parsed safetensors JSON header: tensor entries keyed by name, plus an
        # optional "__metadata__" mapping (e.g. {"format": "pt"}).
        self.Header: Dict[str, Any] = header_data

    def serialize(self, json: bool = False, all: bool = False) -> str:
        """Render the model info.

        json -- emit JSON instead of plain text.
        all  -- include every header entry instead of just an entry count.
        """
        if json:
            return self.to_json(all)

        # The model format (e.g. "pt") lives under the optional "__metadata__" key.
        fmt = ""
        metadata = self.Header.get("__metadata__", {})
        if isinstance(metadata, dict):
            fmt = metadata.get("format", "")

        ret = super().serialize()
        ret = ret + adjust_new_line(f"   Format: {fmt}")
        metadata_header = "   Header: "
        if not all:
            metadata_header = metadata_header + f"{len(self.Header)} entries"
        ret = ret + adjust_new_line(metadata_header)
        if all:
            for key, value in sorted(self.Header.items()):
                ret = ret + adjust_new_line(f"      {key}: {value}")

        return ret

    def to_json(self, all: bool = False) -> str:
        """Return JSON; when all is False the Header is summarized as an entry count.

        NOTE(review): the summary key is "Metadata" while the text path labels the
        same count "Header" -- confirm this asymmetry is intentional.
        """
        if all:
            return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4)

        d = {k: v for k, v in self.__dict__.items() if k != "Header"}
        d["Metadata"] = len(self.Header)
        return json.dumps(d, sort_keys=True, indent=4)
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import json
2+
import struct
3+
4+
import ramalama.console as console
5+
from ramalama.model_inspect.error import ParseError
6+
from ramalama.model_inspect.safetensor_info import SafetensorModelInfo
7+
8+
# Based on safetensor format description:
9+
# https://github.com/huggingface/safetensors?tab=readme-ov-file#format
10+
11+
12+
class SafetensorInfoParser:
    """Parses the header of safetensors model files."""

    @staticmethod
    def is_model_safetensor(model_name: str) -> bool:
        """Return True if the file name looks like a safetensors model.

        There is no magic number or something similar, so we only rely on the
        naming of the file here.
        """
        # endswith accepts a tuple of suffixes -- one call instead of an `or` chain.
        return model_name.endswith((".safetensor", ".safetensors"))

    @staticmethod
    def parse(model_name: str, model_registry: str, model_path: str) -> "SafetensorModelInfo":
        """Read the safetensors header and wrap it in a SafetensorModelInfo.

        Raises ParseError (with the original exception chained) if the file
        cannot be read or its header is not valid JSON.
        """
        try:
            with open(model_path, "rb") as model_file:
                # The file starts with an unsigned little-endian 64-bit integer
                # ("<Q") giving the size of the JSON header that follows.
                header_size = struct.unpack("<Q", model_file.read(8))[0]
                header = json.loads(model_file.read(header_size))

            return SafetensorModelInfo(model_name, model_registry, model_path, header)

        except Exception as ex:
            msg = f"Failed to parse safetensor model '{model_path}': {ex}"
            console.warning(msg)
            # Chain the cause so the root failure is preserved in tracebacks.
            raise ParseError(msg) from ex

ramalama/model_store/store.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99
import ramalama.model_store.go2jinja as go2jinja
1010
from ramalama.common import perror, verify_checksum
1111
from ramalama.endian import EndianMismatchError, get_system_endianness
12-
from ramalama.gguf_parser import GGUFInfoParser, GGUFModelInfo
1312
from ramalama.logger import logger
13+
from ramalama.model_inspect.gguf_parser import GGUFInfoParser, GGUFModelInfo
1414
from ramalama.model_store.constants import DIRECTORY_NAME_BLOBS, DIRECTORY_NAME_REFS, DIRECTORY_NAME_SNAPSHOTS
1515
from ramalama.model_store.global_store import GlobalModelStore
1616
from ramalama.model_store.reffile import RefFile

test/system/100-inspect.bats

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,4 +37,34 @@ load setup_suite
3737
is "${lines[7]}" " general.architecture: llama" "metadata general.architecture"
3838
}
3939

40+
# bats test_tags=distro-integration
# Pull a small safetensors model and verify the summary output of
# `ramalama inspect`: name, store path, registry, format, and header entry count.
@test "ramalama inspect safetensors model" {
    ST_MODEL="https://huggingface.co/LiheYoung/depth-anything-small-hf/resolve/main/model.safetensors"

    run_ramalama pull $ST_MODEL
    run_ramalama inspect $ST_MODEL

    is "${lines[0]}" "model.safetensors" "model name"
    is "${lines[1]}" "   Path: .*store/https/huggingface.co/.*" "model path"
    is "${lines[2]}" "   Registry: https" "model registry"
    is "${lines[3]}" "   Format: pt" "model format"
    is "${lines[4]}" "   Header: 288 entries" "# of metadata entries"
}
53+
54+
# bats test_tags=distro-integration
# With --all, individual header entries are printed under "Header:" instead of
# a count. Relies on the model pulled by the previous test; removes it at the end.
@test "ramalama inspect safetensors model with --all" {
    ST_MODEL="https://huggingface.co/LiheYoung/depth-anything-small-hf/resolve/main/model.safetensors"

    run_ramalama inspect --all $ST_MODEL

    is "${lines[0]}" "model.safetensors" "model name"
    is "${lines[1]}" "   Path: .*store/https/huggingface.co/.*" "model path"
    is "${lines[2]}" "   Registry: https" "model registry"
    is "${lines[3]}" "   Format: pt" "model format"
    is "${lines[4]}" "   Header: " "metadata header"
    is "${lines[5]}" "      __metadata__: {'format': 'pt'}" "metadata"

    # Clean up the model pulled by the preceding safetensors test.
    run_ramalama rm $ST_MODEL
}
69+
4070
# vim: filetype=sh

0 commit comments

Comments
 (0)