fix(gguf_parser): fix MemoryError exception when loading non-native-endian models

taronaeo · taronaeo · commit b6cc604b10f1 · 2025-05-30T03:15:03.000+08:00
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

fix(gguf_parser): missed some calls

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

fix(gguf_parser): typo `return` vs `raise`

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
diff --git a/ramalama/gguf_parser.py b/ramalama/gguf_parser.py
@@ -107,18 +107,24 @@ class GGUFInfoParser:
     def is_model_gguf(model_path: str) -> bool:
         try:
             with open(model_path, "rb") as model_file:
-                magic_number = GGUFInfoParser.read_string(model_file, 4)
+                magic_number = GGUFInfoParser.read_string(model_file, GGUFEndian.LITTLE, 4)
                 return magic_number == GGUFModelInfo.MAGIC_NUMBER
         except Exception as ex:
             console.warning(f"Failed to read model '{model_path}': {ex}")
             return False
 
     @staticmethod
-    def read_string(model: io.BufferedReader, length: int = -1) -> str:
+    def read_string(
+        model: io.BufferedReader, model_endianness: GGUFEndian = GGUFEndian.LITTLE, length: int = -1
+    ) -> str:
         if length == -1:
-            type_string = GGUF_VALUE_TYPE_FORMAT[GGUFValueType.UINT64]
-            length = struct.unpack(type_string, model.read(struct.calcsize(type_string)))[0]
-        return model.read(length).decode("utf-8")
+            length = GGUFInfoParser.read_number(model, GGUFValueType.UINT64, model_endianness)
+
+        raw = model.read(length)
+        if len(raw) < length:
+            raise ParseError(f"Unexpected EOF: wanted {length} bytes, got {len(raw)}")
+
+        return raw.decode("utf-8")
 
     @staticmethod
     def read_number(model: io.BufferedReader, value_type: GGUFValueType, model_endianness: GGUFEndian) -> float:
@@ -151,7 +157,7 @@ def read_value(model: io.BufferedReader, value_type: GGUFValueType, model_endian
         elif value_type == GGUFValueType.BOOL:
             value = GGUFInfoParser.read_bool(model, model_endianness)
         elif value_type == GGUFValueType.STRING:
-            value = GGUFInfoParser.read_string(model)
+            value = GGUFInfoParser.read_string(model, model_endianness)
         elif value_type == GGUFValueType.ARRAY:
             array_type = GGUFInfoParser.read_value_type(model, model_endianness)
             array_length = GGUFInfoParser.read_number(model, GGUFValueType.UINT64, model_endianness)
@@ -167,7 +173,7 @@ def parse(model_name: str, model_registry: str, model_path: str) -> GGUFModelInf
         model_endianness = GGUFEndian.LITTLE
 
         with open(model_path, "rb") as model:
-            magic_number = GGUFInfoParser.read_string(model, 4)
+            magic_number = GGUFInfoParser.read_string(model, model_endianness, 4)
             if magic_number != GGUFModelInfo.MAGIC_NUMBER:
                 raise ParseError(f"Invalid GGUF magic number '{magic_number}'")
 
@@ -185,13 +191,13 @@ def parse(model_name: str, model_registry: str, model_path: str) -> GGUFModelInf
 
             metadata = {}
             for _ in range(metadata_kv_count):
-                key = GGUFInfoParser.read_string(model)
+                key = GGUFInfoParser.read_string(model, model_endianness)
                 value_type = GGUFInfoParser.read_value_type(model, model_endianness)
                 metadata[key] = GGUFInfoParser.read_value(model, value_type, model_endianness)
 
             tensors: list[Tensor] = []
             for _ in range(tensor_count):
-                name = GGUFInfoParser.read_string(model)
+                name = GGUFInfoParser.read_string(model, model_endianness)
                 n_dimensions = GGUFInfoParser.read_number(model, GGUFValueType.UINT32, model_endianness)
                 dimensions: list[int] = []
                 for _ in range(n_dimensions):