
Commit bf0af80

Merge pull request #1673 from containers/mlx-fixes

mlx fixes

2 parents 99f56a7 + 5b20aa4

6 files changed, +31 -33 lines

README.md

Lines changed: 0 additions & 5 deletions

@@ -116,11 +116,6 @@ RamaLama is available via PyPi at [https://pypi.org/project/ramalama](https://pypi.org/project/ramalama)
 pip install ramalama
 ```
 
-### Install via Homebrew
-```
-brew install ramalama
-```
-
 ### Install script (Linux and macOS)
 Install RamaLama by running:
 ```
install.sh

Lines changed: 10 additions & 4 deletions

@@ -129,6 +129,13 @@ is_python3_at_least_310() {
   python3 -c 'import sys; exit(0 if sys.version_info >= (3, 10) else 1)'
 }
 
+install_uv() {
+  local host="raw.githubusercontent.com"
+  local install_uv_url="https://$host/containers/ramalama/s/install-uv.sh"
+  curl -fsSL "$install_uv_url" | bash
+  echo
+}
+
 main() {
   set -e -o pipefail
 

@@ -151,14 +158,13 @@ main() {
     fi
 
     if available brew && brew install ramalama; then
+      install_uv
+      uv tool install mlx-lm
       return 0
     fi
   fi
 
-  local host="raw.githubusercontent.com"
-  local install_uv_url="https://$host/containers/ramalama/s/install-uv.sh"
-  curl -fsSL "$install_uv_url" | bash
-  echo
+  install_uv
   uv tool install --force --python python3.12 ramalama
   print_success_info
 }

ramalama/cli.py

Lines changed: 2 additions & 7 deletions

@@ -26,7 +26,7 @@
 from ramalama.common import accel_image, get_accel, perror
 from ramalama.config import CONFIG
 from ramalama.logger import configure_logger, logger
-from ramalama.model import MODEL_TYPES
+from ramalama.model import MODEL_TYPES, trim_model_name
 from ramalama.model_factory import ModelFactory, New
 from ramalama.model_inspect.error import ParseError
 from ramalama.model_store.global_store import GlobalModelStore

@@ -474,12 +474,7 @@ def _list_models_from_store(args):
         if not args.all and is_partially_downloaded:
             continue
 
-        if model.startswith("huggingface://"):
-            model = model.replace("huggingface://", "hf://", 1)
-
-        if not model.startswith("ollama://") and not model.startswith("oci://"):
-            model = model.removesuffix(":latest")
-
+        model = trim_model_name(model)
         size_sum = 0
         last_modified = 0.0
         for file in files:

ramalama/model.py

Lines changed: 12 additions & 4 deletions

@@ -63,6 +63,16 @@ def is_split_file_model(model_path):
     return bool(re.match(SPLIT_MODEL_RE, model_path))
 
 
+def trim_model_name(model):
+    if model.startswith("huggingface://"):
+        model = model.replace("huggingface://", "hf://", 1)
+
+    if not model.startswith("ollama://") and not model.startswith("oci://"):
+        model = model.removesuffix(":latest")
+
+    return model
+
+
 class ModelBase:
     def __not_implemented_error(self, param):
         return NotImplementedError(f"ramalama {param} for '{type(self).__name__}' not implemented")
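
The new `trim_model_name` helper centralizes how model references are shortened for display. A minimal standalone sketch of its behavior, using made-up model references rather than anything from the repository:

```python
# Standalone copy of the helper introduced above, exercised on sample inputs.
def trim_model_name(model):
    if model.startswith("huggingface://"):
        model = model.replace("huggingface://", "hf://", 1)

    if not model.startswith("ollama://") and not model.startswith("oci://"):
        model = model.removesuffix(":latest")

    return model

# huggingface:// collapses to hf://, and an implicit :latest tag is dropped.
assert trim_model_name("huggingface://org/model:latest") == "hf://org/model"
# ollama:// and oci:// references keep their :latest tag untouched.
assert trim_model_name("ollama://smollm:latest") == "ollama://smollm:latest"
assert trim_model_name("oci://quay.io/org/model:latest") == "oci://quay.io/org/model:latest"
```

Both `ramalama list` (via the cli.py change above) and the pull progress messages (below) now route through this one helper instead of duplicating the string handling.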
@@ -479,10 +489,7 @@ def _build_mlx_exec_args(self, subcommand: str, model_path: str, args, extra: li
         Optional list of extra arguments to append verbatim.
         """
         exec_args = [
-            "python",
-            "-m",
-            "mlx_lm",
-            subcommand,
+            "mlx_lm.server",
             "--model",
             shlex.quote(model_path),
         ]
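
The MLX runtime is now launched through the `mlx_lm.server` entry point rather than `python -m mlx_lm <subcommand>`. A rough before/after sketch of the argument list this method builds (the model path is illustrative):

```python
import shlex

model_path = "/path/to/model"  # illustrative path

# Before: module invocation with an explicit subcommand.
old_exec_args = ["python", "-m", "mlx_lm", "server", "--model", shlex.quote(model_path)]

# After: the dedicated mlx_lm.server entry point; no subcommand argument needed.
new_exec_args = ["mlx_lm.server", "--model", shlex.quote(model_path)]

print(" ".join(old_exec_args))  # python -m mlx_lm server --model /path/to/model
print(" ".join(new_exec_args))  # mlx_lm.server --model /path/to/model
```

This is presumably also why install.sh above starts installing `mlx-lm` as a uv tool on Homebrew systems: `uv tool install` puts the package's executables, including `mlx_lm.server`, on the PATH.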
@@ -849,6 +856,7 @@ def inspect(self, args):
         print(ModelInfoBase(model_name, model_registry, model_path).serialize(json=args.json))
 
     def print_pull_message(self, model_name):
+        model_name = trim_model_name(model_name)
         # Write messages to stderr
         perror(f"Downloading {model_name} ...")
         perror(f"Trying to pull {model_name} ...")

test/system/080-mlx.bats

Lines changed: 5 additions & 5 deletions

@@ -58,7 +58,7 @@ function skip_if_no_mlx() {
     run_ramalama --runtime=mlx --dryrun run ${MODEL}
     is "$status" "0" "MLX run should work"
     # Should use python -m mlx_lm server for the server process
-    is "$output" ".*python.*-m.*mlx_lm server.*" "should use MLX server command"
+    is "$output" ".*mlx_lm.server.*" "should use MLX server command"
     is "$output" ".*--port.*" "should include port specification"
 }
 

@@ -69,7 +69,7 @@ function skip_if_no_mlx() {
     prompt="Hello, how are you?"
     run_ramalama --runtime=mlx --dryrun run ${MODEL} "$prompt"
     is "$status" "0" "MLX run with prompt should work"
-    is "$output" ".*python.*-m.*mlx_lm server.*" "should use MLX server command"
+    is "$output" ".*mlx_lm.server.*" "should use MLX server command"
     is "$output" ".*--port.*" "should include port specification"
 }
 

@@ -98,7 +98,7 @@ function skip_if_no_mlx() {
     run_ramalama --runtime=mlx --dryrun serve ${MODEL}
     is "$status" "0" "MLX serve should work"
     # Should use python -m mlx_lm.server
-    is "$output" ".*python.*-m.*mlx_lm server.*" "should use MLX server command"
+    is "$output" ".*mlx_lm.server.*" "should use MLX server command"
     is "$output" ".*--port.*8080.*" "should include default port"
 }
 

@@ -145,7 +145,7 @@ function skip_if_no_mlx() {
     model="ollama://smollm:135m"
     run_ramalama --runtime=mlx --dryrun run "$model"
     is "$status" "0" "MLX should work with ollama model format"
-    is "$output" ".*python.*-m.*mlx_lm server.*" "should use MLX server command"
+    is "$output" ".*mlx_lm.server.*" "should use MLX server command"
 }
 
 @test "ramalama --runtime=mlx works with huggingface model format" {

@@ -155,7 +155,7 @@ function skip_if_no_mlx() {
     model="huggingface://microsoft/DialoGPT-small"
     run_ramalama --runtime=mlx --dryrun run "$model"
     is "$status" "0" "MLX should work with huggingface model format"
-    is "$output" ".*python.*-m.*mlx_lm server.*" "should use MLX server command"
+    is "$output" ".*mlx_lm.server.*" "should use MLX server command"
 }
 
 @test "ramalama --runtime=mlx rejects --name option" {

test/unit/test_model.py

Lines changed: 2 additions & 8 deletions

@@ -143,10 +143,7 @@ def test_mlx_serve_args(self):
         exec_args = model.mlx_serve(args, "/path/to/model")
 
         expected_args = [
-            "python",
-            "-m",
-            "mlx_lm",
-            "server",
+            "mlx_lm.server",
             "--model",
             "/path/to/model",
             "--temp",

@@ -275,10 +272,7 @@ def test_mlx_build_exec_args_includes_server_subcommand(self, mock_machine, mock
         exec_args = model._build_mlx_exec_args("server", "/path/to/model", args, ["--port", "8080"])
 
         expected_args = [
-            "python",
-            "-m",
-            "mlx_lm",
-            "server",
+            "mlx_lm.server",
            "--model",
            "/path/to/model",
            "--temp",
