5 changes: 0 additions & 5 deletions README.md
@@ -116,11 +116,6 @@ RamaLama is available via PyPi at [https://pypi.org/project/ramalama](https://pypi.org/project/ramalama)
pip install ramalama
```

### Install via Homebrew
```
brew install ramalama
```

### Install script (Linux and macOS)
Install RamaLama by running:
```
14 changes: 10 additions & 4 deletions install.sh
@@ -129,6 +129,13 @@ is_python3_at_least_310() {
python3 -c 'import sys; exit(0 if sys.version_info >= (3, 10) else 1)'
}

install_uv() {
local host="raw.githubusercontent.com"
local install_uv_url="https://$host/containers/ramalama/s/install-uv.sh"
curl -fsSL "$install_uv_url" | bash
echo
}

main() {
set -e -o pipefail

@@ -151,14 +158,13 @@ main() {
fi

if available brew && brew install ramalama; then
install_uv
uv tool install mlx-lm
return 0
fi
fi

local host="raw.githubusercontent.com"
local install_uv_url="https://$host/containers/ramalama/s/install-uv.sh"
curl -fsSL "$install_uv_url" | bash
echo
install_uv
uv tool install --force --python python3.12 ramalama
print_success_info
}
9 changes: 2 additions & 7 deletions ramalama/cli.py
@@ -27,7 +27,7 @@
from ramalama.common import accel_image, get_accel, perror
from ramalama.config import CONFIG
from ramalama.logger import configure_logger, logger
from ramalama.model import MODEL_TYPES
from ramalama.model import MODEL_TYPES, trim_model_name
from ramalama.model_factory import ModelFactory, New
from ramalama.model_inspect.error import ParseError
from ramalama.model_store.global_store import GlobalModelStore
@@ -489,12 +489,7 @@ def _list_models_from_store(args):
if not args.all and is_partially_downloaded:
continue

if model.startswith("huggingface://"):
model = model.replace("huggingface://", "hf://", 1)

if not model.startswith("ollama://") and not model.startswith("oci://"):
model = model.removesuffix(":latest")

model = trim_model_name(model)
size_sum = 0
last_modified = 0.0
for file in files:
16 changes: 12 additions & 4 deletions ramalama/model.py
@@ -63,6 +63,16 @@ def is_split_file_model(model_path):
return bool(re.match(SPLIT_MODEL_RE, model_path))


def trim_model_name(model):
if model.startswith("huggingface://"):
model = model.replace("huggingface://", "hf://", 1)

if not model.startswith("ollama://") and not model.startswith("oci://"):
model = model.removesuffix(":latest")

return model


class ModelBase:
def __not_implemented_error(self, param):
return NotImplementedError(f"ramalama {param} for '{type(self).__name__}' not implemented")
@@ -479,10 +489,7 @@ def _build_mlx_exec_args(self, subcommand: str, model_path: str, args, extra: li
Optional list of extra arguments to append verbatim.
"""
exec_args = [
"python",
"-m",
"mlx_lm",
subcommand,
"mlx_lm.server",
"--model",
shlex.quote(model_path),
]
@@ -849,6 +856,7 @@ def inspect(self, args):
print(ModelInfoBase(model_name, model_registry, model_path).serialize(json=args.json))

def print_pull_message(self, model_name):
model_name = trim_model_name(model_name)
# Write messages to stderr
perror(f"Downloading {model_name} ...")
perror(f"Trying to pull {model_name} ...")
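For illustration, a minimal sketch of how the new trim_model_name helper behaves, based on the implementation above (the model references here are made-up examples, not taken from this PR):

```python
from ramalama.model import trim_model_name

# huggingface:// is rewritten to the short hf:// scheme, and an implicit :latest tag is dropped
assert trim_model_name("huggingface://org/model:latest") == "hf://org/model"

# ollama:// and oci:// references are left untouched, including any :latest tag
assert trim_model_name("ollama://smollm:135m") == "ollama://smollm:135m"
assert trim_model_name("oci://quay.io/org/model:latest") == "oci://quay.io/org/model:latest"
```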
10 changes: 5 additions & 5 deletions test/system/080-mlx.bats
@@ -58,7 +58,7 @@ function skip_if_no_mlx() {
run_ramalama --runtime=mlx --dryrun run ${MODEL}
is "$status" "0" "MLX run should work"
# Should use python -m mlx_lm server for the server process
is "$output" ".*python.*-m.*mlx_lm server.*" "should use MLX server command"
is "$output" ".*mlx_lm.server.*" "should use MLX server command"
is "$output" ".*--port.*" "should include port specification"
}

@@ -69,7 +69,7 @@ function skip_if_no_mlx() {
prompt="Hello, how are you?"
run_ramalama --runtime=mlx --dryrun run ${MODEL} "$prompt"
is "$status" "0" "MLX run with prompt should work"
is "$output" ".*python.*-m.*mlx_lm server.*" "should use MLX server command"
is "$output" ".*mlx_lm.server.*" "should use MLX server command"
is "$output" ".*--port.*" "should include port specification"
}

@@ -98,7 +98,7 @@ function skip_if_no_mlx() {
run_ramalama --runtime=mlx --dryrun serve ${MODEL}
is "$status" "0" "MLX serve should work"
# Should use python -m mlx_lm.server
is "$output" ".*python.*-m.*mlx_lm server.*" "should use MLX server command"
is "$output" ".*mlx_lm.server.*" "should use MLX server command"
is "$output" ".*--port.*8080.*" "should include default port"
}

@@ -145,7 +145,7 @@ function skip_if_no_mlx() {
model="ollama://smollm:135m"
run_ramalama --runtime=mlx --dryrun run "$model"
is "$status" "0" "MLX should work with ollama model format"
is "$output" ".*python.*-m.*mlx_lm server.*" "should use MLX server command"
is "$output" ".*mlx_lm.server.*" "should use MLX server command"
}

@test "ramalama --runtime=mlx works with huggingface model format" {
@@ -155,7 +155,7 @@
model="huggingface://microsoft/DialoGPT-small"
run_ramalama --runtime=mlx --dryrun run "$model"
is "$status" "0" "MLX should work with huggingface model format"
is "$output" ".*python.*-m.*mlx_lm server.*" "should use MLX server command"
is "$output" ".*mlx_lm.server.*" "should use MLX server command"
}

@test "ramalama --runtime=mlx rejects --name option" {
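To exercise these updated assertions locally, something like the following should work (a sketch; it assumes bats-core is installed and the MLX runtime is available on the host):

```
bats test/system/080-mlx.bats
```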
10 changes: 2 additions & 8 deletions test/unit/test_model.py
@@ -143,10 +143,7 @@ def test_mlx_serve_args(self):
exec_args = model.mlx_serve(args, "/path/to/model")

expected_args = [
"python",
"-m",
"mlx_lm",
"server",
"mlx_lm.server",
"--model",
"/path/to/model",
"--temp",
@@ -275,10 +272,7 @@ def test_mlx_build_exec_args_includes_server_subcommand(self, mock_machine, mock
exec_args = model._build_mlx_exec_args("server", "/path/to/model", args, ["--port", "8080"])

expected_args = [
"python",
"-m",
"mlx_lm",
"server",
"mlx_lm.server",
"--model",
"/path/to/model",
"--temp",
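The matching unit tests can be run on their own as well (a sketch, assuming the suite runs under pytest):

```
pytest test/unit/test_model.py -k mlx
```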