review comments

kush-gupt · kush-gupt · commit 406e0e49ccce · 2025-07-02T08:42:08.000-04:00
Signed-off-by: Kush Gupta <kushalgupta@gmail.com>
diff --git a/ramalama/chat.py b/ramalama/chat.py
@@ -68,12 +68,7 @@ def __init__(self, args):
         self.args = args
         self.request_in_process = False
         self.prompt = args.prefix
-
-        # MLX server uses /v1/chat/completions endpoint
-        if getattr(args, "runtime", None) == "mlx":
-            self.url = f"{args.url}/v1/chat/completions"
-        else:
-            self.url = f"{args.url}/chat/completions"
+        self.url = f"{args.url}/chat/completions"
         self.prep_rag_message()
 
     def prep_rag_message(self):
diff --git a/ramalama/model.py b/ramalama/model.py
@@ -9,6 +9,7 @@
 import sys
 import threading
 import time
+from typing import Optional
 
 import ramalama.chat as chat
 from ramalama.common import (
@@ -118,7 +119,7 @@ def __init__(self, model, model_store_path):
         self._model_type = type(self).__name__.lower()
 
         self._model_store_path: str = model_store_path
-        self._model_store: ModelStore | None = None
+        self._model_store: Optional[ModelStore] = None
 
         self.default_image = accel_image(CONFIG)
 
@@ -443,10 +444,10 @@ def _build_mlx_exec_args(self, subcommand: str, model_path: str, args, extra: li
             shlex.quote(model_path),
         ]
 
-        if getattr(args, "temp", None) is not None:
+        if getattr(args, "temp", None):
             exec_args += ["--temp", str(args.temp)]
 
-        if getattr(args, "seed", None) is not None:
+        if getattr(args, "seed", None):
             exec_args += ["--seed", str(args.seed)]
 
         if getattr(args, "context", None):
@@ -476,12 +477,12 @@ def _mlx_generate_response(self, model_path, prompt, args, *, return_response=Fa
             from ramalama.engine import dry_run
 
             dry_run(exec_args)
-            return None if return_response else None
+            return
 
         # For single-prompt mode, use exec_cmd
         if not return_response:
             exec_cmd(exec_args, stdout2null=False)
-            return None
+            return
 
         # For interactive mode, we need to capture the response
         # Consume stderr concurrently to avoid deadlocks if its buffer fills.