
Commit 6036c9c

Make sure errors and progress messages go to STDERR
Signed-off-by: Daniel J Walsh <[email protected]>
1 parent c62a2a4 commit 6036c9c

14 files changed: 42 additions & 35 deletions
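
Every change below follows the same pattern: progress and diagnostic messages that previously went to stdout via print() now go through perror() from ramalama.common, keeping stdout free for actual program output. The implementation of perror() is not part of this diff; the sketch below is only an assumption of what such a helper typically looks like, forwarding its arguments to print() with file=sys.stderr — the real helper in ramalama.common may differ.

import sys

def perror(*args, **kwargs):
    # Hypothetical sketch, not the actual ramalama.common implementation:
    # emit the message on stderr instead of stdout, passing keyword
    # arguments such as end= and flush= through to print().
    kwargs.setdefault("file", sys.stderr)
    print(*args, **kwargs)

Calls such as perror(f"\r{c}", end="", flush=True) in chat.py rely on keyword arguments being forwarded to print(), which is why the sketch passes **kwargs through.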

ramalama/chat.py

Lines changed: 5 additions & 4 deletions
@@ -11,6 +11,7 @@
 import urllib.request
 from datetime import timedelta

+from ramalama.common import perror
 from ramalama.config import CONFIG
 from ramalama.console import EMOJI, should_colorize
 from ramalama.engine import dry_run, stop_container
@@ -68,7 +69,7 @@ def add_api_key(args, headers=None):
     if getattr(args, "api_key", None):
         api_key_min = 20
         if len(args.api_key) < api_key_min:
-            print("Warning: Provided API key is invalid.")
+            perror("Warning: Provided API key is invalid.")

         headers["Authorization"] = f"Bearer {args.api_key}"

@@ -161,7 +162,7 @@ def _req(self):
                 break
             except Exception:
                 if sys.stdout.isatty():
-                    print(f"\r{c}", end="", flush=True)
+                    perror(f"\r{c}", end="", flush=True)

             if total_time_slept > max_timeout:
                 break
@@ -176,7 +177,7 @@ def _req(self):

         # Only show error and kill if not in initial connection phase
         if not getattr(self.args, "initial_connection", False):
-            print(f"\rError: could not connect to: {self.url}", file=sys.stderr)
+            perror(f"\rError: could not connect to: {self.url}")
             self.kills()
         else:
             logger.debug(f"Could not connect to: {self.url}")
@@ -251,7 +252,7 @@ def chat(args):
     except TimeoutException as e:
         logger.debug(f"Timeout Exception: {e}")
         # Handle the timeout, e.g., print a message and exit gracefully
-        print("")
+        perror("")
         pass
     finally:
         # Reset the alarm to 0 to cancel any pending alarms

ramalama/cli.py

Lines changed: 1 addition & 1 deletion
@@ -382,7 +382,7 @@ def list_files_by_modification(args):
         if os.path.exists(path):
             models.append(path)
         else:
-            print(f"Broken symlink found in: {args.store}/models/{path} \nAttempting removal")
+            perror(f"Broken symlink found in: {args.store}/models/{path} \nAttempting removal")
            New(str(path).replace("/", "://", 1), args).remove(args)

     return sorted(models, key=lambda p: os.path.getmtime(p), reverse=True)

ramalama/common.py

Lines changed: 2 additions & 2 deletions
@@ -54,7 +54,7 @@ def confirm_no_gpu(name, provider) -> bool:
             return True
         if user_input in ["no", "n"]:
             return False
-        print("Invalid input. Please enter 'yes' or 'no'.")
+        perror("Invalid input. Please enter 'yes' or 'no'.")


 def handle_provider(machine, config: Config | None = None) -> bool | None:
@@ -580,7 +580,7 @@ def attempt_to_use_versioned(conman: str, image: str, vers: str, quiet: bool, sh
     try:
         # attempt to pull the versioned image
         if not quiet:
-            print(f"Attempting to pull {image}:{vers} ...")
+            perror(f"Attempting to pull {image}:{vers} ...")
         run_cmd([conman, "pull", f"{image}:{vers}"], ignore_stderr=True)
         return True

ramalama/engine.py

Lines changed: 1 addition & 1 deletion
@@ -47,7 +47,7 @@ def add_pull_newer(self):
         if not self.args.dryrun and self.use_docker and self.args.pull == "newer":
             try:
                 if not self.args.quiet:
-                    print(f"Checking for newer image {self.args.image}")
+                    perror(f"Checking for newer image {self.args.image}")
                 run_cmd([str(self.args.engine), "pull", "-q", self.args.image], ignore_all=True)
             except Exception:  # Ignore errors, the run command will handle it.
                 pass

ramalama/hf_style_repo_base.py

Lines changed: 2 additions & 2 deletions
@@ -220,7 +220,7 @@ def pull(self, args):
         hash, cached_files, all = self.model_store.get_cached_files(tag)
         if all:
             if not args.quiet:
-                print(f"Using cached {self.get_repo_type()}://{name}:{tag} ...")
+                perror(f"Using cached {self.get_repo_type()}://{name}:{tag} ...")
             return self.model_store.get_snapshot_file_path(hash, name)

         try:
@@ -252,4 +252,4 @@ def exec(self, cmd_args, args):
         try:
             exec_cmd(cmd_args)
         except FileNotFoundError as e:
-            print(f"{str(e).strip()}\n{self.get_missing_message()}")
+            perror(f"{str(e).strip()}\n{self.get_missing_message()}")

ramalama/http_client.py

Lines changed: 1 addition & 0 deletions
@@ -2,6 +2,7 @@

 import os
 import shutil
+import sys
 import time
 import urllib.request

ramalama/huggingface.py

Lines changed: 1 addition & 1 deletion
@@ -130,7 +130,7 @@ def get_repo_info(repo_name):

 def handle_repo_info(repo_name, repo_info, runtime):
     if "safetensors" in repo_info and runtime == "llama.cpp":
-        print(
+        perror(
             "\nllama.cpp does not support running safetensor models, "
             "please use a/convert to the GGUF format using:\n"
             f"- https://huggingface.co/models?other=base_model:quantized:{repo_name} \n"

ramalama/model.py

Lines changed: 10 additions & 10 deletions
@@ -21,6 +21,7 @@
     check_nvidia,
     exec_cmd,
     genname,
+    perror,
     set_accel_env_vars,
 )
 from ramalama.config import CONFIG, DEFAULT_PORT, DEFAULT_PORT_RANGE
@@ -188,7 +189,7 @@ def garbage_collection(self, args):
             if not file_has_a_symlink:
                 os.remove(file_path)
                 file_path = os.path.basename(file_path)
-                print(f"Deleted: {file_path}")
+                perror(f"Deleted: {file_path}")

     def remove(self, args):
         _, tag, _ = self.extract_model_identifiers()
@@ -422,18 +423,16 @@ def _handle_mlx_chat(self, args):
                     chat.chat(args)
                     break
                 else:
-                    if args.debug:
-                        print(f"MLX server not ready, waiting... (attempt {i+1}/{max_retries})", file=sys.stderr)
+                    logger.debug(f"MLX server not ready, waiting... (attempt {i+1}/{max_retries})")
                     time.sleep(3)
                     continue

            except Exception as e:
                if i >= max_retries - 1:
-                   print(f"Error: Failed to connect to MLX server after {max_retries} attempts: {e}", file=sys.stderr)
+                   perror(f"Error: Failed to connect to MLX server after {max_retries} attempts: {e}")
                    self._cleanup_server_process(args.pid2kill)
                    raise e
-               if args.debug:
-                   print(f"Connection attempt failed, retrying... (attempt {i+1}/{max_retries}): {e}", file=sys.stderr)
+               logger.debug(f"Connection attempt failed, retrying... (attempt {i+1}/{max_retries}): {e}")
                time.sleep(3)

        args.initial_connection = False
@@ -843,8 +842,9 @@ def inspect(self, args):
         print(ModelInfoBase(model_name, model_registry, model_path).serialize(json=args.json))

     def print_pull_message(self, model_name):
-        print(f"Downloading {model_name} ...")
-        print(f"Trying to pull {model_name} ...")
+        # Write messages to stderr
+        perror(f"Downloading {model_name} ...")
+        perror(f"Trying to pull {model_name} ...")


 def distinfo_volume():
@@ -894,7 +894,7 @@ def compute_serving_port(args, quiet=False) -> str:
     if not quiet:
         openai = f"http://localhost:{target_port}"
         if args.api == "llama-stack":
-            print(f"Llama Stack RESTAPI: {openai}")
+            perror(f"Llama Stack RESTAPI: {openai}")
             openai = openai + "/v1/openai"
-        print(f"OpenAI RESTAPI: {openai}")
+        perror(f"OpenAI RESTAPI: {openai}")
     return str(target_port)

ramalama/oci.py

Lines changed: 6 additions & 5 deletions
@@ -311,12 +311,12 @@ def _create_manifest(self, target, imageid, args):
         run_cmd(cmd_args, stdout=None)

     def _convert(self, source_model, args):
-        print(f"Converting {source_model.model_store.base_path} to {self.model_store.base_path} ...")
+        perror(f"Converting {source_model.model_store.base_path} to {self.model_store.base_path} ...")
         try:
             run_cmd([self.conman, "manifest", "rm", self.model], ignore_stderr=True, stdout=None)
         except subprocess.CalledProcessError:
             pass
-        print(f"Building {self.model} ...")
+        perror(f"Building {self.model} ...")
         imageid = self.build(source_model, args)
         try:
             self._create_manifest(self.model, imageid, args)
@@ -335,7 +335,7 @@ def push(self, source_model, args):
         target = self.model
         source = source_model.model

-        print(f"Pushing {self.model} ...")
+        perror(f"Pushing {self.model} ...")
         conman_args = [self.conman, "push"]
         if args.authfile:
             conman_args.extend([f"--authfile={args.authfile}"])
@@ -351,14 +351,15 @@ def push(self, source_model, args):
             raise e

     def pull(self, args):
-        if not args.quiet:
-            print(f"Downloading {self.model} ...")
         if not args.engine:
             raise NotImplementedError("OCI images require a container engine like Podman or Docker")

         conman_args = [args.engine, "pull"]
         if args.quiet:
             conman_args.extend(['--quiet'])
+        else:
+            # Write message to stderr
+            perror(f"Downloading {self.model} ...")
         if str(args.tlsverify).lower() == "false":
             conman_args.extend([f"--tls-verify={args.tlsverify}"])
         if args.authfile:

ramalama/ollama.py

Lines changed: 3 additions & 3 deletions
@@ -3,7 +3,7 @@
 import urllib.error
 from typing import Optional

-from ramalama.common import available
+from ramalama.common import available, perror
 from ramalama.model import Model
 from ramalama.model_store.snapshot_file import SnapshotFile, SnapshotFileType
 from ramalama.ollama_repo_utils import fetch_manifest_data
@@ -147,7 +147,7 @@ def pull(self, args):
         hash, cached_files, all = self.model_store.get_cached_files(tag)
         if all:
             if not args.quiet:
-                print(f"Using cached ollama://{name}:{tag} ...")
+                perror(f"Using cached ollama://{name}:{tag} ...")
             return self.model_store.get_snapshot_file_path(hash, name)

         ollama_repo = OllamaRepository(self.model_store.model_name)
@@ -165,7 +165,7 @@ def pull(self, args):
         # If a model has been downloaded via ollama cli, only create symlink in the snapshots directory
         if is_model_in_ollama_cache:
             if not args.quiet:
-                print(f"Using cached ollama://{name}{tag} ...")
+                perror(f"Using cached ollama://{name}{tag} ...")
             snapshot_model_path = self.model_store.get_snapshot_file_path(model_hash, self.model_store.model_name)
             os.symlink(ollama_cache_path, snapshot_model_path)
