containers
diff --git a/Makefile b/Makefile
Lines changed: 1 addition & 2 deletions
diff --git a/ramalama/chat.py b/ramalama/chat.py
Lines changed: 1 addition & 1 deletion
diff --git a/ramalama/cli.py b/ramalama/cli.py
Lines changed: 1 addition & 3 deletions
diff --git a/ramalama/common.py b/ramalama/common.py
Lines changed: 28 additions & 16 deletions
diff --git a/ramalama/config.py b/ramalama/config.py
Lines changed: 7 additions & 7 deletions
diff --git a/ramalama/file_loaders/file_manager.py b/ramalama/file_loaders/file_manager.py
Lines changed: 4 additions & 5 deletions
diff --git a/ramalama/hf_style_repo_base.py b/ramalama/hf_style_repo_base.py
Lines changed: 1 addition & 1 deletion
diff --git a/ramalama/model.py b/ramalama/model.py
Lines changed: 20 additions & 12 deletions
@@ -111,9 +111,8 @@ docs:
 .PHONY: lint
 lint:
 ifneq (,$(wildcard /usr/bin/python3))
-	/usr/bin/python3 -m compileall -q .
+	/usr/bin/python3 -m compileall -q -x '\.venv' .
 endif
-
 	! grep -ri --exclude-dir ".venv" --exclude-dir "*/.venv" "#\!/usr/bin/python3" .
 	flake8 $(PROJECT_DIR) $(PYTHON_SCRIPTS)
 	shellcheck *.sh */*.sh */*/*.sh
 
@@ -96,7 +96,7 @@ def __init__(self, args: ChatArgsType, operational_args: ChatOperationalArgs | N
             operational_args = ChatOperationalArgs()
 
         super().__init__()
-        self.conversation_history = []
+        self.conversation_history: list[dict] = []
         self.args = args
         self.request_in_process = False
         self.prompt = args.prefix
 
@@ -16,7 +16,7 @@
 try:
     import argcomplete
 
-    suppressCompleter = argcomplete.completers.SuppressCompleter
+    suppressCompleter: type[argcomplete.completers.SuppressCompleter] | None = argcomplete.completers.SuppressCompleter
 except Exception:
     suppressCompleter = None
 
@@ -44,7 +44,6 @@
 
 
 class ParsedGenerateInput:
-
     def __init__(self, gen_type: str, output_dir: str):
         self.gen_type = gen_type
         self.output_dir = output_dir
@@ -1235,7 +1234,6 @@ def inspect_cli(args):
 
 
 def main():
-
     def eprint(e, exit_code):
         perror("Error: " + str(e).strip("'\""))
         sys.exit(exit_code)
 
@@ -13,8 +13,9 @@
 import string
 import subprocess
 import sys
+from collections.abc import Callable, Iterable
 from functools import lru_cache
-from typing import TYPE_CHECKING, Callable, List, Literal, Protocol, cast, get_args
+from typing import TYPE_CHECKING, Literal, Protocol, TypeAlias, TypedDict, cast, get_args
 
 import ramalama.amdkfd as amdkfd
 from ramalama.logger import logger
@@ -230,15 +231,23 @@ def engine_version(engine: SUPPORTED_ENGINES) -> str:
     return run_cmd(cmd_args).stdout.decode("utf-8").strip()
 
 
-def load_cdi_yaml(stream) -> dict:
+class CDI_DEVICE(TypedDict):
+    name: str
+
+
+class CDI_RETURN_TYPE(TypedDict):
+    devices: list[CDI_DEVICE]
+
+
+def load_cdi_yaml(stream: Iterable[str]) -> CDI_RETURN_TYPE:
     # Returns a dict containing just the "devices" key, whose value is
     # a list of dicts, each mapping the key "name" to a device name.
     # For example: {'devices': [{'name': 'all'}]}
     # This depends on the key "name" being unique to the list of dicts
     # under "devices" and the value of the "name" key being on the
     # same line following a colon.
 
-    data = {"devices": []}
+    data: CDI_RETURN_TYPE = {"devices": []}
     for line in stream:
         if ':' in line:
             key, value = line.split(':', 1)
@@ -247,7 +256,7 @@ def load_cdi_yaml(stream) -> dict:
     return data
 
 
-def load_cdi_config(spec_dirs: List[str]) -> dict | None:
+def load_cdi_config(spec_dirs: list[str]) -> CDI_RETURN_TYPE | None:
     # Loads the first YAML or JSON CDI configuration file found in the
     # given directories."""
 
@@ -275,7 +284,7 @@ def load_cdi_config(spec_dirs: List[str]) -> dict | None:
     return None
 
 
-def find_in_cdi(devices: List[str]) -> tuple[List[str], List[str]]:
+def find_in_cdi(devices: list[str]) -> tuple[list[str], list[str]]:
     # Attempts to find a CDI configuration for each device in devices
     # and returns a list of configured devices and a list of
     # unconfigured devices.
@@ -327,11 +336,12 @@ def check_nvidia() -> Literal["cuda"] | None:
         return None
 
     smi_lines = result.stdout.splitlines()
-    parsed_lines = [[item.strip() for item in line.split(',')] for line in smi_lines if line]
+    parsed_lines: list[list[str]] = [[item.strip() for item in line.split(',')] for line in smi_lines if line]
+
     if not parsed_lines:
         return None
 
-    indices, uuids = zip(*parsed_lines) if parsed_lines else (tuple(), tuple())
+    indices, uuids = map(list, zip(*parsed_lines))
     # Get the list of devices specified by CUDA_VISIBLE_DEVICES, if any
     cuda_visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES", "")
     visible_devices = cuda_visible_devices.split(',') if cuda_visible_devices else []
@@ -342,14 +352,14 @@ def check_nvidia() -> Literal["cuda"] | None:
 
     configured, unconfigured = find_in_cdi(visible_devices + ["all"])
 
-    if unconfigured and "all" not in configured:
+    if not (configured_has_all := "all" in configured) and unconfigured:  # bind first: the elif branch reads it
         perror(f"No CDI configuration found for {','.join(unconfigured)}")
         perror("You can use the \"nvidia-ctk cdi generate\" command from the ")
         perror("nvidia-container-toolkit to generate a CDI configuration.")
         perror("See ramalama-cuda(7).")
         return None
     elif configured:
-        if "all" in configured:
+        if configured_has_all:
             configured.remove("all")
             if not configured:
                 configured = indices
@@ -442,7 +452,7 @@ def check_mthreads() -> Literal["musa"] | None:
     return None
 
 
-AccelType = Literal["asahi", "cuda", "cann", "hip", "intel", "musa"]
+AccelType: TypeAlias = Literal["asahi", "cuda", "cann", "hip", "intel", "musa"]
 
 
 def get_accel() -> AccelType | Literal["none"]:
@@ -474,7 +484,7 @@ def set_gpu_type_env_vars():
     get_accel()
 
 
-GPUEnvVar = Literal[
+GPUEnvVar: TypeAlias = Literal[
     "ASAHI_VISIBLE_DEVICES",
     "ASCEND_VISIBLE_DEVICES",
     "CUDA_VISIBLE_DEVICES",
@@ -486,10 +496,10 @@ def set_gpu_type_env_vars():
 
 
 def get_gpu_type_env_vars() -> dict[GPUEnvVar, str]:
-    return {k: os.environ[k] for k in get_args(GPUEnvVar) if k in os.environ}
+    return {k: v for k in get_args(GPUEnvVar) if (v := os.environ.get(k)) is not None}
 
 
-AccelEnvVar = Literal[
+AccelEnvVar: TypeAlias = Literal[
     "CUDA_LAUNCH_BLOCKING",
     "HSA_VISIBLE_DEVICES",
     "HSA_OVERRIDE_GFX_VERSION",
@@ -498,7 +508,7 @@ def get_gpu_type_env_vars() -> dict[GPUEnvVar, str]:
 
 def get_accel_env_vars() -> dict[GPUEnvVar | AccelEnvVar, str]:
     gpu_env_vars: dict[GPUEnvVar, str] = get_gpu_type_env_vars()
-    accel_env_vars: dict[AccelEnvVar, str] = {k: os.environ[k] for k in get_args(AccelEnvVar) if k in os.environ}
+    accel_env_vars: dict[AccelEnvVar, str] = {k: v for k in get_args(AccelEnvVar) if (v := os.getenv(k)) is not None}
     return gpu_env_vars | accel_env_vars
 
 
@@ -599,7 +609,9 @@ class AccelImageArgsOtherRuntimeRAG(Protocol):
     quiet: bool
 
 
-AccelImageArgs = None | AccelImageArgsVLLMRuntime | AccelImageArgsOtherRuntime | AccelImageArgsOtherRuntimeRAG
+AccelImageArgs: TypeAlias = (
+    None | AccelImageArgsVLLMRuntime | AccelImageArgsOtherRuntime | AccelImageArgsOtherRuntimeRAG
+)
 
 
 def accel_image(config: Config) -> str:
@@ -627,7 +639,7 @@ def accel_image(config: Config) -> str:
     vers = minor_release()
 
     should_pull = config.pull in ["always", "missing"] and not config.dryrun
-    if attempt_to_use_versioned(config.engine, image, vers, True, should_pull):
+    if config.engine and attempt_to_use_versioned(config.engine, image, vers, True, should_pull):
         return f"{image}:{vers}"
 
     return f"{image}:latest"
 
@@ -3,19 +3,19 @@
 import sys
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Any, Literal, Mapping
+from typing import Any, Literal, Mapping, TypeAlias
 
 from ramalama.common import available
 from ramalama.layered_config import LayeredMixin, deep_merge
 from ramalama.toml_parser import TOMLParser
 
-PathStr = str
+PathStr: TypeAlias = str
 DEFAULT_PORT_RANGE: tuple[int, int] = (8080, 8090)
 DEFAULT_PORT: int = DEFAULT_PORT_RANGE[0]
-DEFAULT_IMAGE = "quay.io/ramalama/ramalama"
-SUPPORTED_ENGINES = Literal["podman", "docker"] | PathStr
-SUPPORTED_RUNTIMES = Literal["llama.cpp", "vllm", "mlx"]
-COLOR_OPTIONS = Literal["auto", "always", "never"]
+DEFAULT_IMAGE: str = "quay.io/ramalama/ramalama"
+SUPPORTED_ENGINES: TypeAlias = Literal["podman", "docker"] | PathStr
+SUPPORTED_RUNTIMES: TypeAlias = Literal["llama.cpp", "vllm", "mlx"]
+COLOR_OPTIONS: TypeAlias = Literal["auto", "always", "never"]
 
 
 def get_default_engine() -> SUPPORTED_ENGINES | None:
@@ -158,7 +158,7 @@ def load_env_config(env: Mapping[str, str] | None = None) -> dict[str, Any]:
     if env is None:
         env = os.environ
 
-    config = {}
+    config: dict[str, Any] = {}
     for k, v in env.items():
         if not k.startswith("RAMALAMA"):
             continue
 
@@ -23,7 +23,7 @@ def _get_loader(self, file: str) -> base.BaseFileLoader:
         return loader
 
     @abstractmethod
-    def load(self):
+    def load(self, *args, **kwargs):
         pass
 
     @classmethod
@@ -121,12 +121,11 @@ def load(self, file_path: str) -> list[dict]:
         if unsupported_files:
             unsupported_files_warning(unsupported_files, list(self.supported_extensions()))
 
-        messages = []
+        messages: list[dict] = []
         if text_files:
             messages.append({"role": "system", "content": self.text_manager.load(text_files)})
         if image_files:
-            message = {"role": "system", "content": []}
-            for content in self.image_manager.load(image_files):
-                message['content'].append({"type": "image_url", "image_url": {"url": content}})
+            content = [{"type": "image_url", "image_url": {"url": c}} for c in self.image_manager.load(image_files)]
+            message = {"role": "system", "content": content}
             messages.append(message)
         return messages
@@ -64,7 +64,7 @@ def __init__(self, name: str, organization: str, tag: str = 'latest'):
         self.name = name
         self.organization = organization
         self.tag = tag
-        self.headers = {}
+        self.headers: dict = {}
         self.blob_url = None
         self.model_filename = None
         self.model_hash = None
 
@@ -5,6 +5,7 @@
 import socket
 import sys
 import time
+from abc import ABC, abstractmethod
 from typing import Optional
 
 import ramalama.chat as chat
@@ -54,7 +55,6 @@
 
 
 class NoRefFileFound(Exception):
-
     def __init__(self, model: str, *args):
         super().__init__(*args)
 
@@ -74,7 +74,10 @@ def trim_model_name(model):
     return model
 
 
-class ModelBase:
+class ModelBase(ABC):
+    model: str
+    type: str
+
     def __not_implemented_error(self, param):
         return NotImplementedError(f"ramalama {param} for '{type(self).__name__}' not implemented")
 
@@ -90,40 +93,46 @@ def pull(self, args):
     def push(self, source_model, args):
         raise self.__not_implemented_error("push")
 
+    @abstractmethod
     def remove(self, args):
         raise self.__not_implemented_error("rm")
 
+    @abstractmethod
     def bench(self, args):
         raise self.__not_implemented_error("bench")
 
+    @abstractmethod
     def run(self, args):
         raise self.__not_implemented_error("run")
 
+    @abstractmethod
     def perplexity(self, args):
         raise self.__not_implemented_error("perplexity")
 
+    @abstractmethod
     def serve(self, args):
         raise self.__not_implemented_error("serve")
 
+    @abstractmethod
     def exists(self) -> bool:
         raise self.__not_implemented_error("exists")
 
+    @abstractmethod
     def inspect(self, args):
         raise self.__not_implemented_error("inspect")
 
 
 class Model(ModelBase):
     """Model super class"""
 
-    model = ""
-    type = "Model"
+    type: str = "Model"
 
-    def __init__(self, model, model_store_path):
+    def __init__(self, model: str, model_store_path: str):
         self.model = model
 
-        split = self.model.rsplit("/", 1)
-        self.directory = split[0] if len(split) > 1 else ""
-        self.filename = split[1] if len(split) > 1 else split[0]
+        split: list[str] = self.model.rsplit("/", 1)
+        self.directory: str = split[0] if len(split) > 1 else ""
+        self.filename: str = split[1] if len(split) > 1 else split[0]
 
         self._model_name: str
         self._model_tag: str
@@ -432,7 +441,7 @@ def _handle_mlx_chat(self, args):
                     chat.chat(args)
                     break
                 else:
-                    logger.debug(f"MLX server not ready, waiting... (attempt {i+1}/{max_retries})")
+                    logger.debug(f"MLX server not ready, waiting... (attempt {i + 1}/{max_retries})")
                     time.sleep(3)
                     continue
 
@@ -441,7 +450,7 @@ def _handle_mlx_chat(self, args):
                     perror(f"Error: Failed to connect to MLX server after {max_retries} attempts: {e}")
                     self._cleanup_server_process(args.pid2kill)
                     raise e
-                logger.debug(f"Connection attempt failed, retrying... (attempt {i+1}/{max_retries}): {e}")
+                logger.debug(f"Connection attempt failed, retrying... (attempt {i + 1}/{max_retries}): {e}")
                 time.sleep(3)
 
         args.initial_connection = False
@@ -701,7 +710,6 @@ def handle_runtime(self, args, exec_args):
         return exec_args
 
     def generate_container_config(self, args, exec_args):
-
         # Get the blob paths (src) and mounted paths (dest)
         model_src_path = self._get_entry_model_path(False, False, args.dryrun)
         chat_template_src_path = self._get_chat_template_path(False, False, args.dryrun)
@@ -791,7 +799,7 @@ def kube(self, model_paths, chat_template_paths, mmproj_paths, args, exec_args,
         kube = Kube(self.model_name, model_paths, chat_template_paths, mmproj_paths, args, exec_args)
         kube.generate().write(output_dir)
 
-    def inspect(self, args):
+    def inspect(self, args) -> None:
         self.ensure_model_exists(args)
 
         model_name = self.filename