Commit 7dc3d9d

move --image & --keep-groups to run, serve, perplexity, bench commands

This eliminates accidental image pulls when not using containers. Since these options are only used by container commands, there is no need for them anywhere else.

Fixes: #1662
Signed-off-by: Daniel J Walsh <[email protected]>

1 parent: b7c15ce

17 files changed: +192 additions, -67 deletions
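
In sketch form, the change moves container-only flags from the global parser into a per-command helper. The argparse sketch below is illustrative, not RamaLama's actual code; the real `runtime_options` also wires in `accel_image(CONFIG)` defaults and shell completers.

```python
import argparse

def runtime_options(parser):
    # Container-only flags, now registered per subcommand instead of globally.
    parser.add_argument("--image", default="quay.io/ramalama/ramalama")
    parser.add_argument("--keep-groups", dest="podman_keep_groups", action="store_true")

top = argparse.ArgumentParser(prog="ramalama")
sub = top.add_subparsers(dest="command")
for name in ("run", "serve", "perplexity", "bench"):
    runtime_options(sub.add_parser(name))
sub.add_parser("list")  # non-container command: no --image default to resolve or pull

args = top.parse_args(["run", "--image", "quay.io/ramalama/cuda:0.10"])
print(args.image)  # quay.io/ramalama/cuda:0.10
```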

docs/ramalama-bench.1.md
Lines changed: 27 additions & 0 deletions

@@ -48,6 +48,33 @@ for a value and set the variable only if it is set on the host.
 #### **--help**, **-h**
 show this help message and exit
 
+#### **--image**=IMAGE
+OCI container image to run with specified AI model. RamaLama defaults to using
+images based on the accelerator it discovers. For example:
+`quay.io/ramalama/ramalama`. See the table below for all default images.
+The default image tag is based on the minor version of the RamaLama package.
+Version 0.10.0 of RamaLama pulls an image with a `:0.10` tag from the quay.io/ramalama OCI repository. The --image option overrides this default.
+
+The default can be overridden in the ramalama.conf file or via the
+RAMALAMA_IMAGE environment variable. `export RAMALAMA_IMAGE=quay.io/ramalama/aiimage:1.2` tells
+RamaLama to use the `quay.io/ramalama/aiimage:1.2` image.
+
+Accelerated images:
+
+| Accelerator | Image |
+| ------------------------| -------------------------- |
+| CPU, Apple | quay.io/ramalama/ramalama |
+| HIP_VISIBLE_DEVICES | quay.io/ramalama/rocm |
+| CUDA_VISIBLE_DEVICES | quay.io/ramalama/cuda |
+| ASAHI_VISIBLE_DEVICES | quay.io/ramalama/asahi |
+| INTEL_VISIBLE_DEVICES | quay.io/ramalama/intel-gpu |
+| ASCEND_VISIBLE_DEVICES | quay.io/ramalama/cann |
+| MUSA_VISIBLE_DEVICES | quay.io/ramalama/musa |
+
+#### **--keep-groups**
+pass --group-add keep-groups to podman (default: False)
+If GPU device on host system is accessible to user via group access, this option leaks the groups into the container.
+
 #### **--name**, **-n**
 name of the container to run the Model in
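
The tag rule in the new `--image` text is mechanical enough to sketch. `derive_tag` below is a hypothetical helper, not RamaLama's implementation; it only mirrors the documented "minor version" behavior:

```python
def derive_tag(package_version: str) -> str:
    # Documented rule: the default image tag is the package's minor version,
    # so RamaLama 0.10.0 pulls quay.io/ramalama/<image>:0.10.
    major, minor = package_version.split(".")[:2]
    return f"{major}.{minor}"

assert derive_tag("0.10.0") == "0.10"
assert derive_tag("1.2.3") == "1.2"
```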

docs/ramalama-perplexity.1.md
Lines changed: 27 additions & 0 deletions

@@ -53,6 +53,33 @@ for a value and set the variable only if it is set on the host.
 #### **--help**, **-h**
 show this help message and exit
 
+#### **--image**=IMAGE
+OCI container image to run with specified AI model. RamaLama defaults to using
+images based on the accelerator it discovers. For example:
+`quay.io/ramalama/ramalama`. See the table below for all default images.
+The default image tag is based on the minor version of the RamaLama package.
+Version 0.10.0 of RamaLama pulls an image with a `:0.10` tag from the quay.io/ramalama OCI repository. The --image option overrides this default.
+
+The default can be overridden in the ramalama.conf file or via the
+RAMALAMA_IMAGE environment variable. `export RAMALAMA_IMAGE=quay.io/ramalama/aiimage:1.2` tells
+RamaLama to use the `quay.io/ramalama/aiimage:1.2` image.
+
+Accelerated images:
+
+| Accelerator | Image |
+| ------------------------| -------------------------- |
+| CPU, Apple | quay.io/ramalama/ramalama |
+| HIP_VISIBLE_DEVICES | quay.io/ramalama/rocm |
+| CUDA_VISIBLE_DEVICES | quay.io/ramalama/cuda |
+| ASAHI_VISIBLE_DEVICES | quay.io/ramalama/asahi |
+| INTEL_VISIBLE_DEVICES | quay.io/ramalama/intel-gpu |
+| ASCEND_VISIBLE_DEVICES | quay.io/ramalama/cann |
+| MUSA_VISIBLE_DEVICES | quay.io/ramalama/musa |
+
+#### **--keep-groups**
+pass --group-add keep-groups to podman (default: False)
+If GPU device on host system is accessible to user via group access, this option leaks the groups into the container.
+
 #### **--name**, **-n**
 name of the container to run the Model in

docs/ramalama-rag.1.md
Lines changed: 27 additions & 0 deletions

@@ -35,6 +35,33 @@ for a value and set the variable only if it is set on the host.
 #### **--help**, **-h**
 Print usage message
 
+#### **--image**=IMAGE
+OCI container image to run with specified AI model. RamaLama defaults to using
+images based on the accelerator it discovers. For example:
+`quay.io/ramalama/ramalama-rag`. See the table below for all default images.
+The default image tag is based on the minor version of the RamaLama package.
+Version 0.10.0 of RamaLama pulls an image with a `:0.10` tag from the quay.io/ramalama OCI repository. The --image option overrides this default.
+
+The default can be overridden in the ramalama.conf file or via the
+RAMALAMA_IMAGE environment variable. `export RAMALAMA_IMAGE=quay.io/ramalama/aiimage:1.2` tells
+RamaLama to use the `quay.io/ramalama/aiimage:1.2` image.
+
+Accelerated images:
+
+| Accelerator | Image |
+| ------------------------| ------------------------------ |
+| CPU, Apple | quay.io/ramalama/ramalama-rag |
+| HIP_VISIBLE_DEVICES | quay.io/ramalama/rocm-rag |
+| CUDA_VISIBLE_DEVICES | quay.io/ramalama/cuda-rag |
+| ASAHI_VISIBLE_DEVICES | quay.io/ramalama/asahi-rag |
+| INTEL_VISIBLE_DEVICES | quay.io/ramalama/intel-gpu-rag |
+| ASCEND_VISIBLE_DEVICES | quay.io/ramalama/cann-rag |
+| MUSA_VISIBLE_DEVICES | quay.io/ramalama/musa-rag |
+
+#### **--keep-groups**
+pass --group-add keep-groups to podman (default: False)
+If GPU device on host system is accessible to user via group access, this option leaks the groups into the container.
+
 #### **--network**=*none*
 sets the configuration for network namespaces when handling RUN instructions

docs/ramalama-run.1.md
Lines changed: 27 additions & 0 deletions

@@ -61,6 +61,33 @@ for a value and set the variable only if it is set on the host.
 #### **--help**, **-h**
 Show this help message and exit
 
+#### **--image**=IMAGE
+OCI container image to run with specified AI model. RamaLama defaults to using
+images based on the accelerator it discovers. For example:
+`quay.io/ramalama/ramalama`. See the table below for all default images.
+The default image tag is based on the minor version of the RamaLama package.
+Version 0.10.0 of RamaLama pulls an image with a `:0.10` tag from the quay.io/ramalama OCI repository. The --image option overrides this default.
+
+The default can be overridden in the ramalama.conf file or via the
+RAMALAMA_IMAGE environment variable. `export RAMALAMA_IMAGE=quay.io/ramalama/aiimage:1.2` tells
+RamaLama to use the `quay.io/ramalama/aiimage:1.2` image.
+
+Accelerated images:
+
+| Accelerator | Image |
+| ------------------------| -------------------------- |
+| CPU, Apple | quay.io/ramalama/ramalama |
+| HIP_VISIBLE_DEVICES | quay.io/ramalama/rocm |
+| CUDA_VISIBLE_DEVICES | quay.io/ramalama/cuda |
+| ASAHI_VISIBLE_DEVICES | quay.io/ramalama/asahi |
+| INTEL_VISIBLE_DEVICES | quay.io/ramalama/intel-gpu |
+| ASCEND_VISIBLE_DEVICES | quay.io/ramalama/cann |
+| MUSA_VISIBLE_DEVICES | quay.io/ramalama/musa |
+
+#### **--keep-groups**
+pass --group-add keep-groups to podman (default: False)
+If GPU device on host system is accessible to user via group access, this option leaks the groups into the container.
+
 #### **--keepalive**
 duration to keep a model loaded (e.g. 5m)
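
The same option text implies a resolution order: an explicit `--image` wins, otherwise the `RAMALAMA_IMAGE` environment variable, otherwise the accelerator default. A hedged sketch of that precedence (`resolve_image` is hypothetical, and the ramalama.conf override is omitted for brevity):

```python
import os

def resolve_image(cli_image: str | None, accel_default: str) -> str:
    # --image beats RAMALAMA_IMAGE, which beats the discovered default.
    return cli_image or os.environ.get("RAMALAMA_IMAGE") or accel_default

os.environ["RAMALAMA_IMAGE"] = "quay.io/ramalama/aiimage:1.2"
print(resolve_image(None, "quay.io/ramalama/ramalama"))  # quay.io/ramalama/aiimage:1.2
print(resolve_image("quay.io/ramalama/cuda:0.10", "quay.io/ramalama/ramalama"))  # CLI wins
```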

docs/ramalama-serve.1.md
Lines changed: 27 additions & 0 deletions

@@ -93,6 +93,33 @@ show this help message and exit
 #### **--host**="0.0.0.0"
 IP address for llama.cpp to listen on.
 
+#### **--image**=IMAGE
+OCI container image to run with specified AI model. RamaLama defaults to using
+images based on the accelerator it discovers. For example:
+`quay.io/ramalama/ramalama`. See the table below for all default images.
+The default image tag is based on the minor version of the RamaLama package.
+Version 0.10.0 of RamaLama pulls an image with a `:0.10` tag from the quay.io/ramalama OCI repository. The --image option overrides this default.
+
+The default can be overridden in the ramalama.conf file or via the
+RAMALAMA_IMAGE environment variable. `export RAMALAMA_IMAGE=quay.io/ramalama/aiimage:1.2` tells
+RamaLama to use the `quay.io/ramalama/aiimage:1.2` image.
+
+Accelerated images:
+
+| Accelerator | Image |
+| ------------------------| -------------------------- |
+| CPU, Apple | quay.io/ramalama/ramalama |
+| HIP_VISIBLE_DEVICES | quay.io/ramalama/rocm |
+| CUDA_VISIBLE_DEVICES | quay.io/ramalama/cuda |
+| ASAHI_VISIBLE_DEVICES | quay.io/ramalama/asahi |
+| INTEL_VISIBLE_DEVICES | quay.io/ramalama/intel-gpu |
+| ASCEND_VISIBLE_DEVICES | quay.io/ramalama/cann |
+| MUSA_VISIBLE_DEVICES | quay.io/ramalama/musa |
+
+#### **--keep-groups**
+pass --group-add keep-groups to podman (default: False)
+If GPU device on host system is accessible to user via group access, this option leaks the groups into the container.
+
 #### **--model-draft**
 
 A draft model is a smaller, faster model that helps accelerate the decoding
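
For `--keep-groups`, the man-page text maps onto a single podman flag. A sketch of the plausible wiring (illustrative only; the real translation lives in RamaLama's engine code, which this commit does not touch):

```python
def podman_base_args(podman_keep_groups: bool) -> list[str]:
    args = ["podman", "run", "--rm"]
    if podman_keep_groups:
        # Keep the caller's supplementary groups inside the container, so
        # group-gated GPU device nodes on the host remain accessible.
        args += ["--group-add", "keep-groups"]
    return args

print(podman_base_args(True))   # ['podman', 'run', '--rm', '--group-add', 'keep-groups']
print(podman_base_args(False))  # ['podman', 'run', '--rm']
```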

docs/ramalama.1.md
Lines changed: 0 additions & 27 deletions

@@ -23,18 +23,6 @@ version of RamaLama. For example RamaLama version 1.2.3 on an NVIDIA system
 pulls quay.io/ramalama/cuda:1.2. To override the default image use the
 `--image` option.
 
-Accelerated images:
-
-| Accelerator | Image |
-| ------------------------| -------------------------- |
-| CPU, Apple | quay.io/ramalama/ramalama |
-| HIP_VISIBLE_DEVICES | quay.io/ramalama/rocm |
-| CUDA_VISIBLE_DEVICES | quay.io/ramalama/cuda |
-| ASAHI_VISIBLE_DEVICES | quay.io/ramalama/asahi |
-| INTEL_VISIBLE_DEVICES | quay.io/ramalama/intel-gpu |
-| ASCEND_VISIBLE_DEVICES | quay.io/ramalama/cann |
-| MUSA_VISIBLE_DEVICES | quay.io/ramalama/musa |
-
 RamaLama pulls AI Models from model registries. Starting a chatbot or a rest API service from a simple single command. Models are treated similarly to how Podman and Docker treat container images.
 
 When both Podman and Docker are installed, RamaLama defaults to Podman, The `RAMALAMA_CONTAINER_ENGINE=docker` environment variable can override this behaviour. When neither are installed RamaLama attempts to run the model with software on the local system.
@@ -137,21 +125,6 @@ The default can be overridden in the ramalama.conf file or via the RAMALAMA_CONT
 #### **--help**, **-h**
 show this help message and exit
 
-#### **--image**=IMAGE
-OCI container image to run with specified AI model. RamaLama defaults to use
-images based on the accelerator it discovers. For example:
-`quay.io/ramalama/ramalama`. See the table below for all default images.
-The default image tag is based on the minor version of the RamaLama package.
-Version 0.10.0 of RamaLama pulls $IMAGE:0.10 from the quay.io/ramalama OCI repository. The --image option overrides this default.
-
-The default can be overridden in the ramalama.conf file or via the
-RAMALAMA_IMAGE environment variable. `export RAMALAMA_IMAGE=quay.io/ramalama/aiimage:1.2` tells
-RamaLama to use the `quay.io/ramalama/aiimage:1.2` image.
-
-#### **--keep-groups**
-pass --group-add keep-groups to podman (default: False)
-Needed to access the gpu on some systems, but has an impact on security, use with caution.
-
 #### **--nocontainer**
 Do not run RamaLama in the default container (default: False)
 The default can be overridden in the ramalama.conf file.

docs/ramalama.conf
Lines changed: 1 addition & 1 deletion

@@ -25,7 +25,7 @@
 # OCI model car image
 # Image to use when building and pushing --type=car models
 #
-#carimage = "registry.access.redhat.com/ubi9-micro:latest"
+#carimage = "registry.access.redhat.com/ubi10-micro:latest"
 
 # Run RamaLama in the default container.
 #

docs/ramalama.conf.5.md
Lines changed: 1 addition & 1 deletion

@@ -65,7 +65,7 @@ The ramalama table contains settings to configure and manage the OCI runtime.
 Unified API layer for Inference, RAG, Agents, Tools, Safety, Evals, and Telemetry.
 Options: llama-stack, none
 
-**carimage**="registry.access.redhat.com/ubi9-micro:latest"
+**carimage**="registry.access.redhat.com/ubi10-micro:latest"
 
 OCI model car image
 

ramalama/cli.py
Lines changed: 33 additions & 17 deletions

@@ -21,7 +21,6 @@
 
 import ramalama.chat as chat
 import ramalama.oci
-import ramalama.rag
 from ramalama import engine
 from ramalama.chat import default_prefix
 from ramalama.common import accel_image, get_accel, perror
@@ -30,6 +29,7 @@
 from ramalama.model import MODEL_TYPES
 from ramalama.model_factory import ModelFactory, New
 from ramalama.model_store.global_store import GlobalModelStore
+from ramalama.rag import rag_image
 from ramalama.shortnames import Shortnames
 from ramalama.stack import Stack
 from ramalama.version import print_version, version
@@ -192,21 +192,6 @@ def configure_arguments(parser):
         default=CONFIG.engine,
         help="""run RamaLama using the specified container engine.
 The RAMALAMA_CONTAINER_ENGINE environment variable modifies default behaviour.""",
-    )
-    parser.add_argument(
-        "--image",
-        default=accel_image(CONFIG),
-        help="OCI container image to run with the specified AI model",
-        action=OverrideDefaultAction,
-        completer=local_images,
-    )
-    parser.add_argument(
-        "--keep-groups",
-        dest="podman_keep_groups",
-        default=CONFIG.keep_groups,
-        action="store_true",
-        help="""pass `--group-add keep-groups` to podman, if using podman.
-Needed to access gpu on some systems, but has security implications.""",
     )
     parser.add_argument(
         "--nocontainer",
@@ -520,7 +505,7 @@ def info_cli(args):
         "Engine": {
             "Name": args.engine,
         },
-        "Image": args.image,
+        "Image": accel_image(CONFIG),
         "Runtime": args.runtime,
         "Store": args.store,
         "UseContainer": args.container,
@@ -662,6 +647,7 @@ def convert_cli(args):
     model = ModelFactory(tgt, args).create_oci()
 
     source_model = _get_source_model(args)
+    args.carimage = rag_image(accel_image(CONFIG))
     model.convert(source_model, args)
 
 
@@ -789,6 +775,21 @@ def runtime_options(parser, command):
         help="IP address to listen",
         completer=suppressCompleter,
     )
+    parser.add_argument(
+        "--image",
+        default=accel_image(CONFIG),
+        help="OCI container image to run with the specified AI model",
+        action=OverrideDefaultAction,
+        completer=local_images,
+    )
+    parser.add_argument(
+        "--keep-groups",
+        dest="podman_keep_groups",
+        default=CONFIG.keep_groups,
+        action="store_true",
+        help="""pass `--group-add keep-groups` to podman.
+If the GPU device on the host is accessible via group access, this option leaks the user groups into the container.""",
+    )
     if command == "run":
         parser.add_argument(
             "--keepalive", type=str, help="duration to keep a model loaded (e.g. 5m)", completer=suppressCompleter
@@ -1060,6 +1061,21 @@ def rag_parser(subparsers):
         help="environment variables to add to the running RAG container",
         completer=local_env,
     )
+    parser.add_argument(
+        "--image",
+        default=accel_image(CONFIG),
+        help="OCI container image to run with the specified AI model",
+        action=OverrideDefaultAction,
+        completer=local_images,
+    )
+    parser.add_argument(
+        "--keep-groups",
+        dest="podman_keep_groups",
+        default=CONFIG.keep_groups,
+        action="store_true",
+        help="""pass `--group-add keep-groups` to podman.
+If the GPU device on the host is accessible via group access, this option leaks the user groups into the container.""",
+    )
     add_network_argument(parser, dflt=None)
     parser.add_argument(
         "--pull",

ramalama/config.py
Lines changed: 1 addition & 1 deletion

@@ -64,7 +64,7 @@ class RamalamaSettings:
 class BaseConfig:
     container: bool = None  # type: ignore
     image: str = None  # type: ignore
-    carimage: str = "registry.access.redhat.com/ubi9-micro:latest"
+    carimage: str = "registry.access.redhat.com/ubi10-micro:latest"
     ctx_size: int = 2048
     engine: SUPPORTED_ENGINES | None = field(default_factory=get_default_engine)
     env: list[str] = field(default_factory=list)
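
The config change is a one-line default bump, but it interacts with ramalama.conf: the dataclass default applies only when no override is loaded. A reduced sketch (`load_conf` is a hypothetical stand-in for RamaLama's config loading):

```python
from dataclasses import dataclass

@dataclass
class BaseConfig:
    # New default base image for --type=car models.
    carimage: str = "registry.access.redhat.com/ubi10-micro:latest"

def load_conf(overrides: dict) -> BaseConfig:
    # A ramalama.conf value, when present, replaces the dataclass default.
    return BaseConfig(**overrides)

print(load_conf({}).carimage)                                   # ubi10-micro default
print(load_conf({"carimage": "example.com/mycar:1"}).carimage)  # conf override wins
```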
