containers · ericcurtin · Jun 15, 2025 · Jun 14, 2025 · Jun 15, 2025 · sourcery-ai
@@ -1041,6 +1041,7 @@ $ cat /usr/share/ramalama/shortnames.conf
 | ------------------------------------------------------ | ---------------------------------------------------------- |
 | [ramalama(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama.1.md)                      | primary RamaLama man page                                  |
 | [ramalama-bench(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-bench.1.md)| benchmark specified AI Model                                         |
+| [ramalama-chat(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-chat.1.md)| chat with specified OpenAI REST API                        |
 | [ramalama-containers(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-containers.1.md)| list all RamaLama containers                               |
 | [ramalama-convert(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-convert.1.md)      | convert AI Model from local storage to OCI Image           |
 | [ramalama-info(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-info.1.md)            | display RamaLama configuration information                 |

@@ -0,0 +1,45 @@
+% ramalama-chat 1
+
+## NAME
+ramalama\-chat - OpenAI chat with the specified REST API URL
+
+## SYNOPSIS
+**ramalama chat** [*options*] [arg...]
+
+positional arguments:
+  ARGS                  overrides the default prompt, and the output is
+                        returned without entering the chatbot
+
+## DESCRIPTION
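+Chat with the OpenAI-compatible REST API served at the specified URL (see **--url**).
+If positional arguments are given, they are used as the prompt and the response is returned without entering the interactive chatbot.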
+
+## OPTIONS
+
+#### **--color**
+Indicate whether or not to use color in the chat.
+Possible values are "never", "always" and "auto". (default: auto)
+
+#### **--help**, **-h**
+Show this help message and exit
+
+#### **--prefix**
+Prefix for the user prompt (default: 🦭 > )
+
+#### **--url**=URL
+The host to send requests to (default: http://127.0.0.1:8080)
+
+## EXAMPLES
+
+```
+$ ramalama chat
+🦭 >
+
+$ ramalama chat --url http://localhost:1234
+🐋 >
+```
+
+## SEE ALSO
+**[ramalama(1)](ramalama.1.md)**
+
+## HISTORY
+Jun 2025, Originally compiled by Dan Walsh <[email protected]>
@@ -36,14 +36,14 @@ For REST API endpoint documentation, see: [https://github.com/ggml-org/llama.cpp
 ## OPTIONS
 
 #### **--api**=**llama-stack** | **none**
-unified API layer for Inference, RAG, Agents, Tools, Safety, Evals, and Telemetry.(default: none)
+Unified API layer for Inference, RAG, Agents, Tools, Safety, Evals, and Telemetry. (default: none)
 The default can be overridden in the ramalama.conf file.
 
 #### **--authfile**=*password*
-path of the authentication file for OCI registries
+Path of the authentication file for OCI registries
 
 #### **--ctx-size**, **-c**
-size of the prompt context (default: 2048, 0 = loaded from model)
+Size of the prompt context (default: 2048, 0 = loaded from model)
 
 #### **--detach**, **-d**
 Run the container in the background and print the new container ID.

@@ -178,6 +178,7 @@ It adds support for model versioning and multiple files such as chat templates.
 | Command                                           | Description                                                |
 | ------------------------------------------------- | ---------------------------------------------------------- |
 | [ramalama-bench(1)](ramalama-bench.1.md)          | benchmark specified AI Model                               |
+| [ramalama-chat(1)](ramalama-chat.1.md)            | OpenAI chat with the specified REST API URL                |
 | [ramalama-client(1)](ramalama-client.1.md)        | interact with the AI Model server (experimental)           |
 | [ramalama-containers(1)](ramalama-containers.1.md)| list all RamaLama containers                               |
 | [ramalama-convert(1)](ramalama-convert.1.md)      | convert AI Models from local storage to OCI Image          |

@@ -22,6 +22,7 @@
 import ramalama.oci
 import ramalama.rag
 from ramalama import engine
+from ramalama.chat import RamaLamaShell, default_prefix
 from ramalama.common import accel_image, exec_cmd, get_accel, get_cmd_with_wrapper, perror
 from ramalama.config import CONFIG
 from ramalama.logger import configure_logger, logger
@@ -237,6 +238,7 @@ def configure_subcommands(parser):
     subparsers = parser.add_subparsers(dest="subcommand")
     subparsers.required = False
     bench_parser(subparsers)
+    chat_parser(subparsers)
     client_parser(subparsers)
     containers_parser(subparsers)
     convert_parser(subparsers)
@@ -905,6 +907,23 @@ def default_threads():
     return CONFIG.threads
 
 
+def chat_parser(subparsers):
+    parser = subparsers.add_parser("chat", help="OpenAI chat with the specified RESTAPI URL")
+    parser.add_argument(
+        '--color',
+        '--colour',
+        default="auto",
+        choices=['never', 'always', 'auto'],
+        help='possible values are "never", "always" and "auto".',
+    )
+    parser.add_argument("--prefix", type=str, help="prefix for the user prompt", default=default_prefix())
+    parser.add_argument("--url", type=str, default="http://127.0.0.1:8080", help="the host to send requests to")
+    parser.add_argument(
+        "ARGS", nargs="*", help="overrides the default prompt, and the output is returned without entering the chatbot"
+    )
+    parser.set_defaults(func=chat_cli)
+
+
 def run_parser(subparsers):
     parser = subparsers.add_parser("run", help="run specified AI Model as a chatbot")
     runtime_options(parser, "run")
@@ -919,6 +938,14 @@ def run_parser(subparsers):
     parser.set_defaults(func=run_cli)
 
 
+def chat_cli(args):
+    shell = RamaLamaShell(args)
+    if shell.handle_args():
+        return
+    shell.loop()
+    shell.kills()
+
+
 def run_cli(args):
     if args.rag:
         _get_rag(args)