
Commit 2fe2e51

Merge pull request #1531 from rhatdan/chat
Add ramalama chat command
2 parents 093d5a4 + 3cd6a59 commit 2fe2e51

File tree

6 files changed (+261, −3 lines)

README.md

Lines changed: 1 addition & 0 deletions
@@ -1041,6 +1041,7 @@ $ cat /usr/share/ramalama/shortnames.conf
 | ------------------------------------------------------ | ---------------------------------------------------------- |
 | [ramalama(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama.1.md) | primary RamaLama man page |
 | [ramalama-bench(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-bench.1.md)| benchmark specified AI Model |
+| [ramalama-chat(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-chat.1.md)| chat with specified OpenAI REST API |
 | [ramalama-containers(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-containers.1.md)| list all RamaLama containers |
 | [ramalama-convert(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-convert.1.md) | convert AI Model from local storage to OCI Image |
 | [ramalama-info(1)](https://github.com/containers/ramalama/blob/main/docs/ramalama-info.1.md) | display RamaLama configuration information |

docs/ramalama-chat.1.md

Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
% ramalama-chat 1

## NAME
ramalama\-chat - OpenAI chat with the specified REST API URL

## SYNOPSIS
**ramalama chat** [*options*] [arg...]

positional arguments:
  ARGS  overrides the default prompt; the output is returned without entering the chatbot

## DESCRIPTION
Chat with an OpenAI REST API

## OPTIONS

#### **--color**
Indicate whether or not to use color in the chat.
Possible values are "never", "always" and "auto". (default: auto)

#### **--help**, **-h**
Show this help message and exit

#### **--prefix**
Prefix for the user prompt (default: 🦭 > )

#### **--url**=URL
The host to send requests to (default: http://127.0.0.1:8080)

## EXAMPLES

Communicate with the default local OpenAI REST API (http://127.0.0.1:8080) using Podman containers:
```
$ ramalama chat
🦭 >
```

Communicate with an alternative OpenAI REST API URL using Docker containers:
```
$ ramalama chat --url http://localhost:1234
🐋 >
```

## SEE ALSO
**[ramalama(1)](ramalama.1.md)**

## HISTORY
Jun 2025, Originally compiled by Dan Walsh <[email protected]>
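The ARGS positional documented above also supports a one-shot prompt, where the answer is printed without entering the chatbot; a sketch of that usage (the prompt text is illustrative, and a model server is assumed to already be listening on the default URL):
```
$ ramalama chat "Summarize what the ramalama serve command does"
```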

docs/ramalama-serve.1.md

Lines changed: 3 additions & 3 deletions
@@ -36,14 +36,14 @@ For REST API endpoint documentation, see: [https://github.com/ggml-org/llama.cpp
 ## OPTIONS

 #### **--api**=**llama-stack** | none**
-unified API layer for Inference, RAG, Agents, Tools, Safety, Evals, and Telemetry.(default: none)
+Unified API layer for Inference, RAG, Agents, Tools, Safety, Evals, and Telemetry.(default: none)
 The default can be overridden in the ramalama.conf file.

 #### **--authfile**=*password*
-path of the authentication file for OCI registries
+Path of the authentication file for OCI registries

 #### **--ctx-size**, **-c**
-size of the prompt context (default: 2048, 0 = loaded from model)
+Size of the prompt context (default: 2048, 0 = loaded from model)

 #### **--detach**, **-d**
 Run the container in the background and print the new container ID.

docs/ramalama.1.md

Lines changed: 1 addition & 0 deletions
@@ -178,6 +178,7 @@ It adds support for model versioning and multiple files such as chat templates.
 | Command | Description |
 | ------------------------------------------------- | ---------------------------------------------------------- |
 | [ramalama-bench(1)](ramalama-bench.1.md) | benchmark specified AI Model |
+| [ramalama-chat(1)](ramalama-chat.1.md) | OpenAI chat with the specified REST API URL |
 | [ramalama-client(1)](ramalama-client.1.md) | interact with the AI Model server (experimental) |
 | [ramalama-containers(1)](ramalama-containers.1.md)| list all RamaLama containers |
 | [ramalama-convert(1)](ramalama-convert.1.md) | convert AI Models from local storage to OCI Image |

ramalama/chat.py

Lines changed: 181 additions & 0 deletions
@@ -0,0 +1,181 @@
#!/usr/bin/env python3

import cmd
import itertools
import json
import os
import signal
import sys
import time
import urllib.error
import urllib.request

from ramalama.config import CONFIG
from ramalama.console import EMOJI


def should_colorize():
    t = os.getenv("TERM")
    return t and t != "dumb" and sys.stdout.isatty()


def res(response, color):
    color_default = ""
    color_yellow = ""
    if (color == "auto" and should_colorize()) or color == "always":
        color_default = "\033[0m"
        color_yellow = "\033[33m"

    print("\r", end="")
    assistant_response = ""
    for line in response:
        line = line.decode("utf-8").strip()
        if line.startswith("data: {"):
            line = line[len("data: ") :]
            choice = json.loads(line)["choices"][0]["delta"]
            if "content" in choice:
                choice = choice["content"]
            else:
                continue

            if choice:
                print(f"{color_yellow}{choice}{color_default}", end="", flush=True)
                assistant_response += choice

    print("")
    return assistant_response


def default_prefix():
    if "LLAMA_PROMPT_PREFIX" in os.environ:
        return os.environ["LLAMA_PROMPT_PREFIX"]

    if not EMOJI:
        return ""

    engine = CONFIG.engine

    if os.path.basename(engine) == "podman":
        return "🦭 > "

    if os.path.basename(engine) == "docker":
        return "🐋 > "

    return "> "


class RamaLamaShell(cmd.Cmd):
    def __init__(self, args):
        super().__init__()
        self.conversation_history = []
        self.args = args
        self.request_in_process = False
        self.prompt = args.prefix

        self.url = f"{args.url}/v1/chat/completions"
        self.models_url = f"{args.url}/v1/models"
        self.models = []

    def model(self, index=0):
        try:
            if len(self.models) == 0:
                self.models = self.get_models()
            return self.models[index]
        except urllib.error.URLError:
            return ""

    def get_models(self):
        request = urllib.request.Request(self.models_url, method="GET")
        response = urllib.request.urlopen(request)
        for line in response:
            line = line.decode("utf-8").strip()
            return [d['id'] for d in json.loads(line)["data"]]

    def handle_args(self):
        if self.args.ARGS:
            self.default(" ".join(self.args.ARGS))
            self.kills()
            return True

        return False

    def do_EOF(self, user_content):
        print("")
        return True

    def default(self, user_content):
        if user_content in ["/bye", "exit"]:
            return True

        self.conversation_history.append({"role": "user", "content": user_content})
        self.request_in_process = True
        response = self._req()
        if not response:
            return True

        self.conversation_history.append({"role": "assistant", "content": response})
        self.request_in_process = False

    def _make_request_data(self):
        data = {
            "stream": True,
            "messages": self.conversation_history,
            "model": self.model(),
        }
        json_data = json.dumps(data).encode("utf-8")
        headers = {
            "Content-Type": "application/json",
        }
        request = urllib.request.Request(self.url, data=json_data, headers=headers, method="POST")

        return request

    def _req(self):
        request = self._make_request_data()

        i = 0.01
        total_time_slept = 0
        response = None
        for c in itertools.cycle(['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']):
            try:
                response = urllib.request.urlopen(request)
                break
            except Exception:
                if sys.stdout.isatty():
                    print(f"\r{c}", end="", flush=True)

                if total_time_slept > 16:
                    break

                total_time_slept += i
                time.sleep(i)

                i = min(i * 2, 0.1)

        if response:
            return res(response, self.args.color)

        print(f"\rError: could not connect to: {self.url}", file=sys.stderr)
        self.kills()

        return None

    def kills(self):
        if self.args.pid2kill:
            os.kill(self.args.pid2kill, signal.SIGINT)
            os.kill(self.args.pid2kill, signal.SIGTERM)
            os.kill(self.args.pid2kill, signal.SIGKILL)

    def loop(self):
        while True:
            self.request_in_process = False
            try:
                self.cmdloop()
            except KeyboardInterrupt:
                print("")
                if not self.request_in_process:
                    print("Use Ctrl + d or /bye or exit to quit.")

                continue

            break
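For context, the res() helper above consumes OpenAI-style streaming chunks: each "data: " line carries a JSON object whose choices[0].delta.content field holds the next token. A minimal sketch of that parsing against one illustrative line (the payload shown assumes the typical llama.cpp server format and is not taken from this commit):
```
import json

# Illustrative streamed line from an OpenAI-compatible /v1/chat/completions
# endpoint called with "stream": true; a real server emits many such lines.
line = 'data: {"choices": [{"delta": {"content": "Hello"}}]}'

if line.startswith("data: {"):
    chunk = json.loads(line[len("data: "):])
    delta = chunk["choices"][0]["delta"]
    if "content" in delta:
        # res() prints each token as it arrives and accumulates the full reply
        print(delta["content"], end="", flush=True)
```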

ramalama/cli.py

Lines changed: 27 additions & 0 deletions
@@ -22,6 +22,7 @@
 import ramalama.oci
 import ramalama.rag
 from ramalama import engine
+from ramalama.chat import RamaLamaShell, default_prefix
 from ramalama.common import accel_image, exec_cmd, get_accel, get_cmd_with_wrapper, perror
 from ramalama.config import CONFIG
 from ramalama.logger import configure_logger, logger

@@ -237,6 +238,7 @@ def configure_subcommands(parser):
     subparsers = parser.add_subparsers(dest="subcommand")
     subparsers.required = False
     bench_parser(subparsers)
+    chat_parser(subparsers)
     client_parser(subparsers)
     containers_parser(subparsers)
     convert_parser(subparsers)

@@ -905,6 +907,23 @@ def default_threads():
     return CONFIG.threads


+def chat_parser(subparsers):
+    parser = subparsers.add_parser("chat", help="OpenAI chat with the specified RESTAPI URL")
+    parser.add_argument(
+        '--color',
+        '--colour',
+        default="auto",
+        choices=['never', 'always', 'auto'],
+        help='possible values are "never", "always" and "auto".',
+    )
+    parser.add_argument("--prefix", type=str, help="prefix for the user prompt", default=default_prefix())
+    parser.add_argument("--url", type=str, default="http://127.0.0.1:8080", help="the host to send requests to")
+    parser.add_argument(
+        "ARGS", nargs="*", help="overrides the default prompt, and the output is returned without entering the chatbot"
+    )
+    parser.set_defaults(func=chat_cli)
+
+
 def run_parser(subparsers):
     parser = subparsers.add_parser("run", help="run specified AI Model as a chatbot")
     runtime_options(parser, "run")

@@ -919,6 +938,14 @@ def run_parser(subparsers):
     parser.set_defaults(func=run_cli)


+def chat_cli(args):
+    shell = RamaLamaShell(args)
+    if shell.handle_args():
+        return
+    shell.loop()
+    shell.kills()
+
+
 def run_cli(args):
     if args.rag:
         _get_rag(args)
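Tying the pieces together, a minimal sketch of how the parsed options reach RamaLamaShell; the attribute names mirror the flags chat_parser defines, and pid2kill (used by kills() but not added by chat_parser in this diff) is set to a falsy value here so the kill path is a no-op:
```
from argparse import Namespace

from ramalama.chat import RamaLamaShell

# Illustrative one-shot invocation; values mirror the chat_parser defaults.
args = Namespace(
    ARGS=["What is RamaLama?"],   # inline prompt: the answer is printed, no REPL
    color="auto",
    prefix="> ",
    url="http://127.0.0.1:8080",
    pid2kill=0,                   # falsy, so kills() does nothing
)

shell = RamaLamaShell(args)
if not shell.handle_args():       # True when ARGS supplied a one-shot prompt
    shell.loop()                  # otherwise enter the interactive chat loop
```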
