Skip to content

Commit 6556b04

Browse files
authored
Merge pull request #1522 from rhatdan/demo
Update to add multi-modal
2 parents 2f92ec5 + 9f1faba commit 6556b04

File tree

2 files changed

+284
-1
lines changed

2 files changed

+284
-1
lines changed

docs/demo/camera-demo.html

Lines changed: 265 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,265 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
3+
<head>
4+
<meta charset="UTF-8">
5+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
6+
<title>Camera Interaction App</title>
7+
<style>
8+
body {
9+
font-family: sans-serif;
10+
display: flex;
11+
flex-direction: column;
12+
align-items: center;
13+
gap: 20px;
14+
padding: 20px;
15+
background-color: #f0f0f0;
16+
}
17+
.controls, .io-areas {
18+
display: flex;
19+
gap: 10px;
20+
align-items: center;
21+
background-color: #fff;
22+
padding: 15px;
23+
border-radius: 8px;
24+
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
25+
}
26+
.io-areas {
27+
flex-direction: column;
28+
align-items: stretch;
29+
}
30+
textarea {
31+
width: 300px;
32+
height: 80px;
33+
padding: 8px;
34+
border: 1px solid #ccc;
35+
border-radius: 4px;
36+
font-size: 14px;
37+
}
38+
#videoFeed {
39+
width: 480px;
40+
height: 360px;
41+
border: 2px solid #333;
42+
background-color: #000;
43+
border-radius: 8px;
44+
}
45+
#startButton {
46+
padding: 10px 20px;
47+
font-size: 16px;
48+
cursor: pointer;
49+
border: none;
50+
border-radius: 4px;
51+
color: white;
52+
}
53+
#startButton.start {
54+
background-color: #28a745; /* Green */
55+
}
56+
#startButton.stop {
57+
background-color: #dc3545; /* Red */
58+
}
59+
label {
60+
font-weight: bold;
61+
}
62+
select {
63+
padding: 8px;
64+
border-radius: 4px;
65+
border: 1px solid #ccc;
66+
}
67+
.hidden {
68+
display: none;
69+
}
70+
</style>
71+
</head>
72+
<body>
73+
74+
<h1>Camera Interaction App</h1>
75+
76+
<video id="videoFeed" autoplay playsinline></video>
77+
<canvas id="canvas" class="hidden"></canvas> <!-- For capturing frames -->
78+
79+
<div class="io-areas">
80+
<div>
81+
<label for="baseURL">Base API:</label><br>
82+
<input id="baseURL" name="BaseURL" value="http://localhost:8080">
83+
</div>
84+
<div>
85+
<label for="instructionText">Instruction:</label><br>
86+
<textarea id="instructionText" style="height: 2em; width: 40em" name="Instruction"></textarea>
87+
</div>
88+
<div>
89+
<label for="responseText">Response:</label><br>
90+
<textarea id="responseText" style="height: 2em; width: 40em" name="Response" readonly placeholder="Server response will appear here..."></textarea>
91+
</div>
92+
</div>
93+
94+
<div class="controls">
95+
<label for="intervalSelect">Interval between 2 requests:</label>
96+
<select id="intervalSelect" name="Interval between 2 requests">
97+
<option value="100">100ms</option>
98+
<option value="250">250ms</option>
99+
<option value="500" selected>500ms</option>
100+
<option value="1000">1s</option>
101+
<option value="2000">2s</option>
102+
</select>
103+
<button id="startButton" class="start">Start</button>
104+
</div>
105+
106+
<script>
107+
// Page elements the script works with, looked up once by their ids.
// `video` shows the live camera feed; `canvas` is the hidden element used
// for capturing frames.
const [
  video,
  canvas,
  baseURL,
  instructionText,
  responseText,
  intervalSelect,
  startButton,
] = [
  'videoFeed',
  'canvas',
  'baseURL',
  'instructionText',
  'responseText',
  'intervalSelect',
  'startButton',
].map((elementId) => document.getElementById(elementId));

// default instruction
instructionText.value = "What do you see?";

// Mutable page state shared by the functions below.
let stream;               // camera stream, assigned by initCamera()
let intervalId;           // timer id set by handleStart(), cleared by handleStop()
let isProcessing = false; // true between handleStart() and handleStop()
120+
121+
/**
 * Sends one instruction plus one image to the `/v1/chat/completions`
 * endpoint under the base URL currently typed into the `baseURL` input.
 *
 * @param {string} instruction - Prompt text, sent as the `text` content part.
 * @param {string} imageBase64URL - Image data URL, sent as the `image_url` content part.
 * @returns {Promise<string>} The first choice's message content, or a
 *   "Server error: <status> - <body>" string when the server answers with a
 *   non-OK status.
 * @throws {Error} When the request fails at the network level, the body is
 *   not valid JSON, or a successful reply has no `choices[0].message.content`.
 */
async function sendChatCompletionRequest(instruction, imageBase64URL) {
  // Tolerate stray whitespace and trailing slashes in the user-edited base
  // URL so we never request "//v1/chat/completions".
  const apiRoot = baseURL.value.trim().replace(/\/+$/, '');

  const response = await fetch(`${apiRoot}/v1/chat/completions`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      max_tokens: 100,
      messages: [
        {
          role: 'user',
          content: [
            { type: 'text', text: instruction },
            { type: 'image_url', image_url: { url: imageBase64URL } }
          ]
        }
      ]
    })
  });

  if (!response.ok) {
    const errorData = await response.text();
    return `Server error: ${response.status} - ${errorData}`;
  }

  const data = await response.json();

  // Validate the reply shape explicitly instead of letting a missing field
  // surface as an opaque "Cannot read properties of undefined" TypeError.
  const choices = data && Array.isArray(data.choices) ? data.choices : [];
  const firstChoice = choices[0];
  const content = firstChoice && firstChoice.message
    ? firstChoice.message.content
    : undefined;
  if (content === undefined || content === null) {
    throw new Error('Unexpected response format: no choices[0].message.content in server reply');
  }
  return content;
}
147+
148+
/**
 * Requests video-only camera access, stores the resulting stream in the
 * shared `stream` variable and attaches it to the `video` element.
 * On failure the error is logged, shown in the response box and raised in
 * an alert dialog; nothing is thrown to the caller.
 *
 * @returns {Promise<void>}
 */
async function initCamera() {
  const constraints = { audio: false, video: true };
  try {
    const mediaStream = await navigator.mediaDevices.getUserMedia(constraints);
    stream = mediaStream;
    video.srcObject = mediaStream;
    responseText.value = "Camera access granted. Ready to start.";
  } catch (cameraError) {
    console.error("Error accessing camera:", cameraError);
    const errorName = cameraError.name;
    const errorMessage = cameraError.message;
    responseText.value = `Error accessing camera: ${errorName} - ${errorMessage}. Please ensure permissions are granted and you are on HTTPS or localhost.`;
    alert(`Error accessing camera: ${errorName}. Make sure you've granted permission and are on HTTPS or localhost.`);
  }
}
160+
161+
/**
 * Draws the current video frame onto the hidden canvas and encodes it.
 *
 * @returns {string|null} A JPEG data URL of the frame (quality 0.8), or
 *   `null` when there is no stream or the video reports no width yet.
 */
function captureImage() {
  const isReady = Boolean(stream) && Boolean(video.videoWidth);
  if (!isReady) {
    console.warn("Video stream not ready for capture.");
    return null;
  }

  // Size the canvas to the frame so the whole image is drawn unscaled.
  canvas.width = video.videoWidth;
  canvas.height = video.videoHeight;

  const frameContext = canvas.getContext('2d');
  frameContext.drawImage(video, 0, 0, canvas.width, canvas.height);

  // Use JPEG for smaller size, 0.8 quality
  const encoded = canvas.toDataURL('image/jpeg', 0.8);
  return encoded;
}
172+
173+
// True while a request started by sendData() has not settled yet. Kept
// separate from `isProcessing`, which only records whether the user has
// pressed Start.
let isRequestInFlight = false;

/**
 * Captures one frame and sends it with the current instruction, writing
 * the reply (or an error message) into the response box.
 *
 * Does nothing when processing is stopped, or when a previous request is
 * still pending. The second check is what keeps requests from piling up
 * when the server takes longer to answer than the selected interval.
 *
 * @returns {Promise<void>} Settles once the request has finished; request
 *   errors are reported in the response box instead of being thrown.
 */
async function sendData() {
  if (!isProcessing) return;
  if (isRequestInFlight) return; // previous tick is still waiting on the server

  const instruction = instructionText.value;
  const imageBase64URL = captureImage();

  if (!imageBase64URL) {
    responseText.value = "Failed to capture image. Stream might not be active.";
    return;
  }

  isRequestInFlight = true;
  try {
    const response = await sendChatCompletionRequest(instruction, imageBase64URL);
    responseText.value = response;
  } catch (error) {
    console.error('Error sending data:', error);
    responseText.value = `Error: ${error.message}`;
  } finally {
    // Always release the guard, otherwise one failure would block every later tick.
    isRequestInFlight = false;
  }
}
199+
200+
/**
 * Switches the page into the "processing" state: flips the button to Stop,
 * locks the instruction and interval controls, sends one frame right away
 * and then schedules sendData() at the selected interval.
 * Refuses to start (message plus alert) when no camera stream is available.
 */
function handleStart() {
  const hasCamera = Boolean(stream);
  if (!hasCamera) {
    responseText.value = "Camera not available. Cannot start.";
    alert("Camera not available. Please grant permission first.");
    return;
  }

  isProcessing = true;

  // Button now acts as the Stop control.
  startButton.textContent = "Stop";
  const buttonClasses = startButton.classList;
  buttonClasses.remove('start');
  buttonClasses.add('stop');

  // Inputs stay fixed while requests are being sent.
  [instructionText, intervalSelect].forEach((control) => {
    control.disabled = true;
  });

  responseText.value = "Processing started...";

  const delayMs = Number.parseInt(intervalSelect.value, 10);

  // First request goes out immediately; the timer covers the rest.
  sendData();
  intervalId = setInterval(sendData, delayMs);
}
224+
225+
/**
 * Leaves the "processing" state: cancels the request timer if one is set,
 * flips the button back to Start and unlocks the instruction and interval
 * controls. The response box is only rewritten when it still shows the
 * "Processing started..." placeholder.
 */
function handleStop() {
  isProcessing = false;

  const activeTimer = intervalId;
  if (activeTimer) {
    clearInterval(activeTimer);
    intervalId = null;
  }

  // Button acts as the Start control again.
  startButton.textContent = "Start";
  const buttonClasses = startButton.classList;
  buttonClasses.remove('stop');
  buttonClasses.add('start');

  [instructionText, intervalSelect].forEach((control) => {
    control.disabled = false;
  });

  const stillShowsPlaceholder = responseText.value.startsWith("Processing started...");
  if (stillShowsPlaceholder) {
    responseText.value = "Processing stopped.";
  }
}
241+
242+
// The single button toggles between starting and stopping processing.
startButton.addEventListener('click', () => {
  const toggle = isProcessing ? handleStop : handleStart;
  toggle();
});

// Initialize camera when the page loads
window.addEventListener('DOMContentLoaded', initCamera);

// Optional: Stop stream when page is closed/navigated away to release camera
window.addEventListener('beforeunload', () => {
  if (stream) {
    for (const track of stream.getTracks()) {
      track.stop();
    }
  }
  if (intervalId) {
    clearInterval(intervalId);
  }
});
262+
263+
</script>
264+
</body>
265+
</html>

docs/demo/ramalama.sh

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# This script will demonstrate a lot of the features of RamaLama, concentrating
55
# on the security features.
66

7-
set -eou pipefail
7+
#set -eou pipefail
88
IFS=$'\n\t'
99

1010
# Setting up some colors for helping read the demo output.
@@ -170,6 +170,22 @@ quadlet() {
170170
clear
171171
}
172172

173+
# Demo step: serve the smolvlm model, open the camera demo page in a
# browser, then stop the container again.
# NOTE(review): echo_color / exec_color are helpers defined elsewhere in this
# script; presumably they print a heading and echo-then-run a command.
multi-modal() {
    # Start the model service detached, named "multi-modal", on port 8080.
    echo_color "Serve smolvlm via RamaLama model service"
    exec_color "ramalama serve --port 8080 --pull=never --name multi-modal -d smolvlm"
    echo

    # The demo page defaults its Base API field to http://localhost:8080.
    echo_color "Use web browser to show interaction"
    exec_color "google-chrome docs/demo/camera-demo.html"

    echo_color "Stop the ramalama container"
    exec_color "ramalama stop multi-modal "
    echo

    # Wait for the presenter before wiping the screen.
    read -r -p "--> clear"
    clear
}
188+
173189
setup
174190

175191
version
@@ -184,5 +200,7 @@ kubernetes
184200

185201
quadlet
186202

203+
multi-modal
204+
187205
echo_color "End of Demo"
188206
echo "Thank you!"

0 commit comments

Comments
 (0)