Skip to content

Commit 127b9c5

Browse files
committed
feat: implement /api/ps endpoint
Fixes #2605 Signed-off-by: Jeff MAURY <[email protected]>
1 parent 4d45a13 commit 127b9c5

File tree

3 files changed

+146
-0
lines changed

3 files changed

+146
-0
lines changed

api/openapi.yaml

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,21 @@ paths:
162162
schema:
163163
$ref: '#/components/schemas/ChatResponse'
164164

165+
# GET /api/ps — ollama-compatible endpoint listing the currently running models.
/api/ps:
  get:
    operationId: getRunningModels
    tags:
      - models
    summary: List running models
    description: List running models
    responses:
      '200':
        description: The list of running models was successfully fetched
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ProcessResponse'
179+
165180
components:
166181
schemas:
167182
ListResponse:
@@ -191,6 +206,35 @@ components:
191206
details:
192207
$ref: '#/components/schemas/ModelDetails'
193208

209+
# Payload returned by GET /api/ps: wraps the list of running models.
ProcessResponse:
  type: object
  description: Response with a list of running models
  properties:
    models:
      type: array
      items:
        $ref: '#/components/schemas/ProcessModelResponse'

# One entry of the /api/ps response, mirroring ollama's process model shape.
ProcessModelResponse:
  type: object
  description: Running model description
  properties:
    name:
      type: string
    model:
      type: string
    size:
      type: integer
    digest:
      type: string
    details:
      $ref: '#/components/schemas/ModelDetails'
    expires_at:
      type: string
      format: date-time
    size_vram:
      type: integer
237+
194238
ModelDetails:
195239
type: object
196240
description: Details about a model

packages/backend/src/managers/apiServer.spec.ts

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -793,3 +793,75 @@ describe.each([undefined, true, false])('stream is %o', stream => {
793793
});
794794
});
795795
});
796+
797+
describe('/api/ps', () => {
798+
test('returns an error if the model is not known', async () => {
799+
expect(server.getListener()).toBeDefined();
800+
vi.mocked(inferenceManager.getServers).mockImplementation(() => {
801+
throw new Error('model unknown');
802+
});
803+
const res = await request(server.getListener()!).get('/api/ps').expect(500);
804+
expect(res.body).toMatchObject({ message: 'unable to ps' });
805+
});
806+
807+
test('returns empty result if no servers', async () => {
808+
expect(server.getListener()).toBeDefined();
809+
vi.mocked(inferenceManager.getServers).mockReturnValue([]);
810+
const res = await request(server.getListener()!).get('/api/ps').expect(200);
811+
expect(res.body).toEqual({ models: [] });
812+
});
813+
814+
test('returns empty result if server is stopped', async () => {
815+
expect(server.getListener()).toBeDefined();
816+
vi.mocked(inferenceManager.getServers).mockReturnValue([
817+
{
818+
models: [
819+
{
820+
id: 'modelId1',
821+
name: 'model-name',
822+
description: 'model 1',
823+
},
824+
],
825+
container: {
826+
engineId: 'engine1',
827+
containerId: 'container1',
828+
},
829+
status: 'stopped',
830+
} as unknown as InferenceServer,
831+
]);
832+
const res = await request(server.getListener()!).get('/api/ps').expect(200);
833+
expect(res.body).toEqual({ models: [] });
834+
});
835+
836+
test('returns result if server is started', async () => {
837+
expect(server.getListener()).toBeDefined();
838+
vi.mocked(inferenceManager.getServers).mockReturnValue([
839+
{
840+
models: [
841+
{
842+
id: 'modelId1',
843+
name: 'model-name',
844+
description: 'model 1',
845+
memory: 1_000_000,
846+
},
847+
],
848+
container: {
849+
engineId: 'engine1',
850+
containerId: 'container1',
851+
},
852+
status: 'running',
853+
} as unknown as InferenceServer,
854+
]);
855+
const res = await request(server.getListener()!).get('/api/ps').expect(200);
856+
expect(res.body).toEqual({
857+
models: [
858+
{
859+
name: 'model-name',
860+
model: 'model-name',
861+
size: 1_000_000,
862+
digest: 'b48fa42fa5b28c4363747ec0797532e274650f73004383a3054697137d9d1f30',
863+
},
864+
],
865+
});
866+
});
867+
});

packages/backend/src/managers/apiServer.ts

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ import OpenAI from 'openai';
4141
import type { ChatCompletionMessageParam } from 'openai/resources';
4242
import type { ContainerRegistry } from '../registries/ContainerRegistry';
4343
import type { Stream } from 'openai/streaming';
44+
import crypto from 'node:crypto';
4445

4546
const SHOW_API_INFO_COMMAND = 'ai-lab.show-api-info';
4647
const SHOW_API_ERROR_COMMAND = 'ai-lab.show-api-error';
@@ -49,6 +50,7 @@ export const PREFERENCE_RANDOM_PORT = 0;
4950

5051
type ListModelResponse = components['schemas']['ListModelResponse'];
5152
type Message = components['schemas']['Message'];
53+
type ProcessModelResponse = components['schemas']['ProcessModelResponse'];
5254

5355
function asListModelResponse(model: ModelInfo): ListModelResponse {
5456
return {
@@ -61,6 +63,20 @@ function asListModelResponse(model: ModelInfo): ListModelResponse {
6163
};
6264
}
6365

66+
// ollama expect at least 12 characters for the digest
67+
function toDigest(name: string, sha256?: string): string {
68+
return sha256 ?? crypto.createHash('sha256').update(name).digest('hex');
69+
}
70+
71+
function asProcessModelResponse(model: ModelInfo): ProcessModelResponse {
72+
return {
73+
name: model.name,
74+
model: model.name,
75+
size: model.memory,
76+
digest: toDigest(model.name, model.sha256),
77+
};
78+
}
79+
6480
const LISTENING_ADDRESS = '127.0.0.1';
6581

6682
interface ChatCompletionOptions {
@@ -124,6 +140,7 @@ export class ApiServer implements Disposable {
124140
router.post('/show', this.show.bind(this));
125141
router.post('/generate', this.generate.bind(this));
126142
router.post('/chat', this.chat.bind(this));
143+
router.get('/ps', this.ps.bind(this));
127144
app.get('/', (_res, res) => res.sendStatus(200)); //required for the ollama client to work against us
128145
app.use('/api', router);
129146
app.use('/spec', this.getSpec.bind(this));
@@ -574,4 +591,17 @@ export class ApiServer implements Disposable {
574591
})
575592
.catch((err: unknown) => console.error(`unable to check if the inference server is running: ${err}`));
576593
}
594+
595+
ps(_req: Request, res: Response): void {
596+
try {
597+
const models = this.inferenceManager
598+
.getServers()
599+
.filter(server => server.status === 'running')
600+
.flatMap(server => server.models)
601+
.map(model => asProcessModelResponse(model));
602+
res.status(200).json({ models });
603+
} catch (err: unknown) {
604+
this.doErr(res, 'unable to ps', err);
605+
}
606+
}
577607
}

0 commit comments

Comments
 (0)