
Commit a6f4791

🐛 fix: implement the chat
1 parent 6e30e85 commit a6f4791

File tree

package.json
src/app/api/chat/agentRuntime.ts
src/config/llm.ts
src/config/modelProviders/huggingface.ts
src/libs/agent-runtime/AgentRuntime.ts
src/libs/agent-runtime/huggingface/index.ts
src/libs/agent-runtime/utils/streams/huggingface.ts
src/libs/agent-runtime/utils/streams/protocol.ts

8 files changed: +91 -15 lines changed

package.json

Lines changed: 1 addition & 0 deletions
@@ -236,6 +236,7 @@
   "devDependencies": {
     "@commitlint/cli": "^19.4.0",
     "@edge-runtime/vm": "^4.0.2",
+    "@huggingface/tasks": "^0.12.12",
     "@lobehub/i18n-cli": "^1.19.1",
     "@lobehub/lint": "^1.24.4",
     "@lobehub/seo-cli": "^1.4.2",

src/app/api/chat/agentRuntime.ts

Lines changed: 10 additions & 0 deletions
@@ -225,6 +225,16 @@ const getLlmOptionsFromPayload = (provider: string, payload: JWTPayload) => {

       return { apiKey, baseURL };
     }
+
+    case ModelProvider.HuggingFace: {
+      const { HUGGINGFACE_PROXY_URL, HUGGINGFACE_API_KEY } = getLLMConfig();
+
+      const apiKey = apiKeyManager.pick(payload?.apiKey || HUGGINGFACE_API_KEY);
+      const baseURL = payload?.endpoint || HUGGINGFACE_PROXY_URL;
+
+      return { apiKey, baseURL };
+    }
+
     case ModelProvider.Upstage: {
       const { UPSTAGE_API_KEY } = getLLMConfig();
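The new case mirrors the precedence used by the neighboring providers: an apiKey carried in the request payload wins over the server-side HUGGINGFACE_API_KEY, and a user-supplied endpoint wins over HUGGINGFACE_PROXY_URL. A stripped-down sketch of that resolution (resolveHuggingFaceOptions is a hypothetical name, not part of the commit):

const resolveHuggingFaceOptions = (
  payload: { apiKey?: string; endpoint?: string },
  env: { HUGGINGFACE_API_KEY?: string; HUGGINGFACE_PROXY_URL?: string },
) => ({
  // In the real code apiKeyManager.pick can additionally rotate through a
  // comma-separated key list; a plain fallback is shown here for brevity.
  apiKey: payload.apiKey || env.HUGGINGFACE_API_KEY,
  baseURL: payload.endpoint || env.HUGGINGFACE_PROXY_URL,
});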

src/config/llm.ts

Lines changed: 8 additions & 0 deletions
@@ -125,6 +125,10 @@ export const getLLMConfig = () => {
       ENABLED_HUNYUAN: z.boolean(),
       HUNYUAN_API_KEY: z.string().optional(),
       HUNYUAN_MODEL_LIST: z.string().optional(),
+
+      ENABLED_HUGGINGFACE: z.boolean(),
+      HUGGINGFACE_API_KEY: z.string().optional(),
+      HUGGINGFACE_PROXY_URL: z.string().optional(),
     },
     runtimeEnv: {
       API_KEY_SELECT_MODE: process.env.API_KEY_SELECT_MODE,
@@ -247,6 +251,10 @@ export const getLLMConfig = () => {
       ENABLED_HUNYUAN: !!process.env.HUNYUAN_API_KEY,
       HUNYUAN_API_KEY: process.env.HUNYUAN_API_KEY,
       HUNYUAN_MODEL_LIST: process.env.HUNYUAN_MODEL_LIST,
+
+      ENABLED_HUGGINGFACE: !!process.env.HUGGINGFACE_API_KEY,
+      HUGGINGFACE_API_KEY: process.env.HUGGINGFACE_API_KEY,
+      HUGGINGFACE_PROXY_URL: process.env.HUGGINGFACE_PROXY_URL,
     },
   });
};
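Because ENABLED_HUGGINGFACE is derived from the presence of HUGGINGFACE_API_KEY rather than set independently, a deployment turns the provider on simply by exporting that variable. A minimal sketch of reading the new values (the names come straight from the schema above):

const { ENABLED_HUGGINGFACE, HUGGINGFACE_PROXY_URL } = getLLMConfig();

if (ENABLED_HUGGINGFACE) {
  // True exactly when process.env.HUGGINGFACE_API_KEY is a non-empty string.
  console.log('HuggingFace provider active', { proxy: HUGGINGFACE_PROXY_URL });
}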

src/config/modelProviders/huggingface.ts

Lines changed: 4 additions & 1 deletion
@@ -2,7 +2,10 @@ import { ModelProviderCard } from '@/types/llm';

 // ref https://cloud.tencent.com/document/product/1729/104753
 const HuggingFace: ModelProviderCard = {
-  chatModels: [],
+  chatModels: [
+    { id: 'google/gemma-2-2b-it' },
+    { enabled: true, id: 'mistralai/Mistral-7B-Instruct-v0.2' },
+  ],
   checkModel: 'meta-llama/Llama-3.1-8B-Instruct',
   description:
     'The HuggingFace Inference API offers a fast, free way to explore thousands of models for a variety of tasks. Whether you are prototyping a new application or experimenting with the capabilities of machine learning, the API gives you instant access to high-performing models across many domains.',
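Of the two seeded models, only the Mistral entry carries enabled: true, so it is the one surfaced by default; the gemma model stays listed but off until a user switches it on. A sketch of that distinction, assuming the card is default-exported like the sibling provider files:

import HuggingFace from '@/config/modelProviders/huggingface';

// Models shown without any user action are the ones flagged `enabled`.
const defaultModels = HuggingFace.chatModels.filter((model) => model.enabled);
// -> [{ enabled: true, id: 'mistralai/Mistral-7B-Instruct-v0.2' }]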

src/libs/agent-runtime/AgentRuntime.ts

Lines changed: 1 addition & 1 deletion
@@ -135,7 +135,7 @@ class AgentRuntime {
     github: Partial<ClientOptions>;
     google: { apiKey?: string; baseURL?: string };
     groq: Partial<ClientOptions>;
-    huggingface: { apiKey?: string };
+    huggingface: { apiKey?: string; baseURL?: string };
     hunyuan: Partial<ClientOptions>;
     minimax: Partial<ClientOptions>;
     mistral: Partial<ClientOptions>;
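With baseURL accepted here, a HuggingFace runtime can be pointed at a dedicated Inference Endpoint instead of the shared serverless API. A hypothetical initialization (the helper name and URL are assumptions for illustration, not taken from this diff):

const runtime = await AgentRuntime.initializeWithProviderOptions('huggingface', {
  huggingface: {
    apiKey: 'hf_xxx', // placeholder
    baseURL: 'https://my-model.endpoints.huggingface.cloud', // placeholder
  },
});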

src/libs/agent-runtime/huggingface/index.ts

Lines changed: 18 additions & 13 deletions
@@ -1,5 +1,4 @@
 import { HfInference } from '@huggingface/inference';
-import { HuggingFaceStream, StreamingTextResponse } from 'ai';

 import {
   AgentRuntimeError,
@@ -8,43 +7,49 @@ import {
   ChatStreamPayload,
   LobeRuntimeAI,
 } from '@/libs/agent-runtime';
+import { OpenAIStream } from '@/libs/agent-runtime/utils/streams';

 import { debugStream } from '../utils/debugStream';
+import { StreamingResponse } from '../utils/response';
+import { HuggingfaceResultToStream } from '../utils/streams/huggingface';

 export class LobeHuggingFaceAI implements LobeRuntimeAI {
   private client: HfInference;
   baseURL?: string;

-  constructor({ apiKey }: { apiKey?: string } = {}) {
+  constructor({ apiKey, baseURL }: { apiKey?: string; baseURL?: string } = {}) {
     if (!apiKey) throw AgentRuntimeError.createError(AgentRuntimeErrorType.InvalidProviderAPIKey);

     this.client = new HfInference(apiKey);
+
+    if (baseURL) {
+      this.client.endpoint(baseURL);
+    }
   }

   async chat(payload: ChatStreamPayload, options?: ChatCompetitionOptions) {
     try {
-      const hfStream = this.client.textGenerationStream({
-        inputs: payload.messages,
+      const hfRes = this.client.chatCompletionStream({
+        messages: payload.messages,
         model: payload.model,
-        parameters: {
-          temperature: payload.temperature,
-          top_p: payload.top_p,
-        },
+
         stream: true,
+        temperature: payload.temperature,
+        top_p: payload.top_p,
       });

+      const rawStream = HuggingfaceResultToStream(hfRes);
       // Convert the response into a friendly text-stream
-
-      const stream = HuggingFaceStream(hfStream, options?.callback);
-
-      const [debug, output] = stream.tee();
+      const [debug, output] = rawStream.tee();

       if (process.env.DEBUG_HUGGINGFACE_CHAT_COMPLETION === '1') {
         debugStream(debug).catch(console.error);
       }

+      const stream = OpenAIStream(output, options?.callback);
+
       // Respond with the stream
-      return new StreamingTextResponse(output, { headers: options?.headers });
+      return StreamingResponse(stream, { headers: options?.headers });
     } catch (e) {
       const err = e as Error;
src/libs/agent-runtime/utils/streams/huggingface.ts

Lines changed: 48 additions & 0 deletions

@@ -0,0 +1,48 @@
+import { ChatCompletionStreamOutput } from '@huggingface/tasks';
+import { readableFromAsyncIterable } from 'ai';
+
+import { ChatStreamCallbacks } from '@/libs/agent-runtime';
+import { nanoid } from '@/utils/uuid';
+
+import { ChatResp } from '../../wenxin/type';
+import {
+  StreamProtocolChunk,
+  StreamStack,
+  chatStreamable,
+  createCallbacksTransformer,
+  createSSEProtocolTransformer,
+} from './protocol';
+
+const transformHuggingfaceStream = (chunk: ChatResp): StreamProtocolChunk => {
+  console.log(chunk);
+  const finished = chunk.is_end;
+  if (finished) {
+    return { data: chunk.finish_reason || 'stop', id: chunk.id, type: 'stop' };
+  }
+
+  if (chunk.result) {
+    return { data: chunk.result, id: chunk.id, type: 'text' };
+  }
+
+  return {
+    data: chunk,
+    id: chunk.id,
+    type: 'data',
+  };
+};
+
+export const HuggingfaceResultToStream = (stream: AsyncIterable<ChatCompletionStreamOutput>) => {
+  // convert the response into the streamable format
+  return readableFromAsyncIterable(chatStreamable(stream));
+};
+
+export const HuggingFaceStream = (
+  rawStream: ReadableStream<ChatResp>,
+  callbacks?: ChatStreamCallbacks,
+) => {
+  const streamStack: StreamStack = { id: 'chat_' + nanoid() };
+
+  return rawStream
+    .pipeThrough(createSSEProtocolTransformer(transformHuggingfaceStream, streamStack))
+    .pipeThrough(createCallbacksTransformer(callbacks));
+};
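Only HuggingfaceResultToStream is actually consumed by the runtime above: chatCompletionStream already yields OpenAI-shaped chunks, so wrapping the async iterable in a ReadableStream is all that is needed before handing off to OpenAIStream. The HuggingFaceStream export, built on the Wenxin ChatResp shape, is not referenced anywhere in this commit. A toy demonstration of the wrapper (the chunk is abbreviated and cast for brevity):

const chunks = (async function* () {
  yield { choices: [{ delta: { content: 'Hi' }, index: 0 }], id: 'x' } as any;
})();

// Replays the generator as a web ReadableStream, ready for OpenAIStream.
const readable = HuggingfaceResultToStream(chunks);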

src/libs/agent-runtime/utils/streams/protocol.ts

Lines changed: 1 addition & 0 deletions
@@ -38,6 +38,7 @@ export const generateToolCallId = (index: number, functionName?: string) =>

 export const chatStreamable = async function* <T>(stream: AsyncIterable<T>) {
   for await (const response of stream) {
+    console.log(response);
     yield response;
   }
 };
