Skip to content

Commit 5af03f6

Browse files
committed
✨ feat: support anthropic with vision
1 parent 0e69cb0 commit 5af03f6

File tree

8 files changed

+157
-14
lines changed

8 files changed

+157
-14
lines changed

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@
7474
},
7575
"dependencies": {
7676
"@ant-design/icons": "^5",
77-
"@anthropic-ai/sdk": "^0.14.1",
77+
"@anthropic-ai/sdk": "^0.16.0",
7878
"@auth/core": "latest",
7979
"@aws-sdk/client-bedrock-runtime": "^3.503.1",
8080
"@azure/openai": "^1.0.0-beta.11",
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// @vitest-environment edge-runtime
2+
import { describe, expect, it, vi } from 'vitest';
3+
4+
import { POST as UniverseRoute } from '../[provider]/route';
5+
import { POST, preferredRegion, runtime } from './route';
6+
7+
// mock '../[provider]/route' so the universal POST handler is not actually invoked
8+
vi.mock('../[provider]/route', () => ({
9+
POST: vi.fn().mockResolvedValue('mocked response'),
10+
}));
11+
12+
describe('Configuration tests', () => {
13+
it('should have runtime set to "edge"', () => {
14+
expect(runtime).toBe('edge');
15+
});
16+
17+
it('should contain specific regions in preferredRegion', () => {
18+
expect(preferredRegion).not.contain('hk1');
19+
});
20+
});
21+
22+
describe('Anthropic POST function tests', () => {
23+
it('should call UniverseRoute with correct parameters', async () => {
24+
const mockRequest = new Request('https://example.com', { method: 'POST' });
25+
await POST(mockRequest);
26+
expect(UniverseRoute).toHaveBeenCalledWith(mockRequest, { params: { provider: 'anthropic' } });
27+
});
28+
});

src/app/api/chat/anthropic/route.ts

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import { POST as UniverseRoute } from '../[provider]/route';
2+
3+
// NOTE: copied from the Google route — because the Chinese region does not support accessing Google,
4+
// we need to use proxy to access it
5+
// refs: https://github.com/google/generative-ai-js/issues/29#issuecomment-1866246513
6+
// if (process.env.HTTP_PROXY_URL) {
7+
// const { setGlobalDispatcher, ProxyAgent } = require('undici');
8+
//
9+
// console.log(process.env.HTTP_PROXY_URL)
10+
// setGlobalDispatcher(new ProxyAgent({ uri: process.env.HTTP_PROXY_URL }));
11+
// }
12+
13+
// but undici only can be used in NodeJS
14+
// so if you want to use with proxy, you need comment the code below
15+
export const runtime = 'edge';
16+
17+
export const preferredRegion = [
18+
'bom1',
19+
'cle1',
20+
'cpt1',
21+
'gru1',
22+
'hnd1',
23+
'iad1',
24+
'icn1',
25+
'kix1',
26+
'pdx1',
27+
'sfo1',
28+
'sin1',
29+
'syd1',
30+
];
31+
32+
export const POST = async (req: Request) =>
33+
UniverseRoute(req, { params: { provider: 'anthropic' } });

src/config/modelProviders/anthropic.ts

Lines changed: 38 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,52 @@ import { ModelProviderCard } from '@/types/llm';
33
const Anthropic: ModelProviderCard = {
44
chatModels: [
55
{
6-
displayName: 'Claude Instant 1.2',
7-
id: 'claude-instant-1.2',
8-
tokens: 100_000,
6+
description:
7+
'Ideal balance of intelligence and speed for enterprise workloads. Maximum utility at a lower price, dependable, balanced for scaled deployments',
8+
displayName: 'Claude 3 Sonnet',
9+
id: 'claude-3-sonnet-20240229',
10+
maxOutput: 4096,
11+
tokens: 200_000,
12+
vision: true,
913
},
1014
{
11-
displayName: 'Claude 2.0',
12-
id: 'claude-2.0',
13-
tokens: 100_000,
15+
description:
16+
'Most powerful model for highly complex tasks. Top-level performance, intelligence, fluency, and understanding',
17+
displayName: 'Claude 3 Opus',
18+
id: 'claude-3-opus-20240229',
19+
maxOutput: 4096,
20+
tokens: 200_000,
21+
vision: true,
22+
},
23+
{
24+
description:
25+
'Fastest and most compact model for near-instant responsiveness. Quick and accurate targeted performance',
26+
displayName: 'Claude 3 Haiku',
27+
hidden: true,
28+
id: 'claude-3-haiku-20240307',
29+
maxOutput: 4096,
30+
tokens: 200_000,
31+
vision: true,
1432
},
1533
{
1634
displayName: 'Claude 2.1',
1735
id: 'claude-2.1',
36+
maxOutput: 4096,
1837
tokens: 200_000,
1938
},
39+
{
40+
displayName: 'Claude 2.0',
41+
id: 'claude-2.0',
42+
maxOutput: 4096,
43+
tokens: 100_000,
44+
},
45+
{
46+
displayName: 'Claude Instant 1.2',
47+
hidden: true,
48+
id: 'claude-instant-1.2',
49+
maxOutput: 4096,
50+
tokens: 100_000,
51+
},
2052
],
2153
id: 'anthropic',
2254
};

src/libs/agent-runtime/anthropic/index.ts

Lines changed: 53 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,22 @@
1+
// sort-imports-ignore
2+
import '@anthropic-ai/sdk/shims/web';
13
import Anthropic from '@anthropic-ai/sdk';
24
import { AnthropicStream, StreamingTextResponse } from 'ai';
35
import { ClientOptions } from 'openai';
46

57
import { LobeRuntimeAI } from '../BaseAI';
68
import { AgentRuntimeErrorType } from '../error';
7-
import { ChatCompetitionOptions, ChatStreamPayload, ModelProvider } from '../types';
9+
import {
10+
ChatCompetitionOptions,
11+
ChatStreamPayload,
12+
ModelProvider,
13+
OpenAIChatMessage,
14+
UserMessageContentPart,
15+
} from '../types';
816
import { AgentRuntimeError } from '../utils/createError';
917
import { debugStream } from '../utils/debugStream';
1018
import { handleOpenAIError } from '../utils/handleOpenAIError';
19+
import { parseDataUri } from '../utils/uriParser';
1120

1221
export class LobeAnthropicAI implements LobeRuntimeAI {
1322
private client: Anthropic;
@@ -18,14 +27,31 @@ export class LobeAnthropicAI implements LobeRuntimeAI {
1827
this.client = new Anthropic({ apiKey });
1928
}
2029

30+
private buildAnthropicMessages = (
31+
messages: OpenAIChatMessage[],
32+
): Anthropic.Messages.MessageParam[] =>
33+
messages.map((message) => this.convertToAnthropicMessage(message));
34+
35+
private convertToAnthropicMessage = (
36+
message: OpenAIChatMessage,
37+
): Anthropic.Messages.MessageParam => {
38+
const content = message.content as string | UserMessageContentPart[];
39+
40+
return {
41+
content:
42+
typeof content === 'string' ? content : content.map((c) => this.convertToAnthropicBlock(c)),
43+
role: message.role === 'function' || message.role === 'system' ? 'assistant' : message.role,
44+
};
45+
};
46+
2147
async chat(payload: ChatStreamPayload, options?: ChatCompetitionOptions) {
2248
const { messages, model, max_tokens, temperature, top_p } = payload;
2349
const system_message = messages.find((m) => m.role === 'system');
2450
const user_messages = messages.filter((m) => m.role !== 'system');
2551

26-
const requestPramas: Anthropic.MessageCreateParams = {
52+
const requestParams: Anthropic.MessageCreateParams = {
2753
max_tokens: max_tokens || 1024,
28-
messages: user_messages as Anthropic.Messages.MessageParam[],
54+
messages: this.buildAnthropicMessages(user_messages),
2955
model: model,
3056
stream: true,
3157
system: system_message?.content as string,
@@ -34,7 +60,7 @@ export class LobeAnthropicAI implements LobeRuntimeAI {
3460
};
3561

3662
try {
37-
const response = await this.client.messages.create(requestPramas);
63+
const response = await this.client.messages.create(requestParams);
3864
const [prod, debug] = response.tee();
3965

4066
if (process.env.DEBUG_ANTHROPIC_CHAT_COMPLETION === '1') {
@@ -78,6 +104,29 @@ export class LobeAnthropicAI implements LobeRuntimeAI {
78104
});
79105
}
80106
}
107+
108+
private convertToAnthropicBlock(
109+
content: UserMessageContentPart,
110+
): Anthropic.ContentBlock | Anthropic.ImageBlockParam {
111+
switch (content.type) {
112+
case 'text': {
113+
return content;
114+
}
115+
116+
case 'image_url': {
117+
const { mimeType, base64 } = parseDataUri(content.image_url.url);
118+
119+
return {
120+
source: {
121+
data: base64 as string,
122+
media_type: mimeType as Anthropic.ImageBlockParam.Source['media_type'],
123+
type: 'base64',
124+
},
125+
type: 'image',
126+
};
127+
}
128+
}
129+
}
81130
}
82131

83132
export default LobeAnthropicAI;

src/libs/agent-runtime/types/chat.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@ interface UserMessageContentPartText {
66
text: string;
77
type: 'text';
88
}
9-
interface UserMessageContentPartImage {
9+
10+
export interface UserMessageContentPartImage {
1011
image_url: {
1112
detail?: 'auto' | 'low' | 'high';
1213
url: string;

src/services/file.ts

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,7 @@ class FileService {
3434
}))();
3535

3636
// compress the image
37-
const fileType = 'image/webp';
38-
const base64String = compressImage({ img, type: fileType });
37+
const base64String = compressImage({ img, type: file.fileType });
3938
const binaryString = atob(base64String.split('base64,')[1]);
4039
const uint8Array = Uint8Array.from(binaryString, (char) => char.charCodeAt(0));
4140
file.data = uint8Array.buffer;

src/types/llm.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ export interface ChatModelCard {
1919
* whether model is legacy (deprecated but not removed yet)
2020
*/
2121
legacy?: boolean;
22+
maxOutput?: number;
2223
tokens?: number;
2324
/**
2425
* whether model supports vision

0 commit comments

Comments
 (0)