docs: update explanation for separate config use in litellm #958


Merged · 4 commits · Jun 13, 2025 · Viewing changes from 1 commit
`.env.example` (49 changes: 40 additions & 9 deletions)

@@ -7,25 +7,56 @@ For more information about configuration options, please refer to the documentation.

"""

# ==========================================
# Global configs:
USE_AZURE=False
> **Review comment (Contributor):** Remove these options

CHAT_USE_AZURE_TOKEN_PROVIDER=False
EMBEDDING_USE_AZURE_TOKEN_PROVIDER=False
MAX_RETRY=10
RETRY_WAIT_SECONDS=20

# LLM API Setting:
OPENAI_API_KEY=<your_api_key>
CHAT_MODEL=gpt-4-turbo
CHAT_MAX_TOKENS=3000
CHAT_TEMPERATURE=0.7
# ==========================================


# ==========================================
# Backend Configuration
# ==========================================
BACKEND=rdagent.oai.backend.LiteLLMAPIBackend
# ==========================================

# ==========================================
# Backend Configuration (choose one)
# ==========================================

# 1. Set universal API key
# CHAT_MODEL="gpt-4o"
# EMBEDDING_MODEL="text-embedding-3-small"
# OPENAI_API_BASE="https://your-endpoint.com/v1"
# OPENAI_API_KEY="sk-your-api-key-here"

# 2. Set separate API keys
# Chat configuration
OPENAI_API_KEY="sk-chat-key"
OPENAI_API_BASE="https://xxx-litellm.com/v1"
CHAT_MODEL='gpt-4o'

# Embedding configuration (using other service)
# Using SiliconFlow as an example; pay attention to the litellm_proxy prefix
LITELLM_PROXY_API_KEY="sk-embedding-service-key"
LITELLM_PROXY_API_BASE="https://api.siliconflow.cn/v1"
EMBEDDING_MODEL="litellm_proxy/BAAI/bge-large-en-v1.5"
# ==========================================

# ==========================================
# Other Configuration
# ==========================================
# CHAT_AZURE_API_BASE=<for_Azure_user>
# CHAT_AZURE_API_VERSION=<for_Azure_user>

EMBEDDING_MODEL=text-embedding-3-small
# EMBEDDING_AZURE_API_BASE=<for_Azure_user>
# EMBEDDING_AZURE_API_VERSION=<for_Azure_user>

# Cache Setting (Optional):

# Scenario Configs:
# USE_CHAT_CACHE=True
# USE_EMBEDDING_CACHE=True
# Scenario Configs:
# ==========================================
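
For reference, the `litellm_proxy/` prefix is what routes the embedding call to LiteLLM's proxy provider, which reads the `LITELLM_PROXY_*` variables, while the chat call keeps using the OpenAI-style ones. A minimal sketch of the two resulting calls, assuming the `litellm` package is installed (keys and URLs are the placeholders from above):

```python
import os
import litellm

# Chat uses the OpenAI-style settings.
os.environ["OPENAI_API_KEY"] = "sk-chat-key"                  # placeholder
os.environ["OPENAI_API_BASE"] = "https://xxx-litellm.com/v1"  # placeholder

# Embedding uses the proxy settings because of the litellm_proxy/ prefix.
os.environ["LITELLM_PROXY_API_KEY"] = "sk-embedding-service-key"
os.environ["LITELLM_PROXY_API_BASE"] = "https://api.siliconflow.cn/v1"

chat = litellm.completion(
    model="gpt-4o",
    messages=[{"role": "user", "content": "ping"}],
)
emb = litellm.embedding(
    model="litellm_proxy/BAAI/bge-large-en-v1.5",
    input=["hello world"],
)
print(chat.choices[0].message.content, len(emb.data[0]["embedding"]))
```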
`README.md` (28 changes: 26 additions & 2 deletions)

@@ -153,16 +153,40 @@ Ensure the current user can run Docker commands **without using sudo**.

You can set your Chat Model and Embedding Model in the following ways:

- **Using LiteLLM (Recommended)**: We now support LiteLLM as a backend for integration with multiple LLM providers. You can configure it in two ways:

> **Review comment (Contributor):** I think putting the Unified API option before the separate API option would be more user-friendly.

**Option 1: Separate API bases for Chat and Embedding models**
```bash
cat << EOF > .env
BACKEND=rdagent.oai.backend.LiteLLMAPIBackend
# Set to any model supported by LiteLLM.
# Configure separate API bases for chat and embedding

# CHAT MODEL:
CHAT_MODEL=gpt-4o
OPENAI_API_BASE=<your_chat_api_base>
OPENAI_API_KEY=<replace_with_your_openai_api_key>

# EMBEDDING MODEL:
# Take SiliconFlow as an example; you can use other providers.
# Note: embedding requires the litellm_proxy prefix
EMBEDDING_MODEL=litellm_proxy/BAAI/bge-large-en-v1.5
LITELLM_PROXY_API_KEY=<replace_with_your_siliconflow_api_key>
LITELLM_PROXY_API_BASE=https://api.siliconflow.cn/v1
EOF
```

**Option 2: Unified API base for both models**
```bash
cat << EOF > .env
BACKEND=rdagent.oai.backend.LiteLLMAPIBackend
# Set to any model supported by LiteLLM.
CHAT_MODEL=gpt-4o
EMBEDDING_MODEL=text-embedding-3-small
# Configure unified API base
OPENAI_API_BASE=<your_unified_api_base>
OPENAI_API_KEY=<replace_with_your_openai_api_key>
EOF
```
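
  After writing the `.env` file with either option, one way to sanity-check the configuration before running RD-Agent is to load it and issue a single chat and embedding call through LiteLLM directly. A rough sketch, assuming `litellm` and `python-dotenv` are installed:

  ```python
  import os
  from dotenv import load_dotenv
  import litellm

  load_dotenv()  # pulls the variables from .env into the process environment

  # One round-trip per model; any auth or base-URL problem surfaces immediately.
  resp = litellm.completion(
      model=os.environ["CHAT_MODEL"],
      messages=[{"role": "user", "content": "Reply with OK."}],
  )
  print(resp.choices[0].message.content)

  emb = litellm.embedding(model=os.environ["EMBEDDING_MODEL"], input=["smoke test"])
  print(len(emb.data[0]["embedding"]), "dimensions")
  ```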

Notice: If you are using reasoning models that include thought processes in their responses (such as \<think> tags), you need to set the following environment variable:
```bash
REASONING_THINK_RM=True
```
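
For intuition: the flag tells RD-Agent to strip the thought process from such responses. Conceptually the cleanup is similar to the following sketch (an illustration only, not RD-Agent's actual implementation):

```python
import re

def strip_think(text: str) -> str:
    """Remove <think>...</think> blocks that some reasoning models prepend."""
    return re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()

print(strip_think("<think>chain of thought...</think>The answer is 42."))
# -> "The answer is 42."
```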
`docs/installation_and_configuration.rst` (39 changes: 36 additions & 3 deletions)

@@ -18,15 +18,40 @@ LiteLLM Backend Configuration

Please create a `.env` file in the root directory of the project and add environment variables.

We now support LiteLLM as a backend for integration with multiple LLM providers. You can configure it in two ways:

Option 1: Separate API bases for Chat and Embedding models
----------------------------------------------------------

.. code-block:: Properties

    BACKEND=rdagent.oai.backend.LiteLLMAPIBackend
    # Set to any model supported by LiteLLM.

    # CHAT MODEL:
    CHAT_MODEL=gpt-4o
    OPENAI_API_BASE=<your_chat_api_base>
    OPENAI_API_KEY=<replace_with_your_openai_api_key>

    # EMBEDDING MODEL:
    # Take SiliconFlow as an example; you can use other providers.
    # Note: embedding requires the litellm_proxy prefix
    EMBEDDING_MODEL=litellm_proxy/BAAI/bge-large-en-v1.5
    LITELLM_PROXY_API_KEY=<replace_with_your_siliconflow_api_key>
    LITELLM_PROXY_API_BASE=https://api.siliconflow.cn/v1

Option 2: Unified API base for both models
-------------------------------------------

> **Review comment (Contributor):** Switch

.. code-block:: Properties

    BACKEND=rdagent.oai.backend.LiteLLMAPIBackend
    # Set to any model supported by LiteLLM.
    CHAT_MODEL=gpt-4o
    EMBEDDING_MODEL=text-embedding-3-small
    # Configure unified API base
    OPENAI_API_BASE=<your_unified_api_base>
    OPENAI_API_KEY=<replace_with_your_openai_api_key>

Necessary parameters include:
@@ -37,6 +62,14 @@

- `EMBEDDING_MODEL`: The model name of the embedding model.

- `OPENAI_API_BASE`: The base URL of the API. It is used for both the chat and embedding models if `EMBEDDING_MODEL` does not start with `litellm_proxy/`; otherwise it is used for the chat model only.

- `LITELLM_PROXY_API_KEY`: The API key used for the embedding model when `EMBEDDING_MODEL` starts with `litellm_proxy/`.

- `LITELLM_PROXY_API_BASE`: The base URL used for the embedding model when `EMBEDDING_MODEL` starts with `litellm_proxy/`. A sketch of this selection rule follows below.
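
As noted in the list above, the embedding credentials are selected by a simple prefix check. The following sketch restates that rule (illustrative only; the helper name `embedding_credentials` is hypothetical):

.. code-block:: python

    import os

    def embedding_credentials():
        """Pick the API base/key for the embedding model per the rules above."""
        if os.environ.get("EMBEDDING_MODEL", "").startswith("litellm_proxy/"):
            return (os.environ.get("LITELLM_PROXY_API_BASE"),
                    os.environ.get("LITELLM_PROXY_API_KEY"))
        # Otherwise the embedding model shares the chat model's OpenAI-style settings.
        return (os.environ.get("OPENAI_API_BASE"),
                os.environ.get("OPENAI_API_KEY"))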

The `CHAT_MODEL` and `EMBEDDING_MODEL` parameters are passed to LiteLLM's completion and embedding functions, respectively.

Therefore, when using models from different providers, first review LiteLLM's interface configuration: the model names must match those recognized by LiteLLM.
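
For example, LiteLLM addresses most non-OpenAI providers as `<provider>/<model>`, while OpenAI models need no prefix. A brief sketch (the model name below is a standard LiteLLM identifier; the matching provider key must already be set in the environment):

.. code-block:: python

    import litellm

    # OpenAI models need no prefix; most other providers are addressed as
    # "<provider>/<model>". The matching provider API key must be present
    # in the environment for the call to succeed.
    resp = litellm.completion(
        model="deepseek/deepseek-chat",  # would read DEEPSEEK_API_KEY
        messages=[{"role": "user", "content": "hello"}],
    )
    print(resp.choices[0].message.content)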
`test/oai/test_embedding_and_similarity.py` (1 change: 1 addition & 0 deletions)

@@ -24,6 +24,7 @@ def test_embedding_similarity(self) -> None:
assert similarity is not None
assert isinstance(similarity, float)
min_similarity_threshold = 0.8
print(f"similarity: {similarity}")
assert similarity >= min_similarity_threshold
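
For context, an embedding-similarity score of this kind is typically the cosine similarity of the two vectors; the sketch below shows that computation (an assumption for illustration — the project's own helper may differ):

```python
import numpy as np

def cosine_similarity(a: list[float], b: list[float]) -> float:
    """Cosine similarity: dot product of the vectors over the product of their norms."""
    a, b = np.asarray(a), np.asarray(b)
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

print(cosine_similarity([1.0, 0.0], [1.0, 1.0]))  # ~0.707
```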

