@@ -207,7 +207,6 @@ def __init__(self,
         self.add_api_route("/v1/embeddings", self._embeddings_style_openai, methods=["POST"])
         self.add_api_route("/v1/chat/completions", self._chat_completions, methods=["POST"])
 
-        self.add_api_route("/v1/models", self._models, methods=["GET"])
         self.add_api_route("/tokenizer/{model_name}", self._tokenizer, methods=["GET"])
 
         self._inference_queue = inference_queue
@@ -483,35 +482,6 @@ async def _embeddings_style_openai(self, post: EmbeddingsStyleOpenAI, authorizat
             "usage": {"prompt_tokens": -1, "total_tokens": -1}
         }
 
-    async def _models(self, authorization: str = Header(None)):
-        await self._account_from_bearer(authorization)
-        try:
-            async with aiohttp.ClientSession() as session:
-                async with session.get("http://127.0.0.1:8001/v1/caps") as resp:
-                    lsp_server_caps = await resp.json()
-        except aiohttp.ClientConnectorError as e:
-            err_msg = f"LSP server is not ready yet: {e}"
-            log(err_msg)
-            raise HTTPException(status_code=401, detail=err_msg)
-        completion_models = set()
-        for model, caps in lsp_server_caps["code_completion_models"].items():
-            completion_models.update({model, *caps["similar_models"]})
-        chat_models = set()
-        for model, caps in lsp_server_caps["code_chat_models"].items():
-            chat_models.update({model, *caps["similar_models"]})
-        data = [
-            {
-                "id": model, "root": model, "object": "model",
-                "created": 0, "owned_by": "", "permission": [], "parent": None,
-                "completion": model in completion_models, "chat": model in chat_models,
-            }
-            for model in lsp_server_caps["running_models"]
-        ]
-        return {
-            "object": "list",
-            "data": data,
-        }
-
     async def _chat_completions(self, post: ChatContext, authorization: str = Header(None)):
         def compose_usage_dict(model_dict, prompt_tokens_n, generated_tokens_n) -> Dict[str, Any]:
             usage_dict = dict()
@@ -543,6 +513,7 @@ def _wrap_output(output: str) -> str:
             return prefix + output + postfix
 
         model_dict = self._model_assigner.models_db_with_passthrough.get(post.model, {})
+        assert model_dict.get('backend') == 'litellm'
 
         async def litellm_streamer():
             generated_tokens_n = 0
@@ -613,47 +584,14 @@ async def litellm_non_streamer():
                 log(err_msg)
                 yield json.dumps(_patch_caps_version({"error": err_msg}))
 
-        async def chat_completion_streamer():
-            post_url = "http://127.0.0.1:8001/v1/chat"
-            payload = {
-                "messages": messages,
-                "stream": True,
-                "model": post.model,
-                "parameters": {
-                    "temperature": post.temperature,
-                    "max_new_tokens": post.actual_max_tokens,
-                }
-            }
-            async with aiohttp.ClientSession() as session:
-                try:
-                    async with session.post(post_url, json=payload) as response:
-                        finish_reason = None
-                        async for data, _ in response.content.iter_chunks():
-                            try:
-                                data = data.decode("utf-8")
-                                data = json.loads(data[len(prefix):-len(postfix)])
-                                finish_reason = data["choices"][0]["finish_reason"]
-                                data["choices"][0]["finish_reason"] = None
-                            except json.JSONDecodeError:
-                                data = {"choices": [{"finish_reason": finish_reason}]}
-                            yield _wrap_output(json.dumps(_patch_caps_version(data)))
-                except aiohttp.ClientConnectorError as e:
-                    err_msg = f"LSP server is not ready yet: {e}"
-                    log(err_msg)
-                    yield _wrap_output(json.dumps(_patch_caps_version({"error": err_msg})))
-
-        if model_dict.get('backend') == 'litellm':
-            model_name = model_dict.get('resolve_as', post.model)
-            if model_name not in litellm.model_list:
-                log(f"warning: requested model {model_name} is not in the litellm.model_list (this might not be the issue for some providers)")
-            log(f"chat/completions: model resolve {post.model} -> {model_name}")
-            prompt_tokens_n = litellm.token_counter(model_name, messages=messages)
-            if post.tools:
-                prompt_tokens_n += litellm.token_counter(model_name, text=json.dumps(post.tools))
-            response_streamer = litellm_streamer() if post.stream else litellm_non_streamer()
-        else:
-            # TODO: unused refact-lsp logic, remove ASAP
-            response_streamer = chat_completion_streamer()
+        model_name = model_dict.get('resolve_as', post.model)
+        if model_name not in litellm.model_list:
+            log(f"warning: requested model {model_name} is not in the litellm.model_list (this might not be the issue for some providers)")
+        log(f"chat/completions: model resolve {post.model} -> {model_name}")
+        prompt_tokens_n = litellm.token_counter(model_name, messages=messages)
+        if post.tools:
+            prompt_tokens_n += litellm.token_counter(model_name, text=json.dumps(post.tools))
+        response_streamer = litellm_streamer() if post.stream else litellm_non_streamer()
 
         return StreamingResponse(response_streamer, media_type="text/event-stream")
 