Commit 4f620f0

Authored by edlee123, Copilot, ZePan110, chensuyue, and MSCetin37
Bump: ChatQnA Example with OpenAI-Compatible Endpoint (#2091)
Signed-off-by: Ed Lee <[email protected]>
Signed-off-by: ZePan110 <[email protected]>
Signed-off-by: chensuyue <[email protected]>
Signed-off-by: Mustafa <[email protected]>
Signed-off-by: Yi Yao <[email protected]>
Signed-off-by: Yongbozzz <[email protected]>
Signed-off-by: CICD-at-OPEA <[email protected]>
Signed-off-by: Ezequiel Lanza <[email protected]>
Signed-off-by: Wang, Xigui <[email protected]>
Co-authored-by: Copilot <[email protected]>
Co-authored-by: ZePan110 <[email protected]>
Co-authored-by: chen, suyue <[email protected]>
Co-authored-by: Mustafa <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Yi Yao <[email protected]>
Co-authored-by: Zhenzhong Xu <[email protected]>
Co-authored-by: Zhu Yongbo <[email protected]>
Co-authored-by: CICD-at-OPEA <[email protected]>
Co-authored-by: Eze Lanza (Eze) <[email protected]>
Co-authored-by: xiguiw <[email protected]>
Co-authored-by: Yao Qing <[email protected]>
1 parent 9f5b851 commit 4f620f0

File tree

3 files changed: +698 −20 lines changed


ChatQnA/chatqna.py

Lines changed: 72 additions & 20 deletions
@@ -3,10 +3,11 @@
 
 import argparse
 import json
+import logging
 import os
 import re
 
-from comps import MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType
+from comps import CustomLogger, MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType
 from comps.cores.mega.utils import handle_message
 from comps.cores.proto.api_protocol import (
     ChatCompletionRequest,
@@ -20,6 +21,10 @@
 from fastapi.responses import StreamingResponse
 from langchain_core.prompts import PromptTemplate
 
+logger = CustomLogger(__name__)
+log_level = logging.DEBUG if os.getenv("LOGFLAG", "").lower() == "true" else logging.INFO
+logging.basicConfig(level=log_level, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
+
 
 class ChatTemplate:
     @staticmethod
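The hunk above wires module-wide logging to a LOGFLAG environment variable. As a minimal standalone sketch of the same selection rule (standard library only; the "chatqna" logger name below is illustrative):

import logging
import os

# DEBUG only when LOGFLAG=true (case-insensitive), INFO otherwise -- same rule as the hunk above.
log_level = logging.DEBUG if os.getenv("LOGFLAG", "").lower() == "true" else logging.INFO
logging.basicConfig(level=log_level, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")

logging.getLogger("chatqna").debug("printed only when LOGFLAG=true")
logging.getLogger("chatqna").info("always printed")

Launching the megaservice with LOGFLAG=true therefore activates every logger.debug call added in this commit; any other value, or an unset variable, keeps the service at INFO.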
@@ -62,6 +67,10 @@ def generate_rag_prompt(question, documents):
 
 
 def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
+    logger.debug(
+        f"Aligning inputs for service: {self.services[cur_node].name}, type: {self.services[cur_node].service_type}"
+    )
+
     if self.services[cur_node].service_type == ServiceType.EMBEDDING:
         inputs["inputs"] = inputs["text"]
         del inputs["text"]
@@ -83,6 +92,9 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
         # next_inputs["repetition_penalty"] = inputs["repetition_penalty"]
         next_inputs["temperature"] = inputs["temperature"]
         inputs = next_inputs
+
+        # Log the aligned inputs (be careful with sensitive data)
+        logger.debug(f"Aligned inputs for {self.services[cur_node].name}: {type(inputs)}")
     return inputs
 
 
@@ -123,7 +135,9 @@ def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs):
                 elif input_variables == ["question"]:
                     prompt = prompt_template.format(question=data["initial_query"])
                 else:
-                    print(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']")
+                    logger.warning(
+                        f"{prompt_template} not used, we only support 2 input variables ['question', 'context']"
+                    )
                     prompt = ChatTemplate.generate_rag_prompt(data["initial_query"], docs)
             else:
                 prompt = ChatTemplate.generate_rag_prompt(data["initial_query"], docs)
@@ -152,7 +166,7 @@ def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs):
                 elif input_variables == ["question"]:
                     prompt = prompt_template.format(question=prompt)
                 else:
-                    print(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']")
+                    logger.warning(f"{prompt_template} not used, we only support 2 input variables ['question', 'context']")
                     prompt = ChatTemplate.generate_rag_prompt(prompt, reranked_docs)
             else:
                 prompt = ChatTemplate.generate_rag_prompt(prompt, reranked_docs)
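Both warning branches above key off PromptTemplate.input_variables, which langchain_core extracts from the placeholders in the template string. A short illustrative sketch (these template strings are made up for the example, not taken from the repo):

from langchain_core.prompts import PromptTemplate

# Two placeholders: handled by the ["context", "question"] branch (outside this hunk).
rag_template = PromptTemplate.from_template("Given {context}, answer: {question}")
print(sorted(rag_template.input_variables))  # ['context', 'question']

# One placeholder: handled by the ["question"] branch shown above.
plain_template = PromptTemplate.from_template("Answer briefly: {question}")
print(plain_template.input_variables)  # ['question']

# Any other set of variables now reaches logger.warning (previously a bare print)
# and falls back to ChatTemplate.generate_rag_prompt.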
@@ -171,27 +185,65 @@
 
 
 def align_generator(self, gen, **kwargs):
-    # OpenAI response format
-    # b'data:{"id":"","object":"text_completion","created":1725530204,"model":"meta-llama/Meta-Llama-3-8B-Instruct","system_fingerprint":"2.0.1-native","choices":[{"index":0,"delta":{"role":"assistant","content":"?"},"logprobs":null,"finish_reason":null}]}\n\n'
+    """Aligns the generator output to match ChatQnA's format of sending bytes.
+
+    Handles different LLM output formats (TGI, OpenAI) and properly filters
+    empty or null content chunks to avoid UI display issues.
+    """
+    # OpenAI response format example:
+    # b'data:{"id":"","object":"text_completion","created":1725530204,"model":"meta-llama/Meta-Llama-3-8B-Instruct",
+    # "system_fingerprint":"2.0.1-native","choices":[{"index":0,"delta":{"role":"assistant","content":"?"},
+    # "logprobs":null,"finish_reason":null}]}\n\n'
+
     for line in gen:
-        line = line.decode("utf-8")
-        chunks = [chunk.strip() for chunk in line.split("\n\n") if chunk.strip()]
-        for line in chunks:
+        try:
+            line = line.decode("utf-8")
             start = line.find("{")
             end = line.rfind("}") + 1
+
+            # Skip lines with invalid JSON structure
+            if start == -1 or end <= start:
+                logger.debug("Skipping line with invalid JSON structure")
+                continue
+
             json_str = line[start:end]
-            try:
-                # sometimes yield empty chunk, do a fallback here
-                json_data = json.loads(json_str)
-                if "ops" in json_data and "op" in json_data["ops"][0]:
-                    if "value" in json_data["ops"][0] and isinstance(json_data["ops"][0]["value"], str):
-                        yield f"data: {repr(json_data['ops'][0]['value'].encode('utf-8'))}\n\n"
-                    else:
-                        pass
-                elif "content" in json_data["choices"][0]["delta"]:
-                    yield f"data: {repr(json_data['choices'][0]['delta']['content'].encode('utf-8'))}\n\n"
-            except Exception as e:
-                yield f"data: {repr(json_str.encode('utf-8'))}\n\n"
+
+            # Parse the JSON data
+            json_data = json.loads(json_str)
+
+            # Handle TGI format responses
+            if "ops" in json_data and "op" in json_data["ops"][0]:
+                if "value" in json_data["ops"][0] and isinstance(json_data["ops"][0]["value"], str):
+                    yield f"data: {repr(json_data['ops'][0]['value'].encode('utf-8'))}\n\n"
+                # Empty value chunks are silently skipped
+
+            # Handle OpenAI format responses
+            elif "choices" in json_data and len(json_data["choices"]) > 0:
+                # Only yield content if it exists and is not null
+                if (
+                    "delta" in json_data["choices"][0]
+                    and "content" in json_data["choices"][0]["delta"]
+                    and json_data["choices"][0]["delta"]["content"] is not None
+                ):
+                    content = json_data["choices"][0]["delta"]["content"]
+                    yield f"data: {repr(content.encode('utf-8'))}\n\n"
+                # Null content chunks are silently skipped
+                elif (
+                    "delta" in json_data["choices"][0]
+                    and "content" in json_data["choices"][0]["delta"]
+                    and json_data["choices"][0]["delta"]["content"] is None
+                ):
+                    logger.debug("Skipping null content chunk")
+
+        except json.JSONDecodeError as e:
+            # Log the error with the problematic JSON string for better debugging
+            logger.error(f"JSON parsing error in align_generator: {e}\nProblematic JSON: {json_str[:200]}")
+            # Skip sending invalid JSON to avoid UI issues
+            continue
+        except Exception as e:
+            logger.error(f"Unexpected error in align_generator: {e}, line snippet: {line[:100]}...")
+            # Skip sending to avoid UI issues
+            continue
     yield "data: [DONE]\n\n"
 
 
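The net effect of the rewrite is that only well-formed, non-null OpenAI deltas reach the client. A self-contained sketch of that filtering rule follows; filter_chunk is an illustrative stand-in for this walkthrough, not a function in chatqna.py:

import json

def filter_chunk(raw: bytes):
    """Apply align_generator's OpenAI-branch rules to a single SSE chunk."""
    line = raw.decode("utf-8")
    start, end = line.find("{"), line.rfind("}") + 1
    if start == -1 or end <= start:
        return None  # no JSON payload (e.g. a bare 'data: [DONE]' line): skipped
    try:
        choices = json.loads(line[start:end]).get("choices") or [{}]
    except json.JSONDecodeError:
        return None  # malformed JSON is logged and dropped in the real code
    content = choices[0].get("delta", {}).get("content")
    if content is None:
        return None  # missing or null content: skipped, so the UI never renders blank tokens
    return f"data: {repr(content.encode('utf-8'))}\n\n"

token = b'data:{"choices":[{"index":0,"delta":{"role":"assistant","content":"Hi"},"finish_reason":null}]}\n\n'
stop = b'data:{"choices":[{"index":0,"delta":{"role":"assistant","content":null},"finish_reason":"stop"}]}\n\n'
print(filter_chunk(token))  # data: b'Hi'
print(filter_chunk(stop))   # None

The data: b'...' byte-repr framing is ChatQnA's existing streaming contract; what changes in this commit is that malformed JSON and empty deltas are logged and dropped instead of being forwarded to the UI as raw text.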