33
44import argparse
55import json
6+ import logging
67import os
78import re
89
9- from comps import MegaServiceEndpoint , MicroService , ServiceOrchestrator , ServiceRoleType , ServiceType
10+ from comps import CustomLogger , MegaServiceEndpoint , MicroService , ServiceOrchestrator , ServiceRoleType , ServiceType
1011from comps .cores .mega .utils import handle_message
1112from comps .cores .proto .api_protocol import (
1213 ChatCompletionRequest ,
2021from fastapi .responses import StreamingResponse
2122from langchain_core .prompts import PromptTemplate
2223
# Module-wide logger for this megaservice.
logger = CustomLogger(__name__)

# Debug-level logging is opt-in via the LOGFLAG environment variable
# (any case-insensitive spelling of "true"); everything else means INFO.
if os.getenv("LOGFLAG", "").lower() == "true":
    log_level = logging.DEBUG
else:
    log_level = logging.INFO
logging.basicConfig(
    level=log_level,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)

2328
2429class ChatTemplate :
2530 @staticmethod
@@ -62,6 +67,10 @@ def generate_rag_prompt(question, documents):
6267
6368
6469def align_inputs (self , inputs , cur_node , runtime_graph , llm_parameters_dict , ** kwargs ):
70+ logger .debug (
71+ f"Aligning inputs for service: { self .services [cur_node ].name } , type: { self .services [cur_node ].service_type } "
72+ )
73+
6574 if self .services [cur_node ].service_type == ServiceType .EMBEDDING :
6675 inputs ["inputs" ] = inputs ["text" ]
6776 del inputs ["text" ]
@@ -83,6 +92,9 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k
8392 # next_inputs["repetition_penalty"] = inputs["repetition_penalty"]
8493 next_inputs ["temperature" ] = inputs ["temperature" ]
8594 inputs = next_inputs
95+
96+ # Log the aligned inputs (be careful with sensitive data)
97+ logger .debug (f"Aligned inputs for { self .services [cur_node ].name } : { type (inputs )} " )
8698 return inputs
8799
88100
@@ -123,7 +135,9 @@ def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_di
123135 elif input_variables == ["question" ]:
124136 prompt = prompt_template .format (question = data ["initial_query" ])
125137 else :
126- print (f"{ prompt_template } not used, we only support 2 input variables ['question', 'context']" )
138+ logger .warning (
139+ f"{ prompt_template } not used, we only support 2 input variables ['question', 'context']"
140+ )
127141 prompt = ChatTemplate .generate_rag_prompt (data ["initial_query" ], docs )
128142 else :
129143 prompt = ChatTemplate .generate_rag_prompt (data ["initial_query" ], docs )
@@ -152,7 +166,7 @@ def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_di
152166 elif input_variables == ["question" ]:
153167 prompt = prompt_template .format (question = prompt )
154168 else :
155- print (f"{ prompt_template } not used, we only support 2 input variables ['question', 'context']" )
169+ logger . warning (f"{ prompt_template } not used, we only support 2 input variables ['question', 'context']" )
156170 prompt = ChatTemplate .generate_rag_prompt (prompt , reranked_docs )
157171 else :
158172 prompt = ChatTemplate .generate_rag_prompt (prompt , reranked_docs )
@@ -171,27 +185,65 @@ def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_di
171185
172186
def align_generator(self, gen, **kwargs):
    """Align streamed LLM output to ChatQnA's byte-chunk SSE format.

    Consumes raw SSE byte chunks from the upstream LLM, parses each embedded
    JSON event, and re-emits ``data: <repr(bytes)>\\n\\n`` strings. Handles
    both TGI/langserve-style payloads (``"ops"``) and OpenAI-style payloads
    (``"choices"``), skipping empty or null content deltas so the UI never
    renders spurious tokens. Malformed chunks are logged and dropped rather
    than forwarded. Always terminates with ``data: [DONE]\\n\\n``.

    Args:
        gen: iterable of ``bytes`` chunks as received from the LLM stream.

    Yields:
        str: aligned SSE data lines.
    """
    # OpenAI response format example:
    # b'data:{"id":"","object":"text_completion","created":1725530204,
    # "model":"meta-llama/Meta-Llama-3-8B-Instruct","system_fingerprint":"2.0.1-native",
    # "choices":[{"index":0,"delta":{"role":"assistant","content":"?"},
    # "logprobs":null,"finish_reason":null}]}\n\n'
    for raw in gen:
        try:
            decoded = raw.decode("utf-8")
        except (UnicodeDecodeError, AttributeError) as e:
            logger.error(f"Unable to decode stream chunk in align_generator: {e}")
            continue

        # A single read may carry SEVERAL SSE events separated by blank lines;
        # split first, otherwise find("{")/rfind("}") would span two JSON
        # objects and every multi-event chunk would be dropped as invalid.
        for event in (part.strip() for part in decoded.split("\n\n") if part.strip()):
            start = event.find("{")
            end = event.rfind("}") + 1

            # Skip events with no JSON object at all (e.g. bare "[DONE]" markers).
            if start == -1 or end <= start:
                logger.debug("Skipping line with invalid JSON structure")
                continue

            json_str = event[start:end]
            try:
                json_data = json.loads(json_str)
            except json.JSONDecodeError as e:
                # Include a bounded snippet of the offending payload for debugging,
                # but do not forward invalid JSON downstream.
                logger.error(f"JSON parsing error in align_generator: {e}\nProblematic JSON: {json_str[:200]}")
                continue

            try:
                if "ops" in json_data and "op" in json_data["ops"][0]:
                    # TGI / langserve intermediate-event format.
                    value = json_data["ops"][0].get("value")
                    if isinstance(value, str):
                        yield f"data: {repr(value.encode('utf-8'))}\n\n"
                    # Non-string / missing values are silently skipped.
                elif "choices" in json_data and len(json_data["choices"]) > 0:
                    # OpenAI chat-completion chunk format.
                    delta = json_data["choices"][0].get("delta", {})
                    content = delta.get("content")
                    if content is not None:
                        yield f"data: {repr(content.encode('utf-8'))}\n\n"
                    elif "content" in delta:
                        # Explicit null content (e.g. the final stop chunk).
                        logger.debug("Skipping null content chunk")
            except Exception as e:
                logger.error(f"Unexpected error in align_generator: {e}, line snippet: {event[:100]}...")
                continue
    yield "data: [DONE]\n\n"
196248
197249
0 commit comments