Skip to content

Commit 11d5d76

Browse files
authored
🔧 fix: Consolidate Text Parsing and TTS Edge Initialization (danny-avila#6582)
* 🔧 fix: Update useTextToSpeechExternal to include loading state and improve text parsing logic
* fix: Update msedge-tts and prevent excessive initialization attempts
* fix: Refactor text parsing logic in mongoMeili model to use parseTextParts function
1 parent a2dd533 commit 11d5d76

File tree

6 files changed

+57
-29
lines changed

6 files changed

+57
-29
lines changed

‎api/models/plugins/mongoMeili.js

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
const _ = require('lodash');
22
const mongoose = require('mongoose');
33
const { MeiliSearch } = require('meilisearch');
4+
const { parseTextParts, ContentTypes } = require('librechat-data-provider');
45
const { cleanUpPrimaryKeyValue } = require('~/lib/utils/misc');
56
const logger = require('~/config/meiliLogger');
67

@@ -238,10 +239,7 @@ const createMeiliMongooseModel = function ({ index, attributesToIndex }) {
238239
}
239240

240241
if (object.content && Array.isArray(object.content)) {
241-
object.text = object.content
242-
.filter((item) => item.type === 'text' && item.text && item.text.value)
243-
.map((item) => item.text.value)
244-
.join(' ');
242+
object.text = parseTextParts(object.content);
245243
delete object.content;
246244
}
247245

‎client/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@
7373
"lodash": "^4.17.21",
7474
"lucide-react": "^0.394.0",
7575
"match-sorter": "^6.3.4",
76-
"msedge-tts": "^1.3.4",
76+
"msedge-tts": "^2.0.0",
7777
"qrcode.react": "^4.2.0",
7878
"rc-input-number": "^7.4.2",
7979
"react": "^18.2.0",

‎client/src/hooks/Input/useTextToSpeechEdge.ts

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ function useTextToSpeechEdge({
2626
const sourceBufferRef = useRef<SourceBuffer | null>(null);
2727
const pendingBuffers = useRef<Uint8Array[]>([]);
2828
const { showToast } = useToastContext();
29+
const initAttempts = useRef(0);
2930

3031
const isBrowserSupported = useMemo(
3132
() => typeof MediaSource !== 'undefined' && MediaSource.isTypeSupported('audio/mpeg'),
@@ -57,14 +58,20 @@ function useTextToSpeechEdge({
5758

5859
const initializeTTS = useCallback(() => {
5960
if (!ttsRef.current) {
60-
ttsRef.current = new MsEdgeTTS();
61+
ttsRef.current = new MsEdgeTTS({
62+
enableLogger: true,
63+
});
6164
}
6265
const availableVoice: VoiceOption | undefined = voices.find((v) => v.value === voiceName);
6366

6467
if (availableVoice) {
68+
if (initAttempts.current > 3) {
69+
return;
70+
}
6571
ttsRef.current
66-
.setMetadata(availableVoice.value, OUTPUT_FORMAT.AUDIO_24KHZ_48KBITRATE_MONO_MP3)
72+
.setMetadata(availableVoice.value, OUTPUT_FORMAT.AUDIO_24KHZ_48KBITRATE_MONO_MP3, {})
6773
.catch((error) => {
74+
initAttempts.current += 1;
6875
console.error('Error initializing TTS:', error);
6976
showToast({
7077
message: localize('com_nav_tts_init_error', { 0: (error as Error).message }),
@@ -73,8 +80,9 @@ function useTextToSpeechEdge({
7380
});
7481
} else if (voices.length > 0) {
7582
ttsRef.current
76-
.setMetadata(voices[0].value, OUTPUT_FORMAT.AUDIO_24KHZ_48KBITRATE_MONO_MP3)
83+
.setMetadata(voices[0].value, OUTPUT_FORMAT.AUDIO_24KHZ_48KBITRATE_MONO_MP3, {})
7784
.catch((error) => {
85+
initAttempts.current += 1;
7886
console.error('Error initializing TTS:', error);
7987
showToast({
8088
message: localize('com_nav_tts_init_error', { 0: (error as Error).message }),
@@ -147,7 +155,8 @@ function useTextToSpeechEdge({
147155
setIsSpeaking(true);
148156
pendingBuffers.current = [];
149157

150-
const readable = ttsRef.current.toStream(text);
158+
const result = await ttsRef.current.toStream(text);
159+
const readable = result.audioStream;
151160

152161
readable.on('data', (chunk: Buffer) => {
153162
pendingBuffers.current.push(new Uint8Array(chunk));

‎client/src/hooks/Input/useTextToSpeechExternal.ts

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,10 @@ function useTextToSpeechExternal({
6767
return playPromise().catch(console.error);
6868
}
6969
console.error(error);
70-
showToast({ message: localize('com_nav_audio_play_error', { 0: error.message }), status: 'error' });
70+
showToast({
71+
message: localize('com_nav_audio_play_error', { 0: error.message }),
72+
status: 'error',
73+
});
7174
});
7275

7376
newAudio.onended = () => {
@@ -87,7 +90,7 @@ function useTextToSpeechExternal({
8790
setDownloadFile(false);
8891
};
8992

90-
const { mutate: processAudio } = useTextToSpeechMutation({
93+
const { mutate: processAudio, isLoading } = useTextToSpeechMutation({
9194
onMutate: (variables) => {
9295
const inputText = (variables.get('input') ?? '') as string;
9396
if (inputText.length >= 4096) {
@@ -182,7 +185,7 @@ function useTextToSpeechExternal({
182185

183186
useEffect(() => cancelPromiseSpeech, [cancelPromiseSpeech]);
184187

185-
const isLoading = useMemo(
188+
const isFetching = useMemo(
186189
() => isLast && globalIsFetching && !globalIsPlaying,
187190
[globalIsFetching, globalIsPlaying, isLast],
188191
);
@@ -192,7 +195,7 @@ function useTextToSpeechExternal({
192195
return {
193196
generateSpeechExternal,
194197
cancelSpeech,
195-
isLoading,
198+
isLoading: isFetching || isLoading,
196199
audioRef,
197200
voices: voicesData,
198201
};

‎package-lock.json

Lines changed: 20 additions & 16 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎packages/data-provider/src/parsers.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -375,9 +375,23 @@ export function parseTextParts(contentParts: a.TMessageContentParts[]): string {
375375
let result = '';
376376

377377
for (const part of contentParts) {
378+
if (!part.type) {
379+
continue;
380+
}
378381
if (part.type === ContentTypes.TEXT) {
379382
const textValue = typeof part.text === 'string' ? part.text : part.text.value;
380383

384+
if (
385+
result.length > 0 &&
386+
textValue.length > 0 &&
387+
result[result.length - 1] !== ' ' &&
388+
textValue[0] !== ' '
389+
) {
390+
result += ' ';
391+
}
392+
result += textValue;
393+
} else if (part.type === ContentTypes.THINK) {
394+
const textValue = typeof part.think === 'string' ? part.think : '';
381395
if (
382396
result.length > 0 &&
383397
textValue.length > 0 &&

0 commit comments

Comments
 (0)