Skip to content

Commit 6502f9a

Browse files
committed
embeds mode logic
1 parent 4334a8c commit 6502f9a

File tree

1 file changed

+195
-53
lines changed

1 file changed

+195
-53
lines changed

src/components/dictionary/Dictionary.tsx

Lines changed: 195 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -331,59 +331,173 @@ const Dictionary: React.FC = () => {
331331
let translations: string[] = [];
332332
let headLang: string = srcOverride;
333333
let translationLang: string = tgtOverride;
334+
let exactMatchFound = false;
334335

335336
if (exactForward) {
336337
headerTerm = exactForward.head.replace(/<[^>]+>/g, '').trim();
337338
translations = exactForward.defs.map((d) => d.replace(/<[^>]+>/g, '').trim());
338339
headLang = srcOverride;
339340
translationLang = tgtOverride;
341+
exactMatchFound = true;
340342
} else if (exactReverse) {
341343
headerTerm = exactReverse.head.replace(/<[^>]+>/g, '').trim();
342344
translations = exactReverse.defs.map((d) => d.replace(/<[^>]+>/g, '').trim());
343345
headLang = tgtOverride;
344346
translationLang = srcOverride;
345-
} else {
346-
setEmbeddingResults([]);
347-
setLoading(false);
348-
searchRef.current = null;
349-
return;
347+
exactMatchFound = true;
350348
}
351349

352-
const termsForEmbedding = Array.from(
353-
new Set([headerTerm, ...translations].filter(Boolean)),
354-
) as string[];
355-
356-
const embeddingMode = await chooseEmbeddingMode(srcOverride, tgtOverride);
357-
if (!embeddingMode) {
358-
setEmbeddingResults([]);
350+
if (!exactMatchFound) {
351+
const modesSet = await loadEmbeddingModes();
352+
const forwardMode = `${srcOverride}|${tgtOverride}`;
353+
const reverseMode = `${tgtOverride}|${srcOverride}`;
354+
const availableModes: string[] = [];
355+
if (modesSet.has(forwardMode)) availableModes.push(forwardMode);
356+
if (modesSet.has(reverseMode)) availableModes.push(reverseMode);
357+
if (availableModes.length === 0) {
358+
setEmbeddingResults([]);
359+
} else {
360+
type Job = { term: string; embeddingMode: string; termLang: string };
361+
const jobs: Job[] = availableModes.map((mode) => ({
362+
term: rawWord,
363+
embeddingMode: mode,
364+
termLang: srcOverride,
365+
}));
366+
const jobResults: Array<{
367+
term: string;
368+
embeddingMode: string;
369+
sims: string[];
370+
termLang: string;
371+
}> = [];
372+
await Promise.all(
373+
jobs.map(async (job) => {
374+
const simsSet = new Set<string>();
375+
const [, embReq] = apyFetch('embeddings', {
376+
q: job.term,
377+
langpair: job.embeddingMode,
378+
});
379+
try {
380+
const embRes = await embReq;
381+
const sims: string[] =
382+
embRes.data.responseData?.embeddingResults?.flatMap((obj: any) =>
383+
Object.values(obj).flat(),
384+
) || [];
385+
sims.forEach((s) => {
386+
if (!s.startsWith('*')) simsSet.add(s);
387+
});
388+
} catch {}
389+
jobResults.push({
390+
term: job.term,
391+
embeddingMode: job.embeddingMode,
392+
sims: Array.from(simsSet),
393+
termLang: job.termLang,
394+
});
395+
}),
396+
);
397+
const simsByMode: Record<string, string[]> = {};
398+
jobResults.forEach(({ embeddingMode, sims }) => {
399+
if (!simsByMode[embeddingMode]) simsByMode[embeddingMode] = [];
400+
sims.forEach((s) => {
401+
if (!simsByMode[embeddingMode].includes(s)) simsByMode[embeddingMode].push(s);
402+
});
403+
});
404+
const bilsearchParsed: Record<string, Record<string, Entry[]>> = {};
405+
await Promise.all(
406+
Object.entries(simsByMode).map(async ([mode, sims]) => {
407+
bilsearchParsed[mode] = {};
408+
await Promise.all(
409+
sims.map(async (sim) => {
410+
const [, bsReq] = apyFetch('bilsearch', { q: sim, langpair: mode });
411+
try {
412+
const resp = await bsReq;
413+
const raw = resp.data.responseData?.searchResults ?? [];
414+
const parsed = (raw as Array<Record<string, string[]>>).flatMap((o) =>
415+
Object.entries(o).map(([hd, defs]) => ({ head: hd, defs })),
416+
);
417+
bilsearchParsed[mode][sim] = parsed;
418+
} catch {
419+
bilsearchParsed[mode][sim] = [];
420+
}
421+
}),
422+
);
423+
}),
424+
);
425+
const embEntries: Entry[] = [];
426+
jobResults.forEach(({ embeddingMode, sims, termLang, term }) => {
427+
const [embedSourceLang] = embeddingMode.split('|');
428+
const isReverseEmbeddingMode = embeddingMode === `${tgtOverride}|${srcOverride}`;
429+
const displaySimilarTo = term;
430+
sims.forEach((sim) => {
431+
const parsed = bilsearchParsed[embeddingMode]?.[sim] || [];
432+
parsed.forEach(({ head: bilHead, defs }) => {
433+
defs.forEach((def) => {
434+
if (isReverseEmbeddingMode) {
435+
embEntries.push({
436+
head: def,
437+
defs: [bilHead],
438+
similarTo: displaySimilarTo,
439+
} as Entry);
440+
} else {
441+
embEntries.push({
442+
head: bilHead,
443+
defs: [def],
444+
similarTo: displaySimilarTo,
445+
} as Entry);
446+
}
447+
});
448+
});
449+
});
450+
});
451+
setEmbeddingResults(embEntries);
452+
}
359453
} else {
360-
const [embedSourceLang] = embeddingMode.split('|');
454+
const modesSet = await loadEmbeddingModes();
361455
const termLang: Record<string, string> = {};
362456
if (headerTerm) termLang[headerTerm] = headLang;
363457
translations.forEach((tr) => {
364458
termLang[tr] = translationLang;
365459
});
366-
367-
const getEquivalentInLang = (term: string, fromLang: string, toLang: string): string => {
368-
if (fromLang === toLang) return term;
369-
if (headerTerm) {
370-
if (fromLang === headLang && toLang === translationLang) {
371-
return translations[0] || term;
372-
}
373-
if (fromLang === translationLang && toLang === headLang) {
374-
return headerTerm;
375-
}
460+
type Job = { term: string; termLang: string; embeddingMode: string };
461+
const jobsMap = new Map<string, Job>();
462+
const forwardMode = `${srcOverride}|${tgtOverride}`;
463+
const reverseMode = `${tgtOverride}|${srcOverride}`;
464+
if (headerTerm) {
465+
if (termLang[headerTerm] === srcOverride && modesSet.has(forwardMode)) {
466+
const key = `${headerTerm}|${forwardMode}`;
467+
jobsMap.set(key, {
468+
term: headerTerm,
469+
termLang: termLang[headerTerm],
470+
embeddingMode: forwardMode,
471+
});
376472
}
377-
return term;
378-
};
379-
380-
const termToSims: Record<string, string[]> = {};
473+
if (termLang[headerTerm] === tgtOverride && modesSet.has(reverseMode)) {
474+
const key = `${headerTerm}|${reverseMode}`;
475+
jobsMap.set(key, {
476+
term: headerTerm,
477+
termLang: termLang[headerTerm],
478+
embeddingMode: reverseMode,
479+
});
480+
}
481+
}
482+
translations.forEach((tr) => {
483+
if (termLang[tr] === srcOverride && modesSet.has(forwardMode)) {
484+
const key = `${tr}|${forwardMode}`;
485+
jobsMap.set(key, { term: tr, termLang: termLang[tr], embeddingMode: forwardMode });
486+
}
487+
if (termLang[tr] === tgtOverride && modesSet.has(reverseMode)) {
488+
const key = `${tr}|${reverseMode}`;
489+
jobsMap.set(key, { term: tr, termLang: termLang[tr], embeddingMode: reverseMode });
490+
}
491+
});
492+
const jobs = Array.from(jobsMap.values());
493+
const jobResults: Array<{ term: string; embeddingMode: string; sims: string[]; termLang: string }> =
494+
[];
381495
await Promise.all(
382-
termsForEmbedding.map(async (term) => {
496+
jobs.map(async (job) => {
383497
const simsSet = new Set<string>();
384498
const [, embReq] = apyFetch('embeddings', {
385-
q: term,
386-
langpair: embeddingMode,
499+
q: job.term,
500+
langpair: job.embeddingMode,
387501
});
388502
try {
389503
const embRes = await embReq;
@@ -395,33 +509,61 @@ const Dictionary: React.FC = () => {
395509
if (!s.startsWith('*')) simsSet.add(s);
396510
});
397511
} catch {}
398-
termToSims[term] = Array.from(simsSet);
512+
jobResults.push({
513+
term: job.term,
514+
embeddingMode: job.embeddingMode,
515+
sims: Array.from(simsSet),
516+
termLang: job.termLang,
517+
});
399518
}),
400519
);
401-
402-
const uniqueSims = Array.from(new Set(Object.values(termToSims).flat())).filter(Boolean);
403-
const bilsearchLangpair = embeddingMode;
404-
const isReverseEmbeddingMode = embeddingMode === `${tgtOverride}|${srcOverride}`;
405-
406-
const bilsearchResponses = await Promise.all(
407-
uniqueSims.map((sim) => apyFetch('bilsearch', { q: sim, langpair: bilsearchLangpair })[1]),
408-
);
409-
410-
const simToParsed: Record<string, Entry[]> = {};
411-
bilsearchResponses.forEach((resp, i) => {
412-
const sim = uniqueSims[i];
413-
const raw = resp.data.responseData?.searchResults ?? [];
414-
simToParsed[sim] = (raw as Array<Record<string, string[]>>).flatMap((o) =>
415-
Object.entries(o).map(([hd, defs]) => ({ head: hd, defs })),
416-
);
520+
const simsByMode: Record<string, string[]> = {};
521+
jobResults.forEach(({ embeddingMode, sims }) => {
522+
if (!simsByMode[embeddingMode]) simsByMode[embeddingMode] = [];
523+
sims.forEach((s) => {
524+
if (!simsByMode[embeddingMode].includes(s)) simsByMode[embeddingMode].push(s);
525+
});
417526
});
418-
527+
const bilsearchParsed: Record<string, Record<string, Entry[]>> = {};
528+
await Promise.all(
529+
Object.entries(simsByMode).map(async ([mode, sims]) => {
530+
bilsearchParsed[mode] = {};
531+
await Promise.all(
532+
sims.map(async (sim) => {
533+
const [, bsReq] = apyFetch('bilsearch', { q: sim, langpair: mode });
534+
try {
535+
const resp = await bsReq;
536+
const raw = resp.data.responseData?.searchResults ?? [];
537+
const parsed = (raw as Array<Record<string, string[]>>).flatMap((o) =>
538+
Object.entries(o).map(([hd, defs]) => ({ head: hd, defs })),
539+
);
540+
bilsearchParsed[mode][sim] = parsed;
541+
} catch {
542+
bilsearchParsed[mode][sim] = [];
543+
}
544+
}),
545+
);
546+
}),
547+
);
419548
const embEntries: Entry[] = [];
420-
Object.entries(termToSims).forEach(([originalTerm, sims]) => {
421-
const fromLang = termLang[originalTerm] || '';
422-
const displaySimilarTo = getEquivalentInLang(originalTerm, fromLang, embedSourceLang);
549+
jobResults.forEach(({ embeddingMode, sims, termLang, term }) => {
550+
const [embedSourceLang] = embeddingMode.split('|');
551+
const isReverseEmbeddingMode = embeddingMode === `${tgtOverride}|${srcOverride}`;
552+
const getEquivalentInLang = (orig: string, fromLang: string, toLang: string): string => {
553+
if (fromLang === toLang) return orig;
554+
if (headerTerm) {
555+
if (fromLang === headLang && toLang === translationLang) {
556+
return translations[0] || orig;
557+
}
558+
if (fromLang === translationLang && toLang === headLang) {
559+
return headerTerm;
560+
}
561+
}
562+
return orig;
563+
};
564+
const displaySimilarTo = getEquivalentInLang(term, termLang, embedSourceLang);
423565
sims.forEach((sim) => {
424-
const parsed = simToParsed[sim] || [];
566+
const parsed = bilsearchParsed[embeddingMode]?.[sim] || [];
425567
parsed.forEach(({ head: bilHead, defs }) => {
426568
defs.forEach((def) => {
427569
if (isReverseEmbeddingMode) {
@@ -450,7 +592,7 @@ const Dictionary: React.FC = () => {
450592
searchRef.current = null;
451593
}
452594
},
453-
[apyFetch, searchWord, srcLang, tgtLang, chooseEmbeddingMode],
595+
[apyFetch, searchWord, srcLang, tgtLang, chooseEmbeddingMode, loadEmbeddingModes],
454596
);
455597

456598
const grouped: Record<string, Entry[]> = React.useMemo(() => {

0 commit comments

Comments
 (0)