Skip to content

Commit fb47c53

Browse files
committed
feat(server) clear semantic search metadata
1 parent d9e8ce8 commit fb47c53

File tree

1 file changed

+52
-9
lines changed

1 file changed

+52
-9
lines changed

packages/backend/server/src/plugins/copilot/tools/doc-semantic-search.ts

Lines changed: 52 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { tool } from 'ai';
2+
import { omit } from 'lodash-es';
23
import { z } from 'zod';
34

45
import type { AccessController } from '../../../core/permission';
@@ -8,6 +9,29 @@ import type { ContextSession } from '../context/session';
89
import type { CopilotChatOptions } from '../providers';
910
import { toolError } from './error';
1011

12+
/**
 * Line prefixes that mark a metadata header line at the top of a chunk's
 * content. A line is treated as metadata when it starts with any of these.
 */
const FILTER_PREFIX = [
  'Title: ',
  'Created at: ',
  'Updated at: ',
  'Created by: ',
  'Updated by: ',
];

/**
 * Removes the leading metadata header lines from a chunk's content.
 *
 * Only a consecutive run of metadata lines at the very start of the content
 * is removed, and at most five of them. Scanning stops at the first line that
 * does not start with one of the `FILTER_PREFIX` entries, so matching lines
 * further down in the body are preserved.
 *
 * @param chunk - The chunk whose `content` should be cleaned.
 * @returns A shallow copy of `chunk` with the header lines stripped from
 *   `content`, or the original `chunk` object when `content` is empty/falsy.
 */
function clearEmbeddingChunk(chunk: ChunkSimilarity): ChunkSimilarity {
  if (!chunk.content) {
    return chunk;
  }

  const lines = chunk.content.split('\n');
  const maxHeaderLines = 5;

  // Count the leading metadata lines. The loop must stop at the first
  // non-metadata line: without that exit condition, content that does not
  // begin with a metadata line would never make progress and spin forever.
  let headerLines = 0;
  while (
    headerLines < maxHeaderLines &&
    headerLines < lines.length &&
    FILTER_PREFIX.some(prefix => lines[headerLines].startsWith(prefix))
  ) {
    headerLines++;
  }

  return { ...chunk, content: lines.slice(headerLines).join('\n') };
}
34+
1135
export const buildDocSearchGetter = (
1236
ac: AccessController,
1337
context: CopilotContextService,
@@ -47,18 +71,37 @@ export const buildDocSearchGetter = (
4771
if (!docChunks.length && !fileChunks.length)
4872
return `No results found for "${query}".`;
4973

74+
const docIds = docChunks.map(c => ({
75+
// oxlint-disable-next-line no-non-null-assertion
76+
workspaceId: options.workspace!,
77+
docId: c.docId,
78+
}));
79+
const docAuthors = await models.doc
80+
.findAuthors(docIds)
81+
.then(
82+
docs =>
83+
new Map(
84+
docs
85+
.filter(d => !!d)
86+
.map(doc => [doc.id, omit(doc, ['id', 'workspaceId'])])
87+
)
88+
);
5089
const docMetas = await models.doc
51-
.findAuthors(
52-
docChunks.map(c => ({
53-
// oxlint-disable-next-line no-non-null-assertion
54-
workspaceId: options.workspace!,
55-
docId: c.docId,
56-
}))
57-
)
58-
.then(docs => new Map(docs.filter(d => !!d).map(doc => [doc.id, doc])));
90+
.findMetas(docIds, { select: { title: true } })
91+
.then(
92+
docs =>
93+
new Map(
94+
docs
95+
.filter(d => !!d)
96+
.map(doc => [
97+
doc.docId,
98+
Object.assign({}, doc, docAuthors.get(doc.docId)),
99+
])
100+
)
101+
);
59102

60103
return [
61-
...fileChunks,
104+
...fileChunks.map(clearEmbeddingChunk),
62105
...docChunks.map(c => ({
63106
...c,
64107
...docMetas.get(c.docId),

0 commit comments

Comments
 (0)