Skip to content

Commit c797cac

Browse files
authored
feat(server): clear semantic search metadata (#13197)
fix AI-360

<!-- This is an auto-generated comment: release notes by coderabbit.ai -->

## Summary by CodeRabbit

* **New Features**
  * Search results now display document metadata enriched with author information.
* **Improvements**
  * Search result content is cleaner, with leading metadata lines (such as titles and creation dates) removed from document excerpts.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
1 parent 339ecab commit c797cac

File tree

1 file changed

+55
-9
lines changed

1 file changed

+55
-9
lines changed

packages/backend/server/src/plugins/copilot/tools/doc-semantic-search.ts

Lines changed: 55 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { tool } from 'ai';
2+
import { omit } from 'lodash-es';
23
import { z } from 'zod';
34

45
import type { AccessController } from '../../../core/permission';
@@ -8,6 +9,32 @@ import type { ContextSession } from '../context/session';
89
import type { CopilotChatOptions } from '../providers';
910
import { toolError } from './error';
1011

12+
/**
 * Line prefixes that mark a row as metadata. `clearEmbeddingChunk` drops
 * leading rows of a chunk's content that start with any of these.
 */
const FILTER_PREFIX = [
  'Title: ',
  'Created at: ', 'Updated at: ',
  'Created by: ', 'Updated by: ',
];
19+
20+
/**
 * Removes the leading metadata rows from a chunk's content.
 *
 * Only consecutive rows at the very top of the content are considered, and
 * at most five of them are dropped. A row counts as metadata when it starts
 * with one of the `FILTER_PREFIX` entries.
 *
 * @param chunk - the chunk whose `content` should be cleaned
 * @returns the same chunk object when `content` is empty or missing;
 *   otherwise a shallow copy with the cleaned `content`
 */
function clearEmbeddingChunk(chunk: ChunkSimilarity): ChunkSimilarity {
  if (!chunk.content) {
    return chunk;
  }

  const isMetadataRow = (row: string) =>
    FILTER_PREFIX.some(prefix => row.startsWith(prefix));

  const rows = chunk.content.split('\n');
  // Index of the first row to keep: the first non-metadata row, or row 5
  // once the cap on stripped rows is reached.
  const firstKept = rows.findIndex(
    (row, index) => index >= 5 || !isMetadataRow(row)
  );
  // -1 means every row was metadata (and there were at most five of them).
  const skipped = firstKept === -1 ? rows.length : firstKept;

  return { ...chunk, content: rows.slice(skipped).join('\n') };
}
37+
1138
export const buildDocSearchGetter = (
1239
ac: AccessController,
1340
context: CopilotContextService,
@@ -47,18 +74,37 @@ export const buildDocSearchGetter = (
4774
if (!docChunks.length && !fileChunks.length)
4875
return `No results found for "${query}".`;
4976

77+
const docIds = docChunks.map(c => ({
78+
// oxlint-disable-next-line no-non-null-assertion
79+
workspaceId: options.workspace!,
80+
docId: c.docId,
81+
}));
82+
const docAuthors = await models.doc
83+
.findAuthors(docIds)
84+
.then(
85+
docs =>
86+
new Map(
87+
docs
88+
.filter(d => !!d)
89+
.map(doc => [doc.id, omit(doc, ['id', 'workspaceId'])])
90+
)
91+
);
5092
const docMetas = await models.doc
51-
.findAuthors(
52-
docChunks.map(c => ({
53-
// oxlint-disable-next-line no-non-null-assertion
54-
workspaceId: options.workspace!,
55-
docId: c.docId,
56-
}))
57-
)
58-
.then(docs => new Map(docs.filter(d => !!d).map(doc => [doc.id, doc])));
93+
.findMetas(docIds, { select: { title: true } })
94+
.then(
95+
docs =>
96+
new Map(
97+
docs
98+
.filter(d => !!d)
99+
.map(doc => [
100+
doc.docId,
101+
Object.assign({}, doc, docAuthors.get(doc.docId)),
102+
])
103+
)
104+
);
59105

60106
return [
61-
...fileChunks,
107+
...fileChunks.map(clearEmbeddingChunk),
62108
...docChunks.map(c => ({
63109
...c,
64110
...docMetas.get(c.docId),

0 commit comments

Comments
 (0)