Skip to content

Commit 3e156a0

Browse files
committed
fix: Properly handle LLM Ref
The LLM sometimes emits bracket-less `REF.../REF` tags (in addition to `[REF]...[/REF]`); these should be removed as well
1 parent 33a7bbf commit 3e156a0

File tree

3 files changed

+67
-16
lines changed

3 files changed

+67
-16
lines changed

packages/cozy-search/src/components/Conversations/ChatItemLabel.jsx

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,25 +2,11 @@ import React from 'react'
22

33
import Markdown from 'cozy-ui/transpiled/react/Markdown'
44

5-
/**
6-
* Sanitize chat content by removing special sources tags like
7-
* [REF]...[/REF] or [doc_X] that are not currently handled.
8-
*
9-
* @param {string} content - content to sanitize
10-
* @returns {string} sanitized content
11-
*/
12-
const sanitizeContent = content => {
13-
if (!content) {
14-
return ''
15-
}
16-
return content
17-
.replace(/\s?\[REF\][\s\S]*?\[\/REF\]/g, '')
18-
.replace(/\s?\[doc_\d+\]/g, '')
19-
}
5+
import { sanitizeChatContent } from '../helpers'
206

217
const ChatItemLabel = ({ label }) => {
228
if (typeof label === 'string') {
23-
const content = sanitizeContent(label)
9+
const content = sanitizeChatContent(label)
2410
return <Markdown content={content} />
2511
}
2612

packages/cozy-search/src/components/helpers.js

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,25 @@ export const isMessageForThisConversation = (res, messagesId) =>
2323
messagesId.includes(res._id)
2424

2525
/**
 * Read the `cozy.assistant.enabled` flag.
 *
 * @returns {*} the value returned by `flag('cozy.assistant.enabled')`
 */
export const isAssistantEnabled = () => {
  return flag('cozy.assistant.enabled')
}
26+
27+
/**
 * Sanitize chat content by removing special sources tags that are not
 * currently handled: `[REF]...[/REF]`, bracket-less `REF.../REF` and
 * `[doc_X]`.
 *
 * At most one whitespace character directly preceding a removed tag is
 * removed together with it.
 *
 * @param {string} content - content to sanitize
 * @returns {string} sanitized content, or an empty string when `content`
 * is empty or not a string
 */
export const sanitizeChatContent = content => {
  // Falsy values (null, undefined, '', 0) already produced ''. Treat every
  // other non-string the same way instead of throwing on a missing `.replace`.
  if (typeof content !== 'string' || content === '') {
    return ''
  }
  return (
    content
      // Remove [REF]doc_1[/REF]
      .replace(/\s?\[REF\][\s\S]*?\[\/REF\]/g, '')
      // Remove REFdoc_1/REF (same tag, emitted without brackets)
      .replace(/\s?REF[\s\S]*?\/REF/g, '')
      // Remove [doc_1]
      .replace(/\s?\[doc_\d+\]/g, '')
  )
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import { sanitizeChatContent } from './helpers'
2+
3+
// Jest suite for sanitizeChatContent: empty inputs, each supported tag form,
// and look-alike text that must be left untouched.
describe('sanitizeChatContent', () => {
  it('should return empty string for empty content', () => {
    for (const emptyValue of ['', null, undefined]) {
      expect(sanitizeChatContent(emptyValue)).toBe('')
    }
  })

  it('should return string if no special tags', () => {
    const untouched = 'How you doing, here no ref.'
    expect(sanitizeChatContent(untouched)).toBe(untouched)
  })

  it('should remove tags REF.../REF', () => {
    const cleaned = sanitizeChatContent('Before REFdoc_1/REF after')
    expect(cleaned).toBe('Before after')
  })

  it('should remove tags [REF]...[/REF]', () => {
    const cleaned = sanitizeChatContent('Before [REF]doc_1[/REF] after')
    expect(cleaned).toBe('Before after')
  })

  it('should remove mixed REF tags ', () => {
    const cleaned = sanitizeChatContent(
      'A REF.../REF B [REF]...[/REF] C REF.../REF D'
    )
    expect(cleaned).toBe('A B C D')
  })

  it('should remove [tags]', () => {
    const cleaned = sanitizeChatContent('Here is a doc [doc_42] and some texte')
    expect(cleaned).toBe('Here is a doc and some texte')
  })

  it('should not remove simple REF or [REF]', () => {
    // Opening markers without a matching closing marker are not tags.
    const unclosed = 'REF not closed [REF]not closed either'
    expect(sanitizeChatContent(unclosed)).toBe(unclosed)
  })

  it('should not remove lowercase ref', () => {
    // Matching is case-sensitive: only uppercase REF is stripped.
    const lowercase = 'before refdoc_1/ref after'
    expect(sanitizeChatContent(lowercase)).toBe(lowercase)
  })
})

0 commit comments

Comments
 (0)