Skip to content

Commit 8624f84

Browse files
authored
💄 style: improve search experience (lobehub#9661)
* add placeholder structure
* improve searching
* improve search result content
* fix tests
* improve error
* improve crawler multi page style
* improve crawler multi page content
* improve styles
* fix tests
* make url max twoline
1 parent 6db99c0 commit 8624f84

File tree

32 files changed

+857
-153
lines changed

32 files changed

+857
-153
lines changed

packages/prompts/src/prompts/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@ export * from './chatMessages';
22
export * from './files';
33
export * from './knowledgeBaseQA';
44
export * from './plugin';
5+
export * from './search';
56
export * from './systemRole';
Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
import { describe, expect, it } from 'vitest';
2+
3+
import { crawlResultsPrompt } from './crawlResults';
4+
5+
describe('crawlResultsPrompt', () => {
  type CrawlInput = Parameters<typeof crawlResultsPrompt>[0];

  interface PromptCase {
    expected: string;
    input: CrawlInput;
    name: string;
  }

  /**
   * Wrap the expected child element lines in the `<crawlResults>` root element,
   * one line per child, mirroring the layout produced by `crawlResultsPrompt`.
   */
  const wrap = (...children: string[]): string =>
    ['<crawlResults>', ...children, '</crawlResults>'].join('\n');

  // Each entry pairs an input list with the exact XML string it must produce.
  const cases: PromptCase[] = [
    {
      name: 'should return empty XML for empty results',
      input: [],
      expected: '<no_crawl_results />',
    },
    {
      name: 'should convert basic crawl result to compact XML format',
      input: [
        {
          url: 'https://example.com',
          title: 'Example Page',
          content: 'Page content here',
        },
      ],
      expected: wrap(
        ' <page url="https://example.com" title="Example Page">Page content here</page>',
      ),
    },
    {
      name: 'should include all optional metadata fields',
      input: [
        {
          url: 'http://arxiv.org/abs/2509.09734v1',
          title: 'MCP-AgentBench: Evaluating Real-World Language Agent Performance',
          contentType: 'text',
          description: 'Abstract page for arXiv paper 2509.09734v1',
          length: 10187,
          content: 'Full paper content...',
        },
      ],
      expected: wrap(
        ' <page url="http://arxiv.org/abs/2509.09734v1" title="MCP-AgentBench: Evaluating Real-World Language Agent Performance" contentType="text" description="Abstract page for arXiv paper 2509.09734v1" length="10187">Full paper content...</page>',
      ),
    },
    {
      name: 'should handle page without content',
      input: [
        {
          url: 'https://example.com',
          title: 'Empty Page',
          contentType: 'text',
        },
      ],
      expected: wrap(' <page url="https://example.com" title="Empty Page" contentType="text" />'),
    },
    {
      name: 'should handle error items',
      input: [
        {
          errorType: 'NetworkError',
          errorMessage: 'Failed to fetch the page',
          url: 'https://failed.com',
        },
      ],
      expected: wrap(
        ' <error errorType="NetworkError" errorMessage="Failed to fetch the page" url="https://failed.com" />',
      ),
    },
    {
      name: 'should escape XML special characters in attributes',
      input: [
        {
          url: 'https://example.com?foo=bar&baz=qux',
          title: 'Title with <tags> & "quotes"',
          description: 'Description with special chars & <html>',
        },
      ],
      expected: wrap(
        ' <page url="https://example.com?foo=bar&amp;baz=qux" title="Title with &lt;tags&gt; &amp; &quot;quotes&quot;" description="Description with special chars &amp; &lt;html&gt;" />',
      ),
    },
    {
      name: 'should escape XML special characters in content',
      input: [
        {
          url: 'https://example.com',
          title: 'Test',
          content: 'Content with <html> tags & special chars',
        },
      ],
      expected: wrap(
        ' <page url="https://example.com" title="Test">Content with &lt;html&gt; tags &amp; special chars</page>',
      ),
    },
    {
      name: 'should handle multiple pages with mixed success and errors',
      input: [
        {
          url: 'https://success1.com',
          title: 'First Page',
          content: 'First content',
        },
        {
          errorType: 'TimeoutError',
          errorMessage: 'Request timeout',
          url: 'https://failed.com',
        },
        {
          url: 'https://success2.com',
          title: 'Second Page',
          content: 'Second content',
        },
      ],
      expected: wrap(
        ' <page url="https://success1.com" title="First Page">First content</page>',
        ' <error errorType="TimeoutError" errorMessage="Request timeout" url="https://failed.com" />',
        ' <page url="https://success2.com" title="Second Page">Second content</page>',
      ),
    },
    {
      name: 'should handle error without url',
      input: [
        {
          errorType: 'UnknownError',
          errorMessage: 'Unknown error occurred',
        },
      ],
      expected: wrap(' <error errorType="UnknownError" errorMessage="Unknown error occurred" />'),
    },
    {
      name: 'should handle real arXiv example',
      input: [
        {
          url: 'http://arxiv.org/abs/2508.01780v1',
          title: 'LiveMCPBench: Can Agents Navigate an Ocean of MCP Tools?',
          contentType: 'text',
          description: 'Abstract page for arXiv paper 2508.01780v1',
          length: 10512,
          content:
            'With the rapid development of Model Context Protocol (MCP), the number of MCP servers has surpassed 10,000...',
        },
      ],
      expected: wrap(
        ' <page url="http://arxiv.org/abs/2508.01780v1" title="LiveMCPBench: Can Agents Navigate an Ocean of MCP Tools?" contentType="text" description="Abstract page for arXiv paper 2508.01780v1" length="10512">With the rapid development of Model Context Protocol (MCP), the number of MCP servers has surpassed 10,000...</page>',
      ),
    },
  ];

  for (const { name, input, expected } of cases) {
    it(name, () => {
      expect(crawlResultsPrompt(input)).toBe(expected);
    });
  }
});
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
import { escapeXmlAttr, escapeXmlContent } from './xmlEscape';
2+
3+
/**
 * A successfully crawled page, as accepted by `crawlResultsPrompt`.
 *
 * Only `url` is required; every other field is emitted only when present.
 */
export interface CrawlResultItem {
  /** Page text; becomes the body of the `<page>` element when non-empty. */
  content?: string;
  /** Kind of payload held in `content`; emitted as the `contentType` attribute. */
  contentType?: 'text' | 'json';
  /** Short summary of the page; emitted as the `description` attribute. */
  description?: string;
  /**
   * Emitted as the `length` attribute whenever it is defined (including `0`).
   * Presumably the size of `content` — confirm against the crawler that fills it.
   */
  length?: number;
  /** Not read by `crawlResultsPrompt`; carried for other consumers. */
  siteName?: string;
  /** Page title; emitted as the `title` attribute. */
  title?: string;
  /** Address of the crawled page; always emitted as the `url` attribute. */
  url: string;
}
12+
13+
/**
 * A failed crawl attempt, as accepted by `crawlResultsPrompt`.
 *
 * `crawlResultsPrompt` recognises an error item by the presence of the
 * `errorMessage` key.
 */
export interface CrawlErrorItem {
  /** Not read by `crawlResultsPrompt`; error elements are emitted without a body. */
  content?: string;
  /** Human-readable failure description; always emitted as `errorMessage`. */
  errorMessage: string;
  /** Failure category (e.g. `NetworkError`); always emitted as `errorType`. */
  errorType: string;
  /** Address that failed to crawl; emitted as `url` only when non-empty. */
  url?: string;
}
19+
20+
/**
 * Serialise crawl results into a compact XML string for token efficiency.
 *
 * Metadata is written as attributes and the page text as element content.
 * Each input item becomes one line inside `<crawlResults>`:
 * - an item carrying an `errorMessage` key becomes a self-closing `<error />`
 *   with `errorType`, `errorMessage` and, when non-empty, `url`;
 * - any other item becomes a `<page>` with `url` first, followed by the
 *   non-empty `title`, `contentType`, `description` and a defined `length`.
 *   A page without content is emitted self-closing.
 *
 * @param results - Crawled pages and/or crawl failures, in output order.
 * @returns `<no_crawl_results />` for an empty list, otherwise the
 * `<crawlResults>` document with one child element per input item.
 *
 * @example
 * ```typescript
 * const results = [
 *   { title: "Page Title", url: "https://example.com", content: "..." }
 * ];
 * const xml = crawlResultsPrompt(results);
 * // Output:
 * // <crawlResults>
 * //  <page url="https://example.com" title="Page Title">...</page>
 * // </crawlResults>
 * ```
 */
export const crawlResultsPrompt = (results: Array<CrawlResultItem | CrawlErrorItem>): string => {
  if (results.length === 0) return '<no_crawl_results />';

  const lines: string[] = [];

  for (const entry of results) {
    // Failures are identified by the presence of the `errorMessage` key.
    if ('errorMessage' in entry) {
      const errorAttrs = [
        `errorType="${escapeXmlAttr(entry.errorType)}"`,
        `errorMessage="${escapeXmlAttr(entry.errorMessage)}"`,
      ];
      if (entry.url) errorAttrs.push(`url="${escapeXmlAttr(entry.url)}"`);

      lines.push(` <error ${errorAttrs.join(' ')} />`);
      continue;
    }

    // `url` always leads; the optional text attributes follow in a fixed order.
    const pageAttrs = [`url="${escapeXmlAttr(entry.url)}"`];
    for (const key of ['title', 'contentType', 'description'] as const) {
      const value = entry[key];
      if (value) pageAttrs.push(`${key}="${escapeXmlAttr(value)}"`);
    }

    // Compared against undefined (not truthiness) so that a length of 0 is kept.
    if (entry.length !== undefined) pageAttrs.push(`length="${entry.length}"`);

    const openTag = ` <page ${pageAttrs.join(' ')}`;
    const body = entry.content ? escapeXmlContent(entry.content) : '';

    // Pages with no (escaped) body collapse to a self-closing element.
    lines.push(body ? `${openTag}>${body}</page>` : `${openTag} />`);
  }

  return `<crawlResults>\n${lines.join('\n')}\n</crawlResults>`;
};
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
export * from './crawlResults';
2+
export * from './searchResults';

0 commit comments

Comments
 (0)