| 
 | 1 | +import { describe, expect, it } from 'vitest';  | 
 | 2 | + | 
 | 3 | +import { crawlResultsPrompt } from './crawlResults';  | 
 | 4 | + | 
 | 5 | +describe('crawlResultsPrompt', () => {  | 
 | 6 | +  it('should return empty XML for empty results', () => {  | 
 | 7 | +    const result = crawlResultsPrompt([]);  | 
 | 8 | +    expect(result).toBe('<no_crawl_results />');  | 
 | 9 | +  });  | 
 | 10 | + | 
 | 11 | +  it('should convert basic crawl result to compact XML format', () => {  | 
 | 12 | +    const results = [  | 
 | 13 | +      {  | 
 | 14 | +        url: 'https://example.com',  | 
 | 15 | +        title: 'Example Page',  | 
 | 16 | +        content: 'Page content here',  | 
 | 17 | +      },  | 
 | 18 | +    ];  | 
 | 19 | + | 
 | 20 | +    const xml = crawlResultsPrompt(results);  | 
 | 21 | + | 
 | 22 | +    expect(xml).toEqual(`<crawlResults>  | 
 | 23 | +  <page url="https://example.com" title="Example Page">Page content here</page>  | 
 | 24 | +</crawlResults>`);  | 
 | 25 | +  });  | 
 | 26 | + | 
 | 27 | +  it('should include all optional metadata fields', () => {  | 
 | 28 | +    const results = [  | 
 | 29 | +      {  | 
 | 30 | +        url: 'http://arxiv.org/abs/2509.09734v1',  | 
 | 31 | +        title: 'MCP-AgentBench: Evaluating Real-World Language Agent Performance',  | 
 | 32 | +        contentType: 'text' as const,  | 
 | 33 | +        description: 'Abstract page for arXiv paper 2509.09734v1',  | 
 | 34 | +        length: 10187,  | 
 | 35 | +        content: 'Full paper content...',  | 
 | 36 | +      },  | 
 | 37 | +    ];  | 
 | 38 | + | 
 | 39 | +    const xml = crawlResultsPrompt(results);  | 
 | 40 | + | 
 | 41 | +    expect(xml).toEqual(`<crawlResults>  | 
 | 42 | +  <page url="http://arxiv.org/abs/2509.09734v1" title="MCP-AgentBench: Evaluating Real-World Language Agent Performance" contentType="text" description="Abstract page for arXiv paper 2509.09734v1" length="10187">Full paper content...</page>  | 
 | 43 | +</crawlResults>`);  | 
 | 44 | +  });  | 
 | 45 | + | 
 | 46 | +  it('should handle page without content', () => {  | 
 | 47 | +    const results = [  | 
 | 48 | +      {  | 
 | 49 | +        url: 'https://example.com',  | 
 | 50 | +        title: 'Empty Page',  | 
 | 51 | +        contentType: 'text' as const,  | 
 | 52 | +      },  | 
 | 53 | +    ];  | 
 | 54 | + | 
 | 55 | +    const xml = crawlResultsPrompt(results);  | 
 | 56 | + | 
 | 57 | +    expect(xml).toEqual(`<crawlResults>  | 
 | 58 | +  <page url="https://example.com" title="Empty Page" contentType="text" />  | 
 | 59 | +</crawlResults>`);  | 
 | 60 | +  });  | 
 | 61 | + | 
 | 62 | +  it('should handle error items', () => {  | 
 | 63 | +    const results = [  | 
 | 64 | +      {  | 
 | 65 | +        errorType: 'NetworkError',  | 
 | 66 | +        errorMessage: 'Failed to fetch the page',  | 
 | 67 | +        url: 'https://failed.com',  | 
 | 68 | +      },  | 
 | 69 | +    ];  | 
 | 70 | + | 
 | 71 | +    const xml = crawlResultsPrompt(results);  | 
 | 72 | + | 
 | 73 | +    expect(xml).toEqual(`<crawlResults>  | 
 | 74 | +  <error errorType="NetworkError" errorMessage="Failed to fetch the page" url="https://failed.com" />  | 
 | 75 | +</crawlResults>`);  | 
 | 76 | +  });  | 
 | 77 | + | 
 | 78 | +  it('should escape XML special characters in attributes', () => {  | 
 | 79 | +    const results = [  | 
 | 80 | +      {  | 
 | 81 | +        url: 'https://example.com?foo=bar&baz=qux',  | 
 | 82 | +        title: 'Title with <tags> & "quotes"',  | 
 | 83 | +        description: 'Description with special chars & <html>',  | 
 | 84 | +      },  | 
 | 85 | +    ];  | 
 | 86 | + | 
 | 87 | +    const xml = crawlResultsPrompt(results);  | 
 | 88 | + | 
 | 89 | +    expect(xml).toEqual(`<crawlResults>  | 
 | 90 | +  <page url="https://example.com?foo=bar&baz=qux" title="Title with <tags> & "quotes"" description="Description with special chars & <html>" />  | 
 | 91 | +</crawlResults>`);  | 
 | 92 | +  });  | 
 | 93 | + | 
 | 94 | +  it('should escape XML special characters in content', () => {  | 
 | 95 | +    const results = [  | 
 | 96 | +      {  | 
 | 97 | +        url: 'https://example.com',  | 
 | 98 | +        title: 'Test',  | 
 | 99 | +        content: 'Content with <html> tags & special chars',  | 
 | 100 | +      },  | 
 | 101 | +    ];  | 
 | 102 | + | 
 | 103 | +    const xml = crawlResultsPrompt(results);  | 
 | 104 | + | 
 | 105 | +    expect(xml).toEqual(`<crawlResults>  | 
 | 106 | +  <page url="https://example.com" title="Test">Content with <html> tags & special chars</page>  | 
 | 107 | +</crawlResults>`);  | 
 | 108 | +  });  | 
 | 109 | + | 
 | 110 | +  it('should handle multiple pages with mixed success and errors', () => {  | 
 | 111 | +    const results = [  | 
 | 112 | +      {  | 
 | 113 | +        url: 'https://success1.com',  | 
 | 114 | +        title: 'First Page',  | 
 | 115 | +        content: 'First content',  | 
 | 116 | +      },  | 
 | 117 | +      {  | 
 | 118 | +        errorType: 'TimeoutError',  | 
 | 119 | +        errorMessage: 'Request timeout',  | 
 | 120 | +        url: 'https://failed.com',  | 
 | 121 | +      },  | 
 | 122 | +      {  | 
 | 123 | +        url: 'https://success2.com',  | 
 | 124 | +        title: 'Second Page',  | 
 | 125 | +        content: 'Second content',  | 
 | 126 | +      },  | 
 | 127 | +    ];  | 
 | 128 | + | 
 | 129 | +    const xml = crawlResultsPrompt(results);  | 
 | 130 | + | 
 | 131 | +    expect(xml).toEqual(`<crawlResults>  | 
 | 132 | +  <page url="https://success1.com" title="First Page">First content</page>  | 
 | 133 | +  <error errorType="TimeoutError" errorMessage="Request timeout" url="https://failed.com" />  | 
 | 134 | +  <page url="https://success2.com" title="Second Page">Second content</page>  | 
 | 135 | +</crawlResults>`);  | 
 | 136 | +  });  | 
 | 137 | + | 
 | 138 | +  it('should handle error without url', () => {  | 
 | 139 | +    const results = [  | 
 | 140 | +      {  | 
 | 141 | +        errorType: 'UnknownError',  | 
 | 142 | +        errorMessage: 'Unknown error occurred',  | 
 | 143 | +      },  | 
 | 144 | +    ];  | 
 | 145 | + | 
 | 146 | +    const xml = crawlResultsPrompt(results);  | 
 | 147 | + | 
 | 148 | +    expect(xml).toEqual(`<crawlResults>  | 
 | 149 | +  <error errorType="UnknownError" errorMessage="Unknown error occurred" />  | 
 | 150 | +</crawlResults>`);  | 
 | 151 | +  });  | 
 | 152 | + | 
 | 153 | +  it('should handle real arXiv example', () => {  | 
 | 154 | +    const results = [  | 
 | 155 | +      {  | 
 | 156 | +        url: 'http://arxiv.org/abs/2508.01780v1',  | 
 | 157 | +        title: 'LiveMCPBench: Can Agents Navigate an Ocean of MCP Tools?',  | 
 | 158 | +        contentType: 'text' as const,  | 
 | 159 | +        description: 'Abstract page for arXiv paper 2508.01780v1',  | 
 | 160 | +        length: 10512,  | 
 | 161 | +        content:  | 
 | 162 | +          'With the rapid development of Model Context Protocol (MCP), the number of MCP servers has surpassed 10,000...',  | 
 | 163 | +      },  | 
 | 164 | +    ];  | 
 | 165 | + | 
 | 166 | +    const xml = crawlResultsPrompt(results);  | 
 | 167 | + | 
 | 168 | +    expect(xml).toEqual(`<crawlResults>  | 
 | 169 | +  <page url="http://arxiv.org/abs/2508.01780v1" title="LiveMCPBench: Can Agents Navigate an Ocean of MCP Tools?" contentType="text" description="Abstract page for arXiv paper 2508.01780v1" length="10512">With the rapid development of Model Context Protocol (MCP), the number of MCP servers has surpassed 10,000...</page>  | 
 | 170 | +</crawlResults>`);  | 
 | 171 | +  });  | 
 | 172 | +});  | 
0 commit comments