Skip to content

Commit 54ab0bd

Browse files
committed
improve caching
1 parent d2adba1 commit 54ab0bd

File tree

8 files changed

+50
-19
lines changed

8 files changed

+50
-19
lines changed

packages/file-loaders/src/loaders/excel/__snapshots__/index.test.ts.snap

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,25 @@
11
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
22

33
exports[`ExcelLoader > should aggregate content correctly (joining sheets) > aggregated_content 1`] = `
4-
"## Sheet: 表1
5-
4+
"<sheet name="表1" index="0">
65
| __EMPTY | 类别 A | 类别 B | __EMPTY_1 | __EMPTY_2 |
76
| --- | --- | --- | --- | --- |
87
| 项目 1 | 5 | 7 | | |
98
| 项目 2 | 10 | 8 | | |
109
| 项目 3 | 9 | 15 | | |
1110
| 项目 4 | 7 | 12 | | |
1211
| 项目 5 | 16 | 21 | | |
12+
</sheet>
1313
14-
---
15-
16-
## Sheet: 表2 - 表格 2
17-
14+
<sheet name="表2 - 表格 2" index="1">
1815
| __EMPTY | 类别 A | 类别 B | __EMPTY_1 | __EMPTY_2 |
1916
| --- | --- | --- | --- | --- |
2017
| 项目 1 | 5 | 7 | | |
2118
| 项目 2 | 10 | 8 | | |
2219
| 项目 3 | 9 | 15 | | |
2320
| 项目 4 | 7 | 12 | | |
24-
| 项目 5 | 16 | 21 | | |"
21+
| 项目 5 | 16 | 21 | | |
22+
</sheet>"
2523
`;
2624

2725
exports[`ExcelLoader > should load pages correctly from an Excel file (one page per sheet) 1`] = `

packages/file-loaders/src/loaders/excel/index.ts

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { readFile } from 'node:fs/promises';
33
import * as xlsx from 'xlsx';
44

55
import type { DocumentPage, FileLoaderInterface } from '../../types';
6+
import { promptTemplate } from './prompt';
67

78
const log = debug('file-loaders:excel');
89

@@ -135,13 +136,7 @@ export class ExcelLoader implements FileLoaderInterface {
135136
*/
136137
async aggregateContent(pages: DocumentPage[]): Promise<string> {
137138
log('Aggregating content from', pages.length, 'Excel pages');
138-
const result = pages
139-
.map((page) => {
140-
const sheetName = page.metadata.sheetName;
141-
const header = sheetName ? `## Sheet: ${sheetName}\n\n` : '';
142-
return header + page.pageContent;
143-
})
144-
.join('\n\n---\n\n'); // Separator between sheets
139+
const result = promptTemplate(pages);
145140

146141
log('Excel content aggregated successfully, length:', result.length);
147142
return result;
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import type { DocumentPage } from '../../types';
2+
3+
/**
 * Renders spreadsheet pages as a sequence of `<sheet>` blocks for prompt use.
 *
 * Each page becomes:
 *
 *   <sheet name="…" index="…">
 *   …pageContent…
 *   </sheet>
 *
 * and blocks are separated by one blank line.
 *
 * @param pages - Pages to render, one per sheet, in output order.
 * @returns The concatenated blocks, or an empty string when `pages` is empty.
 */
export const promptTemplate = (pages: DocumentPage[]) => {
  // Sheet names come from user files; escape the characters that would
  // otherwise terminate or corrupt the quoted attribute value.
  const escapeAttribute = (value: string) =>
    value
      .replace(/&/g, '&amp;')
      .replace(/"/g, '&quot;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');

  return (
    pages
      .map((page, index) => {
        // Optional chaining on both reads so a page without metadata does not throw.
        const sheetName = page.metadata?.sheetName;

        // `??` rather than `||`: 0 is a valid index and must not be replaced
        // by the array position.
        const sheetIndex = page.metadata?.pageNumber ?? index;

        // Omit the attribute entirely instead of emitting name="undefined".
        const nameAttribute =
          sheetName == null ? '' : ` name="${escapeAttribute(String(sheetName))}"`;

        return `<sheet${nameAttribute} index="${sheetIndex}">\n${page.pageContent}\n</sheet>`;
      })
      // Separator between sheets
      .join('\n\n')
  );
};

packages/file-loaders/src/loaders/pdf/__snapshots__/index.test.ts.snap

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
22

33
exports[`PdfLoader > should aggregate content correctly 1`] = `
4-
"简单报告
4+
"<page pageNumber="1">
5+
简单报告
56
副标题
67
轻点或点按此占位符⽂本并开始键⼊即可开始。你可以在 Mac、iPad、iPhone 或
78
iCloud.com 上查看和编辑此⽂稿。
@@ -24,10 +25,13 @@ Pages ⽂稿可⽤于⽂字处理和⻚⾯布局。此“简单报告”模板
2425
⽂本添加你⾃⼰的内容。”
2526
⻚脚
2627
1
28+
</page>
2729
30+
<page pageNumber="2">
2831
这是第⼆⻚的内容
2932
⻚脚
30-
2"
33+
2
34+
</page>"
3135
`;
3236

3337
exports[`PdfLoader > should attach document metadata correctly 1`] = `

packages/file-loaders/src/loaders/pdf/index.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import * as _pdfjsWorker from 'pdfjs-dist/legacy/build/pdf.worker.mjs';
77
import type { TextContent } from 'pdfjs-dist/types/src/display/api';
88

99
import type { DocumentPage, FileLoaderInterface } from '../../types';
10+
import { promptTemplate } from './prompt';
1011

1112
const log = debug('file-loaders:pdf');
1213

@@ -132,7 +133,7 @@ export class PdfLoader implements FileLoaderInterface {
132133
`Found ${validPages.length} valid pages for aggregation (${pages.length - validPages.length} pages with errors filtered out)`,
133134
);
134135

135-
const result = validPages.map((page) => page.pageContent).join('\n\n');
136+
const result = promptTemplate(validPages);
136137
log('PDF content aggregated successfully, length:', result.length);
137138
return result;
138139
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
import type { DocumentPage } from '../../types';
2+
3+
/**
 * Renders PDF pages as a sequence of `<page>` blocks for prompt use.
 *
 * Each page becomes:
 *
 *   <page pageNumber="…">
 *   …pageContent…
 *   </page>
 *
 * and blocks are separated by one blank line.
 *
 * @param pages - Pages to render, in output order.
 * @returns The concatenated blocks, or an empty string when `pages` is empty.
 */
export const promptTemplate = (pages: DocumentPage[]) => {
  return pages
    .map((page, index) => {
      // Page numbers are treated as 1-based, so the fallback is the 1-based
      // position rather than the raw array index. `||` is deliberate here:
      // a missing or 0 page number both fall back to the position.
      const pageNumber = page.metadata?.pageNumber || index + 1;

      return `<page pageNumber="${pageNumber}">\n${page.pageContent}\n</page>`;
    })
    .join('\n\n');
};

packages/file-loaders/test/__snapshots__/loaders.test.ts.snap

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22

33
exports[`loadFile Integration Tests > PDF Handling > should load content from a pdf file using filePath 1`] = `
44
{
5-
"content": "123",
5+
"content": "<page pageNumber="1">
6+
123
7+
</page>",
68
"fileType": "pdf",
79
"filename": "test.pdf",
810
"metadata": {

packages/file-loaders/test/loaders.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ describe('loadFile Integration Tests', () => {
4444
// Pass filePath directly to loadFile
4545
const docs = await loadFile(filePath);
4646

47-
expect(docs.content).toEqual('123');
47+
expect(docs.content).toContain('123');
4848
expect(docs.source).toEqual(filePath);
4949

5050
// @ts-expect-error

0 commit comments

Comments
 (0)