Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions langchain/.eslintrc.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ module.exports = {
"no-console": 0,
"no-restricted-syntax": 0,
"no-shadow": 0,
"no-continue": 0,
"no-underscore-dangle": 0,
"no-use-before-define": 0,
"no-useless-constructor": 0,
Expand Down
8 changes: 4 additions & 4 deletions langchain/src/document.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ export interface DocumentParams<
> {
pageContent: string;

metadata: Metadata;
metadata?: Metadata;
}

/**
Expand All @@ -19,10 +19,10 @@ export class Document<

metadata: Metadata;

constructor(fields?: Partial<DocumentParams<Metadata>>) {
this.pageContent = fields?.pageContent
constructor(fields: DocumentParams<Metadata>) {
this.pageContent = fields.pageContent
? fields.pageContent.toString()
: this.pageContent;
this.metadata = fields?.metadata ?? ({} as Metadata);
this.metadata = fields.metadata ?? ({} as Metadata);
}
}
3 changes: 3 additions & 0 deletions langchain/src/document_loaders/fs/docx.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ export class DocxLoader extends BufferLoader {
const docx = await extractRawText({
buffer: raw,
});

if (!docx.value) return [];

return [
new Document({
pageContent: docx.value,
Expand Down
7 changes: 6 additions & 1 deletion langchain/src/document_loaders/fs/epub.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@ export class EPubLoader extends BaseDocumentLoader {
const chapters = await Promise.all(
epub.flow.map(async (chapter) => {
if (!chapter.id) return null as never;
const html: string = await epub.getChapterRawAsync(chapter.id);
if (!html) return null as never;
return {
html: await epub.getChapterRawAsync(chapter.id),
html,
title: chapter.title,
};
})
Expand All @@ -37,6 +39,9 @@ export class EPubLoader extends BaseDocumentLoader {

const parsed = await this.parse(epub);
const metadata = { source: this.filePath };

if (parsed.length === 0) return [];

return this.splitChapters
? parsed.map(
(chapter) =>
Expand Down
9 changes: 9 additions & 0 deletions langchain/src/document_loaders/fs/pdf.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@ export class PDFLoader extends BufferLoader {
for (let i = 1; i <= pdf.numPages; i += 1) {
const page = await pdf.getPage(i);
const content = await page.getTextContent();

if (content.items.length === 0) {
continue;
}

const text = content.items
.map((item) => (item as TextItem).str)
.join("\n");
Expand Down Expand Up @@ -61,6 +66,10 @@ export class PDFLoader extends BufferLoader {
return documents;
}

if (documents.length === 0) {
return [];
}

return [
new Document({
pageContent: documents.map((doc) => doc.pageContent).join("\n\n"),
Expand Down
7 changes: 6 additions & 1 deletion langchain/src/document_loaders/fs/srt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,12 @@ export class SRTLoader extends TextLoader {
const { SRTParser2 } = await SRTLoaderImports();
const parser = new SRTParser2();
const srts = parser.fromSrt(raw);
return [srts.map((srt) => srt.text).join(" ")];
return [
srts
.map((srt) => srt.text)
.filter(Boolean)
.join(" "),
];
}
}

Expand Down
2 changes: 1 addition & 1 deletion langchain/src/document_loaders/fs/unstructured.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ export class UnstructuredLoader extends BaseDocumentLoader {
`Expected partitioning request to return an array, but got ${elements}`
);
}
return elements as Element[];
return elements.filter((el) => typeof el.text === "string") as Element[];
}

async load(): Promise<Document[]> {
Expand Down
11 changes: 6 additions & 5 deletions langchain/src/vectorstores/milvus.ts
Original file line number Diff line number Diff line change
Expand Up @@ -230,20 +230,21 @@ export class Milvus extends VectorStore {
}
const results: [Document, number][] = [];
searchResp.results.forEach((result) => {
const doc = new Document();
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const fields = { pageContent: "", metadata: {} as Record<string, any> };
Object.keys(result).forEach((key) => {
if (key === this.textField) {
doc.pageContent = result[key];
fields.pageContent = result[key];
} else if (this.fields.includes(key)) {
if (typeof result[key] === "string") {
const { isJson, obj } = checkJsonString(result[key]);
doc.metadata[key] = isJson ? obj : result[key];
fields.metadata[key] = isJson ? obj : result[key];
} else {
doc.metadata[key] = result[key];
fields.metadata[key] = result[key];
}
}
});
results.push([doc, result.score]);
results.push([new Document(fields), result.score]);
});
// console.log("Search result: " + JSON.stringify(results, null, 2));
return results;
Expand Down
4 changes: 2 additions & 2 deletions langchain/src/vectorstores/prisma.ts
Original file line number Diff line number Diff line change
Expand Up @@ -262,10 +262,10 @@ export class PrismaVectorStore<
const results: [Document<SimilarityModel<TModel, TSelectModel>>, number][] =
[];
for (const article of articles) {
if (article._distance != null) {
if (article._distance != null && article[this.contentColumn] != null) {
results.push([
new Document({
pageContent: article[this.contentColumn] as string | undefined,
pageContent: article[this.contentColumn] as string,
metadata: article,
}),
article._distance,
Expand Down