Skip to content

Commit 6019a7d

Browse files
urakozz, Yury Kozyrev, hntrl
authored
fix(langchain): update JSONL loader to support complex json structures (#8863)
Co-authored-by: Yury Kozyrev <[email protected]> Co-authored-by: Hunter Lovell <[email protected]> Co-authored-by: Hunter Lovell <[email protected]>
1 parent 707a768 commit 6019a7d

File tree

4 files changed

+37
-1
lines changed

4 files changed

+37
-1
lines changed

.changeset/rotten-rockets-allow.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"langchain": patch
3+
---
4+
5+
update JSONL loader to support complex json structures

langchain/src/document_loaders/fs/json.ts

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,18 @@ export class JSONLinesLoader extends TextLoader {
168168
.filter(Boolean)
169169
.map((line) => JSON.parse(line));
170170
const pointer = jsonpointer.compile(this.pointer);
171-
return jsons.map((json) => pointer.get(json));
171+
return jsons.map((json) => {
172+
const data = pointer.get(json);
173+
if (typeof data === "string") {
174+
return data;
175+
}
176+
if (!data) {
177+
return "";
178+
}
179+
if (typeof data === "object") {
180+
return JSON.stringify(data);
181+
}
182+
return "";
183+
});
172184
}
173185
}

langchain/src/document_loaders/tests/example_data/ml_training.jsonl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
{"systemInstruction":{"role":"system","parts":[{"text":"You are professional in being an expert\n**There must be only JSON in the output.**"}]},"contents":[{"role":"user","parts":[{"text":"{\"product_id\":\"product1\",\"version\":4,\"supplier\":{\"name\":\"BASF GmbH\"} }"}]},{"role":"model","parts":[{"text":"{\"is_company\":true }"}]}]}
2+
{"systemInstruction":{"role":"system","parts":[{"text":"You are professional in being an expert\n**There must be only JSON in the output.**"}]},"contents":[{"role":"user","parts":[{"text":"{\"product_id\":\"product2\",\"version\":4,\"supplier\":{\"name\":\"Martin Burger\"}}"}]},{"role":"model","parts":[{"text":"{\"is_company\":false}"}]}]}

langchain/src/document_loaders/tests/jsonl.test.ts

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,20 @@ test("Test JSON loader from file", async () => {
2020
})
2121
);
2222
});
23+
24+
test("Test JSON loader from complex JSONL file", async () => {
25+
const filePath = path.resolve(
26+
path.dirname(url.fileURLToPath(import.meta.url)),
27+
"./example_data/ml_training.jsonl"
28+
);
29+
const loader = new JSONLinesLoader(filePath, "/contents");
30+
const docs = await loader.load();
31+
32+
expect(docs.length).toBe(2);
33+
expect(docs[0].metadata).toEqual({ source: filePath, line: 1 });
34+
const [user, model] = JSON.parse(docs[0].pageContent);
35+
const userData = JSON.parse(user.parts[0].text);
36+
expect(userData.supplier.name).toEqual("BASF GmbH");
37+
const modelData = JSON.parse(model.parts[0].text);
38+
expect(modelData.is_company).toBeTruthy();
39+
});

0 commit comments

Comments
 (0)