Skip to content

Commit 438212c

Browse files
committed
Fix dtypes not being used when parsing CSV
1 parent 14f9e49 commit 438212c

File tree

5 files changed

+103
-7
lines changed

5 files changed

+103
-7
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,5 @@ node_modules
33
test/fixtures/*
44
test/samples/*
55
*.xlsx
6+
7+
testsss

src/danfojs-base/io/browser/io.csv.ts

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,16 @@ import Papa from 'papaparse'
4848
*/
4949
const $readCSV = async (file: any, options?: CsvInputOptionsBrowser): Promise<DataFrame> => {
5050
const frameConfig = options?.frameConfig || {}
51+
const hasStringType = frameConfig.dtypes?.includes("string")
5152

5253
return new Promise((resolve, reject) => {
5354
let hasError = false;
5455

5556
Papa.parse(file, {
5657
header: true,
57-
dynamicTyping: true,
58+
dynamicTyping: !hasStringType,
5859
skipEmptyLines: 'greedy',
60+
delimiter: ",",
5961
...options,
6062
error: (error) => {
6163
hasError = true;
@@ -108,12 +110,13 @@ const $streamCSV = async (file: string, callback: (df: DataFrame) => void, optio
108110
return new Promise((resolve, reject) => {
109111
let count = 0
110112
let hasError = false;
111-
113+
const hasStringType = frameConfig.dtypes?.includes("string")
112114
Papa.parse(file, {
113-
...options,
114-
dynamicTyping: true,
115115
header: true,
116116
download: true,
117+
dynamicTyping: !hasStringType,
118+
delimiter: ",",
119+
...options,
117120
step: results => {
118121
if (hasError) return;
119122
try {

src/danfojs-base/io/node/io.csv.ts

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,14 +50,16 @@ import fs from 'fs'
5050
*/
5151
const $readCSV = async (filePath: string, options?: CsvInputOptionsNode): Promise<DataFrame> => {
5252
const frameConfig = options?.frameConfig || {}
53+
const hasStringType = frameConfig.dtypes?.includes("string")
5354

5455
if (filePath.startsWith("http") || filePath.startsWith("https")) {
5556
return new Promise((resolve, reject) => {
5657
let hasError = false;
5758
const optionsWithDefaults = {
5859
header: true,
59-
dynamicTyping: true,
60+
dynamicTyping: !hasStringType,
6061
skipEmptyLines: 'greedy',
62+
delimiter: ",",
6163
...options,
6264
}
6365

@@ -116,7 +118,8 @@ const $readCSV = async (filePath: string, options?: CsvInputOptionsNode): Promis
116118

117119
Papa.parse(fileStream, {
118120
header: true,
119-
dynamicTyping: true,
121+
dynamicTyping: !hasStringType,
122+
delimiter: ",",
120123
...options,
121124
error: (error) => {
122125
hasError = true;

src/danfojs-browser/tests/io/csv.reader.test.js

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,41 @@ describe("readCSV", function () {
9797
assert.ok(error instanceof Error);
9898
}
9999
});
100+
101+
it("Preserves leading zeros when dtype is string", async function () {
102+
// Create a CSV file with leading zeros
103+
const csvContent = "codes\n012345\n001234";
104+
const file = new File([ csvContent ], "leading_zeros.csv", { type: "text/csv" });
105+
106+
const df = await dfd.readCSV(file, {
107+
frameConfig: {
108+
dtypes: [ "string" ]
109+
}
110+
});
111+
112+
assert.deepEqual(df.values, [ [ "012345" ], [ "001234" ] ]);
113+
assert.deepEqual(df.dtypes, [ "string" ]);
114+
115+
// Verify the values are actually strings
116+
const jsonData = dfd.toJSON(df);
117+
assert.deepEqual(jsonData, [ { codes: "012345" }, { codes: "001234" } ]);
118+
});
119+
120+
it("Converts to numbers when dtype is not string", async function () {
121+
// Create a CSV file with leading zeros
122+
const csvContent = "codes\n012345\n001234";
123+
const file = new File([ csvContent ], "leading_zeros.csv", { type: "text/csv" });
124+
125+
const df = await dfd.readCSV(file); // default behavior without string dtype
126+
127+
// Values should be converted to numbers
128+
assert.deepEqual(df.values, [ [ 12345 ], [ 1234 ] ]);
129+
assert.deepEqual(df.dtypes, [ "int32" ]);
130+
131+
// Verify JSON output
132+
const jsonData = dfd.toJSON(df);
133+
assert.deepEqual(jsonData, [ { codes: 12345 }, { codes: 1234 } ]);
134+
});
100135
});
101136

102137
// describe("streamCSV", function () {

src/danfojs-node/test/io/csv.reader.test.ts

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import path from "path";
22
import chai, { assert, expect } from "chai";
33
import { describe, it } from "mocha";
44
import chaiAsPromised from "chai-as-promised";
5-
import { DataFrame, readCSV, Series, streamCSV, toCSV } from "../../dist/danfojs-node/src";
5+
import { DataFrame, readCSV, Series, streamCSV, toCSV, toJSON } from "../../dist/danfojs-node/src";
66
import fs from 'fs';
77
import process from 'process';
88

@@ -112,6 +112,59 @@ describe("readCSV", function () {
112112
const filePath = path.join(testSamplesDir, "invalid.csv");
113113
await expect(readCSV(filePath)).to.be.rejectedWith("ENOENT: no such file or directory");
114114
});
115+
116+
it("Preserves leading zeros when dtype is string", async function () {
117+
const filePath = path.join(testSamplesDir, "leading_zeros.csv");
118+
// Create test CSV file
119+
fs.writeFileSync(filePath, "codes\n012345\n001234");
120+
121+
try {
122+
const df = await readCSV(filePath, {
123+
frameConfig: {
124+
dtypes: ["string"]
125+
}
126+
});
127+
128+
assert.deepEqual(df.values, [["012345"], ["001234"]]);
129+
assert.deepEqual(df.dtypes, ["string"]);
130+
131+
// Verify the values are actually strings
132+
const jsonData = toJSON(df);
133+
assert.deepEqual(jsonData, [{ codes: "012345" }, { codes: "001234" }]);
134+
135+
// Clean up
136+
fs.unlinkSync(filePath);
137+
} catch (error) {
138+
// Clean up even if test fails
139+
fs.unlinkSync(filePath);
140+
throw error;
141+
}
142+
});
143+
144+
it("Converts to numbers when dtype is not string", async function () {
145+
const filePath = path.join(testSamplesDir, "leading_zeros.csv");
146+
// Create test CSV file
147+
fs.writeFileSync(filePath, "codes\n012345\n001234");
148+
149+
try {
150+
const df = await readCSV(filePath); // default behavior without string dtype
151+
152+
// Values should be converted to numbers
153+
assert.deepEqual(df.values, [[12345], [1234]]);
154+
assert.deepEqual(df.dtypes, ["int32"]);
155+
156+
// Verify JSON output
157+
const jsonData = toJSON(df);
158+
assert.deepEqual(jsonData, [{ codes: 12345 }, { codes: 1234 }]);
159+
160+
// Clean up
161+
fs.unlinkSync(filePath);
162+
} catch (error) {
163+
// Clean up even if test fails
164+
fs.unlinkSync(filePath);
165+
throw error;
166+
}
167+
});
115168
});
116169

117170
describe("streamCSV", function () {

0 commit comments

Comments
 (0)