Skip to content

Commit 744bbd9

Browse files
committed
fs: improve promise based readFile performance for big files
This significantly reduces the peak memory for the promise based readFile operation by reusing a single memory chunk after each read and stringifying that chunk immediately. Signed-off-by: Ruben Bridgewater <[email protected]>
1 parent 5e57d24 commit 744bbd9

File tree

3 files changed

+62
-35
lines changed

3 files changed

+62
-35
lines changed

benchmark/fs/readfile-promises.js

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,19 @@ const filename = path.resolve(tmpdir.path,
1515

1616
const bench = common.createBenchmark(main, {
1717
duration: [5],
18-
len: [1024, 16 * 1024 * 1024],
18+
len: [
19+
1024,
20+
512 * 1024,
21+
4 * 1024 ** 2,
22+
8 * 1024 ** 2,
23+
16 * 1024 ** 2,
24+
32 * 1024 ** 2
25+
],
26+
encoding: ['', 'utf8'],
1927
concurrent: [1, 10]
2028
});
2129

22-
function main({ len, duration, concurrent }) {
30+
function main({ len, duration, encoding, concurrent }) {
2331
try {
2432
fs.unlinkSync(filename);
2533
} catch {
@@ -44,7 +52,7 @@ function main({ len, duration, concurrent }) {
4452
}, duration * 1000);
4553

4654
function read() {
47-
fs.promises.readFile(filename)
55+
fs.promises.readFile(filename, { encoding })
4856
.then((res) => afterRead(undefined, res))
4957
.catch((err) => afterRead(err));
5058
}

lib/fs.js

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,9 @@ function readFileAfterStat(err, stats) {
343343
if (err)
344344
return context.close(err);
345345

346+
// TODO(BridgeAR): Check if allocating a smaller chunk is better performance
347+
// wise, similar to the promise based version (less peak memory and chunked
348+
// stringify operations vs multiple C++/JS boundary crossings).
346349
const size = context.size = isFileType(stats, S_IFREG) ? stats[8] : 0;
347350

348351
if (size > kIoMaxLength) {
@@ -352,6 +355,9 @@ function readFileAfterStat(err, stats) {
352355

353356
try {
354357
if (size === 0) {
358+
// TODO(BridgeAR): We are able to optimize this in case an encoding is used. If
359+
// that's the case, let's use the StringDecoder and directly concat the
360+
// result and reuse the former chunk instead of allocating a new one.
355361
context.buffers = [];
356362
} else {
357363
context.buffer = Buffer.allocUnsafeSlow(size);

lib/internal/fs/promises.js

Lines changed: 45 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ const {
8686
promisify,
8787
} = require('internal/util');
8888
const { EventEmitterMixin } = require('internal/event_target');
89+
const { StringDecoder } = require('string_decoder');
8990
const { watch } = require('internal/fs/watchers');
9091
const { isIterable } = require('internal/streams/utils');
9192
const assert = require('internal/assert');
@@ -416,63 +417,75 @@ async function writeFileHandle(filehandle, data, signal, encoding) {
416417

417418
async function readFileHandle(filehandle, options) {
418419
const signal = options?.signal;
420+
const encoding = options?.encoding;
421+
const decoder = encoding && new StringDecoder(encoding);
419422

420423
checkAborted(signal);
421424

422425
const statFields = await binding.fstat(filehandle.fd, false, kUsePromises);
423426

424427
checkAborted(signal);
425428

426-
let size;
429+
let size = 0;
430+
let length = 0;
427431
if ((statFields[1/* mode */] & S_IFMT) === S_IFREG) {
428432
size = statFields[8/* size */];
433+
length = encoding ? MathMin(size, kReadFileBufferLength) : size;
429434
} else {
430-
size = 0;
435+
length = kReadFileUnknownBufferLength;
431436
}
432437

433438
if (size > kIoMaxLength)
434439
throw new ERR_FS_FILE_TOO_LARGE(size);
435440

436-
let endOfFile = false;
437441
let totalRead = 0;
438-
const noSize = size === 0;
439-
const buffers = [];
440-
const fullBuffer = noSize ? undefined : Buffer.allocUnsafeSlow(size);
441-
do {
442+
let buffer = Buffer.allocUnsafeSlow(length);
443+
let result = '';
444+
let isBufferFull = true;
445+
let offset = 0;
446+
let buffers;
447+
448+
while (true) {
442449
checkAborted(signal);
443-
let buffer;
444-
let offset;
445-
let length;
446-
if (noSize) {
447-
buffer = Buffer.allocUnsafeSlow(kReadFileUnknownBufferLength);
448-
offset = 0;
449-
length = kReadFileUnknownBufferLength;
450-
} else {
451-
buffer = fullBuffer;
452-
offset = totalRead;
450+
if (size === 0) {
453451
length = MathMin(size - totalRead, kReadFileBufferLength);
454452
}
455453

456454
const bytesRead = (await binding.read(filehandle.fd, buffer, offset,
457-
length, -1, kUsePromises)) || 0;
455+
length, -1, kUsePromises)) ?? 0;
458456
totalRead += bytesRead;
459-
endOfFile = bytesRead === 0 || totalRead === size;
460-
if (noSize && bytesRead > 0) {
461-
const isBufferFull = bytesRead === kReadFileUnknownBufferLength;
462-
const chunkBuffer = isBufferFull ? buffer : buffer.slice(0, bytesRead);
463-
ArrayPrototypePush(buffers, chunkBuffer);
457+
458+
if (bytesRead === 0 || totalRead === size) {
459+
const singleRead = bytesRead === totalRead;
460+
if (!encoding) {
461+
if (size === 0 && !singleRead) {
462+
return Buffer.concat(buffers, totalRead);
463+
}
464+
return buffer
465+
}
466+
467+
if (singleRead) {
468+
return buffer.toString(encoding);
469+
}
470+
result += decoder.end(buffer.slice(0, bytesRead));
471+
return result;
464472
}
465-
} while (!endOfFile);
466473

467-
let result;
468-
if (size > 0) {
469-
result = totalRead === size ? fullBuffer : fullBuffer.slice(0, totalRead);
470-
} else {
471-
result = buffers.length === 1 ? buffers[0] : Buffer.concat(buffers,
472-
totalRead);
474+
if (size === 0) {
475+
isBufferFull = bytesRead === kReadFileUnknownBufferLength;
476+
// Unknown file size requires chunks.
477+
if (!encoding) {
478+
buffers ??= [];
479+
ArrayPrototypePush(buffers, buffer);
480+
buffer = Buffer.allocUnsafeSlow(kReadFileUnknownBufferLength)
481+
}
482+
}
483+
if (encoding) {
484+
result += decoder.write(isBufferFull ? buffer : buffer.slice(0, bytesRead));
485+
} else if(size !== 0) {
486+
offset += bytesRead;
487+
}
473488
}
474-
475-
return options.encoding ? result.toString(options.encoding) : result;
476489
}
477490

478491
// All of the functions are defined as async in order to ensure that errors

0 commit comments

Comments
 (0)