
Commit 67fb765

BridgeAR authored and danielleadams committed

fs: improve promise based readFile performance for big files

This significantly reduces the peak memory for the promise based readFile
operation by reusing a single memory chunk after each read and stringifying
that chunk immediately.

Signed-off-by: Ruben Bridgewater <ruben@bridgewater.de>
PR-URL: #44295
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>

1 parent 9c7e664 commit 67fb765
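
The key ingredient is string_decoder: stringifying each chunk as it is read only works if a multi-byte character split across two reads survives the chunk boundary, which StringDecoder guarantees and a per-chunk toString() does not. A minimal demonstration (illustrative values, not code from this commit):

'use strict';
const { StringDecoder } = require('string_decoder');

// Why StringDecoder: a multi-byte character split across two reads must
// not be decoded per chunk. Illustrative values, not code from the commit.
const decoder = new StringDecoder('utf8');
const euro = Buffer.from('€');                 // three bytes: e2 82 ac

console.log(euro.subarray(0, 2).toString());   // mangled (replacement chars)
let out = decoder.write(euro.subarray(0, 2));  // '' - bytes are buffered
out += decoder.write(euro.subarray(2));        // '€'
console.log(out);                              // '€'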

File tree

4 files changed: +82 -37 lines


benchmark/fs/readfile-promises.js

+8 -1

@@ -16,7 +16,14 @@ const filename = path.resolve(tmpdir.path,
 const bench = common.createBenchmark(main, {
   duration: [5],
   encoding: ['', 'utf-8'],
-  len: [1024, 16 * 1024 * 1024],
+  len: [
+    1024,
+    512 * 1024,
+    4 * 1024 ** 2,
+    8 * 1024 ** 2,
+    16 * 1024 ** 2,
+    32 * 1024 ** 2,
+  ],
   concurrent: [1, 10]
 });
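
For reference, benchmark files built on common.createBenchmark can, as far as I know, be run directly with key=value overrides for the parameters above, e.g.:

node benchmark/fs/readfile-promises.js len=33554432 concurrent=10 encoding=utf-8

which exercises the largest new size (32 * 1024 ** 2 bytes) with ten concurrent reads.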

lib/fs.js

+5
@@ -339,6 +339,9 @@ function readFileAfterStat(err, stats) {
   if (err)
     return context.close(err);
 
+  // TODO(BridgeAR): Check if allocating a smaller chunk is better performance
+  // wise, similar to the promise based version (less peak memory and chunked
+  // stringify operations vs multiple C++/JS boundary crossings).
   const size = context.size = isFileType(stats, S_IFREG) ? stats[8] : 0;
 
   if (size > kIoMaxLength) {
@@ -348,6 +351,8 @@ function readFileAfterStat(err, stats) {
 
   try {
     if (size === 0) {
+      // TODO(BridgeAR): If an encoding is set, use the StringDecoder to concat
+      // the result and reuse the buffer instead of allocating a new one.
       context.buffers = [];
     } else {
       context.buffer = Buffer.allocUnsafeSlow(size);
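
Both code paths allocate with Buffer.allocUnsafeSlow rather than Buffer.allocUnsafe. That choice matters for buffers that may be retained or handed back to the caller: allocUnsafe serves small requests as views onto a shared pool slab, so retaining one pins the whole slab. A small sketch of the difference (the sizes shown are the usual defaults, as far as I know):

'use strict';
const { Buffer } = require('buffer');

// Pooled: small allocUnsafe buffers share one slab of Buffer.poolSize bytes.
const pooled = Buffer.allocUnsafe(64);

// Non-pooled: allocUnsafeSlow always gets its own backing allocation, which
// is why readFile uses it for buffers that may outlive the read loop.
const standalone = Buffer.allocUnsafeSlow(64);

console.log(pooled.buffer.byteLength);      // typically 8192 (the pool slab)
console.log(standalone.buffer.byteLength);  // 64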

lib/internal/fs/promises.js

+54 -33

@@ -86,6 +86,7 @@ const {
   promisify,
 } = require('internal/util');
 const { EventEmitterMixin } = require('internal/event_target');
+const { StringDecoder } = require('string_decoder');
 const { watch } = require('internal/fs/watchers');
 const { isIterable } = require('internal/streams/utils');
 const assert = require('internal/assert');
@@ -416,63 +417,83 @@ async function writeFileHandle(filehandle, data, signal, encoding) {
 
 async function readFileHandle(filehandle, options) {
   const signal = options?.signal;
+  const encoding = options?.encoding;
+  const decoder = encoding && new StringDecoder(encoding);
 
   checkAborted(signal);
 
   const statFields = await binding.fstat(filehandle.fd, false, kUsePromises);
 
   checkAborted(signal);
 
-  let size;
+  let size = 0;
+  let length = 0;
   if ((statFields[1/* mode */] & S_IFMT) === S_IFREG) {
     size = statFields[8/* size */];
-  } else {
-    size = 0;
+    length = encoding ? MathMin(size, kReadFileBufferLength) : size;
+  }
+  if (length === 0) {
+    length = kReadFileUnknownBufferLength;
   }
 
   if (size > kIoMaxLength)
     throw new ERR_FS_FILE_TOO_LARGE(size);
 
-  let endOfFile = false;
   let totalRead = 0;
-  const noSize = size === 0;
-  const buffers = [];
-  const fullBuffer = noSize ? undefined : Buffer.allocUnsafeSlow(size);
-  do {
+  let buffer = Buffer.allocUnsafeSlow(length);
+  let result = '';
+  let offset = 0;
+  let buffers;
+  const chunkedRead = length > kReadFileBufferLength;
+
+  while (true) {
     checkAborted(signal);
-    let buffer;
-    let offset;
-    let length;
-    if (noSize) {
-      buffer = Buffer.allocUnsafeSlow(kReadFileUnknownBufferLength);
-      offset = 0;
-      length = kReadFileUnknownBufferLength;
-    } else {
-      buffer = fullBuffer;
-      offset = totalRead;
+
+    if (chunkedRead) {
       length = MathMin(size - totalRead, kReadFileBufferLength);
     }
 
     const bytesRead = (await binding.read(filehandle.fd, buffer, offset,
-                                          length, -1, kUsePromises)) || 0;
+                                          length, -1, kUsePromises)) ?? 0;
     totalRead += bytesRead;
-    endOfFile = bytesRead === 0 || totalRead === size;
-    if (noSize && bytesRead > 0) {
-      const isBufferFull = bytesRead === kReadFileUnknownBufferLength;
-      const chunkBuffer = isBufferFull ? buffer : buffer.slice(0, bytesRead);
-      ArrayPrototypePush(buffers, chunkBuffer);
+
+    if (bytesRead === 0 ||
+        totalRead === size ||
+        (bytesRead !== buffer.length && !chunkedRead)) {
+      const singleRead = bytesRead === totalRead;
+
+      const bytesToCheck = chunkedRead ? totalRead : bytesRead;
+
+      if (bytesToCheck !== buffer.length) {
+        buffer = buffer.subarray(0, bytesToCheck);
+      }
+
+      if (!encoding) {
+        if (size === 0 && !singleRead) {
+          ArrayPrototypePush(buffers, buffer);
+          return Buffer.concat(buffers, totalRead);
+        }
+        return buffer;
+      }
+
+      if (singleRead) {
+        return buffer.toString(encoding);
+      }
+      result += decoder.end(buffer);
+      return result;
     }
-  } while (!endOfFile);
 
-  let result;
-  if (size > 0) {
-    result = totalRead === size ? fullBuffer : fullBuffer.slice(0, totalRead);
-  } else {
-    result = buffers.length === 1 ? buffers[0] : Buffer.concat(buffers,
-                                                               totalRead);
+    if (encoding) {
+      result += decoder.write(buffer);
+    } else if (size !== 0) {
+      offset = totalRead;
+    } else {
+      buffers ??= [];
+      // Unknown file size requires chunks.
+      ArrayPrototypePush(buffers, buffer);
+      buffer = Buffer.allocUnsafeSlow(kReadFileUnknownBufferLength);
+    }
   }
-
-  return options.encoding ? result.toString(options.encoding) : result;
 }
 
 // All of the functions are defined as async in order to ensure that errors
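
Stripped of the internal bindings and abort handling, the encoding path of the new loop amounts to something like the following standalone sketch. It uses the public fs/promises API; readFileChunked and its 512 KiB default are illustrative stand-ins, not the internal implementation (internally the chunk sizes come from the kReadFileBufferLength and kReadFileUnknownBufferLength constants):

'use strict';
const { open } = require('fs/promises');
const { StringDecoder } = require('string_decoder');

// A minimal sketch of the new strategy for the encoding case: read into one
// reused chunk and decode it immediately, so peak memory stays near
// chunkSize instead of "whole file + whole string".
async function readFileChunked(path, encoding = 'utf8',
                               chunkSize = 512 * 1024) {
  const decoder = new StringDecoder(encoding);
  const buffer = Buffer.allocUnsafeSlow(chunkSize);
  const handle = await open(path, 'r');
  try {
    let result = '';
    while (true) {
      const { bytesRead } = await handle.read(buffer, 0, chunkSize, -1);
      if (bytesRead === 0)
        return result + decoder.end();
      // The same buffer is reused on every iteration; StringDecoder carries
      // partial multi-byte characters across chunk boundaries.
      result += decoder.write(buffer.subarray(0, bytesRead));
    }
  } finally {
    await handle.close();
  }
}

// Usage: readFileChunked('./big.txt').then((text) => console.log(text.length));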

test/parallel/test-fs-promises-readfile.js

+15 -3

@@ -1,3 +1,4 @@
+// Flags: --expose-internals
 'use strict';
 
 const common = require('../common');
@@ -6,15 +7,15 @@ const assert = require('assert');
 const path = require('path');
 const { writeFile, readFile } = require('fs').promises;
 const tmpdir = require('../common/tmpdir');
+const { internalBinding } = require('internal/test/binding');
+const fsBinding = internalBinding('fs');
 tmpdir.refresh();
 
 const fn = path.join(tmpdir.path, 'large-file');
 
 // Creating large buffer with random content
 const largeBuffer = Buffer.from(
-  Array.apply(null, { length: 16834 * 2 })
-    .map(Math.random)
-    .map((number) => (number * (1 << 8)))
+  Array.from({ length: 1024 ** 2 + 19 }, (_, index) => index)
 );
 
 async function createLargeFile() {
@@ -69,11 +70,22 @@ async function validateWrongSignalParam() {
 
 }
 
+async function validateZeroByteLiar() {
+  const originalFStat = fsBinding.fstat;
+  fsBinding.fstat = common.mustCall(
+    () => (/* stat fields */ [0, 1, 2, 3, 4, 5, 6, 7, 0 /* size */])
+  );
+  const readBuffer = await readFile(fn);
+  assert.strictEqual(readBuffer.toString(), largeBuffer.toString());
+  fsBinding.fstat = originalFStat;
+}
+
 (async () => {
   await createLargeFile();
   await validateReadFile();
   await validateReadFileProc();
   await validateReadFileAbortLogicBefore();
   await validateReadFileAbortLogicDuring();
   await validateWrongSignalParam();
+  await validateZeroByteLiar();
 })().then(common.mustCall());
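
The new validateZeroByteLiar test stubs out fstat so the reported size is 0 while the file still has content, forcing readFile down its unknown-size, chunk-accumulating path. Size-lying files are normal on Linux procfs, which is presumably where the name comes from. A hedged illustration (Linux-only, not part of the test):

'use strict';
const { readFile } = require('fs/promises');
const { statSync } = require('fs');

// Illustration only: procfs entries commonly "lie" about their size,
// reporting 0 bytes from stat(2) while still producing content on read.
console.log(statSync('/proc/version').size);  // 0

readFile('/proc/version', 'utf8')
  .then((text) => console.log(text.trim()));  // e.g. 'Linux version ...'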
