Skip to content

Commit 341770f

Browse files
committed
lib: improve normalize encoding performance
This focuses on the common cases by making sure they are prioritized. It also changes some typeof checks to compare against undefined, since that is faster, and it adds a benchmark. PR-URL: nodejs#18790 Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Benjamin Gruenbaum <benjamingr@gmail.com> Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
1 parent 876836b commit 341770f

File tree

4 files changed

+101
-33
lines changed

4 files changed

+101
-33
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
'use strict';
2+
3+
const common = require('../common.js');
4+
5+
const bench = common.createBenchmark(main, {
6+
encoding: [
7+
'ascii',
8+
'ASCII',
9+
'base64',
10+
'BASE64',
11+
'binary',
12+
'BINARY',
13+
'hex',
14+
'HEX',
15+
'latin1',
16+
'LATIN1',
17+
'ucs-2',
18+
'UCS-2',
19+
'ucs2',
20+
'UCS2',
21+
'utf-16le',
22+
'UTF-16LE',
23+
'utf-8',
24+
'UTF-8',
25+
'utf16le',
26+
'UTF16LE',
27+
'utf8',
28+
'UTF8'
29+
],
30+
n: [1e6]
31+
}, {
32+
flags: ['--expose-internals']
33+
});
34+
35+
function main({ encoding, n }) {
36+
const { normalizeEncoding } = require('internal/util');
37+
38+
bench.start();
39+
for (var i = 0; i < n; i++) {
40+
normalizeEncoding(encoding);
41+
}
42+
bench.end(n);
43+
}

lib/buffer.js

+2-2
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ function assertSize(size) {
242242
err = new errors.RangeError('ERR_INVALID_OPT_VALUE', 'size', size);
243243
}
244244

245-
if (err) {
245+
if (err !== null) {
246246
Error.captureStackTrace(err, assertSize);
247247
throw err;
248248
}
@@ -428,7 +428,7 @@ Buffer.compare = function compare(a, b) {
428428

429429
Buffer.isEncoding = function isEncoding(encoding) {
430430
return typeof encoding === 'string' &&
431-
typeof normalizeEncoding(encoding) === 'string';
431+
normalizeEncoding(encoding) !== undefined;
432432
};
433433
Buffer[kIsEncodingSymbol] = Buffer.isEncoding;
434434

lib/internal/util.js

+50-27
Original file line numberDiff line numberDiff line change
@@ -96,36 +96,59 @@ function assertCrypto() {
9696
throw new errors.Error('ERR_NO_CRYPTO');
9797
}
9898

99-
// The loop should only run at most twice, retrying with lowercased enc
100-
// if there is no match in the first pass.
101-
// We use a loop instead of branching to retry with a helper
102-
// function in order to avoid the performance hit.
10399
// Return undefined if there is no match.
100+
// Move the "slow cases" to a separate function to make sure this function gets
101+
// inlined properly. That prioritizes the common case.
104102
/**
 * Normalizes an encoding name to its canonical lowercase identifier.
 *
 * Only the most common inputs (`null`, `undefined`, 'utf8' and 'utf-8') are
 * handled here; everything else is delegated to `slowCases()` so that this
 * function stays small (the intent being that it gets inlined).
 *
 * @param {*} enc Encoding name to normalize.
 * @returns {string|undefined} One of 'utf8', 'utf16le', 'latin1', 'base64',
 *   'ascii' or 'hex', or `undefined` if there is no match.
 */
function normalizeEncoding(enc) {
  if (enc == null || enc === 'utf8' || enc === 'utf-8') return 'utf8';
  return slowCases(enc);
}

/**
 * Resolves the less common encoding spellings.
 *
 * Dispatches on `enc.length` so that only the names of that length are
 * compared. All-lowercase and all-uppercase spellings are tested first with
 * plain comparisons; only if those fail is the value lowercased and compared
 * again, which covers mixed-case input such as 'Utf8'.
 *
 * @param {*} enc Encoding name; must not be `null` or `undefined`.
 * @returns {string|undefined} The canonical encoding name, or `undefined` if
 *   there is no match. The empty string maps to 'utf8'.
 */
function slowCases(enc) {
  switch (enc.length) {
    case 4: {
      if (enc === 'UTF8') return 'utf8';
      if (enc === 'ucs2' || enc === 'UCS2') return 'utf16le';
      const lowered = `${enc}`.toLowerCase();
      if (lowered === 'utf8') return 'utf8';
      if (lowered === 'ucs2') return 'utf16le';
      break;
    }
    case 3:
      if (enc === 'hex' || enc === 'HEX' || `${enc}`.toLowerCase() === 'hex')
        return 'hex';
      break;
    case 5: {
      if (enc === 'ascii') return 'ascii';
      if (enc === 'ucs-2') return 'utf16le';
      if (enc === 'UTF-8') return 'utf8';
      if (enc === 'ASCII') return 'ascii';
      if (enc === 'UCS-2') return 'utf16le';
      const lowered = `${enc}`.toLowerCase();
      if (lowered === 'utf-8') return 'utf8';
      if (lowered === 'ascii') return 'ascii';
      // Must be spelled 'ucs-2'; a misspelling here makes mixed-case input
      // such as 'Ucs-2' fall through and be reported as unknown.
      if (lowered === 'ucs-2') return 'utf16le';
      break;
    }
    case 6: {
      if (enc === 'base64') return 'base64';
      if (enc === 'latin1' || enc === 'binary') return 'latin1';
      if (enc === 'BASE64') return 'base64';
      if (enc === 'LATIN1' || enc === 'BINARY') return 'latin1';
      const lowered = `${enc}`.toLowerCase();
      if (lowered === 'base64') return 'base64';
      if (lowered === 'latin1' || lowered === 'binary') return 'latin1';
      break;
    }
    case 7:
      if (enc === 'utf16le' || enc === 'UTF16LE' ||
          `${enc}`.toLowerCase() === 'utf16le')
        return 'utf16le';
      break;
    case 8:
      if (enc === 'utf-16le' || enc === 'UTF-16LE' ||
          `${enc}`.toLowerCase() === 'utf-16le')
        return 'utf16le';
      break;
    default:
      // Any other length, including values without a `length` property such
      // as numbers: only the empty string is accepted.
      if (enc === '') return 'utf8';
  }
  return undefined;
}
131154

lib/string_decoder.js

+6-4
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,12 @@ const kNativeDecoder = Symbol('kNativeDecoder');
4343
// modules monkey-patch it to support additional encodings
4444
function normalizeEncoding(enc) {
4545
const nenc = internalUtil.normalizeEncoding(enc);
46-
if (typeof nenc !== 'string' &&
47-
(Buffer.isEncoding === isEncoding || !Buffer.isEncoding(enc)))
48-
throw new errors.TypeError('ERR_UNKNOWN_ENCODING', enc);
49-
return nenc || enc;
46+
if (nenc === undefined) {
47+
if (Buffer.isEncoding === isEncoding || !Buffer.isEncoding(enc))
48+
throw new errors.TypeError('ERR_UNKNOWN_ENCODING', enc);
49+
return enc;
50+
}
51+
return nenc;
5052
}
5153

5254
const encodingsMap = {};

0 commit comments

Comments
 (0)