lib: improve normalize encoding performance

BridgeAR · BridgeAR · commit 341770fedf77 · 2018-03-02T02:09:25.000Z
This focuses on the common cases by making sure they are prioritized. It also changes some typeof checks to test for undefined, since that is faster, and it adds a benchmark. PR-URL: nodejs#18790 Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Benjamin Gruenbaum <benjamingr@gmail.com> Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
diff --git a/benchmark/buffers/buffer-normalize-encoding.js b/benchmark/buffers/buffer-normalize-encoding.js
@@ -0,0 +1,43 @@
+'use strict';
+
+const common = require('../common.js');
+
+const bench = common.createBenchmark(main, {
+  encoding: [
+    'ascii',
+    'ASCII',
+    'base64',
+    'BASE64',
+    'binary',
+    'BINARY',
+    'hex',
+    'HEX',
+    'latin1',
+    'LATIN1',
+    'ucs-2',
+    'UCS-2',
+    'ucs2',
+    'UCS2',
+    'utf-16le',
+    'UTF-16LE',
+    'utf-8',
+    'UTF-8',
+    'utf16le',
+    'UTF16LE',
+    'utf8',
+    'UTF8'
+  ],
+  n: [1e6]
+}, {
+  flags: ['--expose-internals']
+});
+
+function main({ encoding, n }) {
+  const { normalizeEncoding } = require('internal/util');
+
+  bench.start();
+  for (var i = 0; i < n; i++) {
+    normalizeEncoding(encoding);
+  }
+  bench.end(n);
+}
diff --git a/lib/buffer.js b/lib/buffer.js
@@ -242,7 +242,7 @@ function assertSize(size) {
     err = new errors.RangeError('ERR_INVALID_OPT_VALUE', 'size', size);
   }
 
-  if (err) {
+  if (err !== null) {
     Error.captureStackTrace(err, assertSize);
     throw err;
   }
@@ -428,7 +428,7 @@ Buffer.compare = function compare(a, b) {
 
 Buffer.isEncoding = function isEncoding(encoding) {
   return typeof encoding === 'string' &&
-         typeof normalizeEncoding(encoding) === 'string';
+         normalizeEncoding(encoding) !== undefined;
 };
 Buffer[kIsEncodingSymbol] = Buffer.isEncoding;
 
diff --git a/lib/internal/util.js b/lib/internal/util.js
@@ -96,36 +96,59 @@ function assertCrypto() {
     throw new errors.Error('ERR_NO_CRYPTO');
 }
 
-// The loop should only run at most twice, retrying with lowercased enc
-// if there is no match in the first pass.
-// We use a loop instead of branching to retry with a helper
-// function in order to avoid the performance hit.
 // Return undefined if there is no match.
+// Move the "slow cases" to a separate function to make sure this function gets
+// inlined properly. That prioritizes the common case.
 function normalizeEncoding(enc) {
-  if (enc == null || enc === '') return 'utf8';
-  let retried;
-  while (true) {
-    switch (enc) {
-      case 'utf8':
-      case 'utf-8':
-        return 'utf8';
-      case 'ucs2':
-      case 'ucs-2':
-      case 'utf16le':
-      case 'utf-16le':
+  if (enc == null || enc === 'utf8' || enc === 'utf-8') return 'utf8';
+  return slowCases(enc);
+}
+
+function slowCases(enc) {
+  switch (enc.length) {
+    case 4:
+      if (enc === 'UTF8') return 'utf8';
+      if (enc === 'ucs2' || enc === 'UCS2') return 'utf16le';
+      enc = `${enc}`.toLowerCase();
+      if (enc === 'utf8') return 'utf8';
+      if (enc === 'ucs2' || enc === 'UCS2') return 'utf16le';
+      break;
+    case 3:
+      if (enc === 'hex' || enc === 'HEX' || `${enc}`.toLowerCase() === 'hex')
+        return 'hex';
+      break;
+    case 5:
+      if (enc === 'ascii') return 'ascii';
+      if (enc === 'ucs-2') return 'utf16le';
+      if (enc === 'UTF-8') return 'utf8';
+      if (enc === 'ASCII') return 'ascii';
+      if (enc === 'UCS-2') return 'utf16le';
+      enc = `${enc}`.toLowerCase();
+      if (enc === 'utf-8') return 'utf8';
+      if (enc === 'ascii') return 'ascii';
+      if (enc === 'ucs-2') return 'utf16le';
+      break;
+    case 6:
+      if (enc === 'base64') return 'base64';
+      if (enc === 'latin1' || enc === 'binary') return 'latin1';
+      if (enc === 'BASE64') return 'base64';
+      if (enc === 'LATIN1' || enc === 'BINARY') return 'latin1';
+      enc = `${enc}`.toLowerCase();
+      if (enc === 'base64') return 'base64';
+      if (enc === 'latin1' || enc === 'binary') return 'latin1';
+      break;
+    case 7:
+      if (enc === 'utf16le' || enc === 'UTF16LE' ||
+        `${enc}`.toLowerCase() === 'utf16le')
         return 'utf16le';
-      case 'latin1':
-      case 'binary':
-        return 'latin1';
-      case 'base64':
-      case 'ascii':
-      case 'hex':
-        return enc;
-      default:
-        if (retried) return; // undefined
-        enc = ('' + enc).toLowerCase();
-        retried = true;
-    }
+      break;
+    case 8:
+      if (enc === 'utf-16le' || enc === 'UTF-16LE' ||
+        `${enc}`.toLowerCase() === 'utf-16le')
+        return 'utf16le';
+      break;
+    default:
+      if (enc === '') return 'utf8';
   }
 }
 
diff --git a/lib/string_decoder.js b/lib/string_decoder.js
@@ -43,10 +43,12 @@ const kNativeDecoder = Symbol('kNativeDecoder');
 // modules monkey-patch it to support additional encodings
 function normalizeEncoding(enc) {
   const nenc = internalUtil.normalizeEncoding(enc);
-  if (typeof nenc !== 'string' &&
-      (Buffer.isEncoding === isEncoding || !Buffer.isEncoding(enc)))
-    throw new errors.TypeError('ERR_UNKNOWN_ENCODING', enc);
-  return nenc || enc;
+  if (nenc === undefined) {
+    if (Buffer.isEncoding === isEncoding || !Buffer.isEncoding(enc))
+      throw new errors.TypeError('ERR_UNKNOWN_ENCODING', enc);
+    return enc;
+  }
+  return nenc;
 }
 
 const encodingsMap = {};

Original file line number	Diff line number	Diff line change
`@@ -242,7 +242,7 @@ function assertSize(size) {`
`242`	`242`	`err = new errors.RangeError('ERR_INVALID_OPT_VALUE', 'size', size);`
`243`	`243`	`}`
`244`	`244`
`245`		`- if (err) {`
	`245`	`+ if (err !== null) {`
`246`	`246`	`Error.captureStackTrace(err, assertSize);`
`247`	`247`	`throw err;`
`248`	`248`	`}`
`@@ -428,7 +428,7 @@ Buffer.compare = function compare(a, b) {`
`428`	`428`
`429`	`429`	`Buffer.isEncoding = function isEncoding(encoding) {`
`430`	`430`	`return typeof encoding === 'string' &&`
`431`		`- typeof normalizeEncoding(encoding) === 'string';`
	`431`	`+ normalizeEncoding(encoding) !== undefined;`
`432`	`432`	`};`
`433`	`433`	`Buffer[kIsEncodingSymbol] = Buffer.isEncoding;`
`434`	`434`