Skip to content

Commit 163a5a6

Browse files
committedJan 14, 2024
crypto: implement crypto.hash()
This patch introduces a helper crypto.hash() that computes a digest from the input in one shot. This can be 1.2-1.6x faster than the object-based createHash() for smaller inputs (<= 5MB) that are readily available (not streamed), and it incurs less memory overhead since no intermediate objects are created.
1 parent c931b91 commit 163a5a6

File tree

9 files changed

+244
-8
lines changed

9 files changed

+244
-8
lines changed
 

‎benchmark/crypto/oneshot-hash.js

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
'use strict';
2+
3+
const common = require('../common.js');
4+
const { createHash, hash } = require('crypto');
5+
const path = require('path');
6+
const filepath = path.resolve(__dirname, '../../test/fixtures/snapshot/typescript.js');
7+
const fs = require('fs');
8+
const assert = require('assert');
9+
10+
const bench = common.createBenchmark(main, {
11+
length: [1000, 100_000],
12+
method: ['md5', 'sha1', 'sha256'],
13+
type: ['string', 'buffer'],
14+
n: [100_000, 1000],
15+
}, {
16+
combinationFilter: ({ length, n }) => {
17+
return length * n <= 100_000 * 1000;
18+
},
19+
});
20+
21+
function main({ length, type, method, n }) {
22+
let data = fs.readFileSync(filepath);
23+
if (type === 'string') {
24+
data = data.toString().slice(0, length);
25+
} else {
26+
data = Uint8Array.prototype.slice.call(data, 0, length);
27+
}
28+
29+
const oneshotHash = hash ?
30+
(method, input) => hash(method, input, 'hex') :
31+
(method, input) => createHash(method).update(input).digest('hex');
32+
const array = [];
33+
for (let i = 0; i < n; i++) {
34+
array.push(null);
35+
}
36+
bench.start();
37+
for (let i = 0; i < n; i++) {
38+
array[i] = oneshotHash(method, data);
39+
}
40+
bench.end(n);
41+
assert.strictEqual(typeof array[n - 1], 'string');
42+
}

‎doc/api/crypto.md

+59
Original file line numberDiff line numberDiff line change
@@ -3510,6 +3510,65 @@ Computes the Diffie-Hellman secret based on a `privateKey` and a `publicKey`.
35103510
Both keys must have the same `asymmetricKeyType`, which must be one of `'dh'`
35113511
(for Diffie-Hellman), `'ec'` (for ECDH), `'x448'`, or `'x25519'` (for ECDH-ES).
35123512

3513+
### `crypto.hash(algorithm, data[, outputEncoding])`
3514+
3515+
<!-- YAML
3516+
added:
3517+
- REPLACEME
3518+
-->
3519+
3520+
* `algorithm` {string}
3521+
* `data` {string|ArrayBuffer|Buffer|TypedArray|DataView} When `data` is a string,
3522+
it will be encoded as UTF-8 before being hashed. If a different input encoding
3523+
is desired for a string input, the user can encode the string into a TypedArray
3524+
using either `TextEncoder` or `Buffer.from()` and pass the encoded
3525+
TypedArray into this API instead.
3526+
* `outputEncoding` {string|undefined} [Encoding][encoding] used to encode the
3527+
returned digest. **Default:** `'hex'`.
3528+
* Returns: {string|Buffer}
3529+
3530+
A utility for creating one-shot hash digests of data. It can be faster than
3531+
the object-based `crypto.createHash()` when hashing a smaller amount of data
3532+
(<= 5MB) that's readily available. If the data can be big or if it is streamed,
3533+
it's still recommended to use `crypto.createHash()` instead.
3534+
3535+
The `algorithm` is dependent on the available algorithms supported by the
3536+
version of OpenSSL on the platform. Examples are `'sha256'`, `'sha512'`, etc.
3537+
On recent releases of OpenSSL, `openssl list -digest-algorithms` will
3538+
display the available digest algorithms.
3539+
3540+
Example:
3541+
3542+
```cjs
3543+
const crypto = require('node:crypto');
3544+
3545+
// Hash a string and return the result as a hex-encoded string.
3546+
const string = 'Node.js';
3547+
// 10b3493287f831e81a438811a1ffba01f8cec4b7
3548+
console.log(crypto.hash('sha1', string));
3549+
3550+
// Decode a base64-encoded string into a Buffer, hash it and return
3551+
// the result as a buffer.
3552+
const base64 = 'Tm9kZS5qcw==';
3553+
// <Buffer 10 b3 49 32 87 f8 31 e8 1a 43 88 11 a1 ff ba 01 f8 ce c4 b7>
3554+
console.log(crypto.hash('sha1', Buffer.from(base64, 'base64'), 'buffer'));
3555+
```
3556+
3557+
```mjs
3558+
import crypto from 'node:crypto';
3559+
3560+
// Hash a string and return the result as a hex-encoded string.
3561+
const string = 'Node.js';
3562+
// 10b3493287f831e81a438811a1ffba01f8cec4b7
3563+
console.log(crypto.hash('sha1', string));
3564+
3565+
// Decode a base64-encoded string into a Buffer, hash it and return
3566+
// the result as a buffer.
3567+
const base64 = 'Tm9kZS5qcw==';
3568+
// <Buffer 10 b3 49 32 87 f8 31 e8 1a 43 88 11 a1 ff ba 01 f8 ce c4 b7>
3569+
console.log(crypto.hash('sha1', Buffer.from(base64, 'base64'), 'buffer'));
3570+
```
3571+
35133572
### `crypto.generateKey(type, options, callback)`
35143573

35153574
<!-- YAML

‎lib/crypto.js

+2
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ const {
107107
const {
108108
Hash,
109109
Hmac,
110+
hash,
110111
} = require('internal/crypto/hash');
111112
const {
112113
X509Certificate,
@@ -219,6 +220,7 @@ module.exports = {
219220
getFips,
220221
setFips,
221222
verify: verifyOneShot,
223+
hash,
222224

223225
// Classes
224226
Certificate,

‎lib/internal/crypto/hash.js

+19
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ const {
1111
HashJob,
1212
Hmac: _Hmac,
1313
kCryptoJobAsync,
14+
oneShotDigest,
1415
} = internalBinding('crypto');
1516

1617
const {
@@ -29,6 +30,8 @@ const {
2930

3031
const {
3132
lazyDOMException,
33+
normalizeEncoding,
34+
encodingsMap,
3235
} = require('internal/util');
3336

3437
const {
@@ -47,6 +50,7 @@ const {
4750
validateEncoding,
4851
validateString,
4952
validateUint32,
53+
validateBuffer,
5054
} = require('internal/validators');
5155

5256
const {
@@ -188,8 +192,23 @@ async function asyncDigest(algorithm, data) {
188192
throw lazyDOMException('Unrecognized algorithm name', 'NotSupportedError');
189193
}
190194

195+
function hash(algorithm, input, outputEncoding = 'hex') {
196+
validateString(algorithm, 'algorithm');
197+
if (typeof input !== 'string') {
198+
validateBuffer(input, 'input');
199+
}
200+
// Fast case: if it's 'hex', we don't need to validate it further.
201+
if (outputEncoding !== 'hex') {
202+
validateString(outputEncoding);
203+
outputEncoding = normalizeEncoding(outputEncoding) || outputEncoding;
204+
}
205+
return oneShotDigest(algorithm, getCachedHashId(algorithm), getHashCache(),
206+
input, outputEncoding, encodingsMap[outputEncoding]);
207+
}
208+
191209
module.exports = {
192210
Hash,
193211
Hmac,
194212
asyncDigest,
213+
hash,
195214
};

‎src/api/encoding.cc

+10
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,16 @@ enum encoding ParseEncoding(const char* encoding,
109109
return default_encoding;
110110
}
111111

112+
enum encoding ParseEncoding(Isolate* isolate,
113+
Local<Value> encoding_v,
114+
Local<Value> encoding_id,
115+
enum encoding default_encoding) {
116+
if (encoding_id->IsUint32()) {
117+
return static_cast<enum encoding>(encoding_id.As<v8::Uint32>()->Value());
118+
}
119+
120+
return ParseEncoding(isolate, encoding_v, default_encoding);
121+
}
112122

113123
enum encoding ParseEncoding(Isolate* isolate,
114124
Local<Value> encoding_v,

‎src/crypto/crypto_hash.cc

+79-8
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ using v8::MaybeLocal;
2323
using v8::Name;
2424
using v8::Nothing;
2525
using v8::Object;
26+
using v8::String;
2627
using v8::Uint32;
2728
using v8::Value;
2829

@@ -202,6 +203,71 @@ const EVP_MD* GetDigestImplementation(Environment* env,
202203
#endif
203204
}
204205

206+
// oneShotDigest(algorithm, algorithmId, algorithmCache,
207+
// input, outputEncoding, outputEncodingId)
208+
void Hash::OneShotDigest(const FunctionCallbackInfo<Value>& args) {
209+
Environment* env = Environment::GetCurrent(args);
210+
Isolate* isolate = env->isolate();
211+
CHECK_EQ(args.Length(), 6);
212+
CHECK(args[0]->IsString()); // algorithm
213+
CHECK(args[1]->IsInt32()); // algorithmId
214+
CHECK(args[2]->IsObject()); // algorithmCache
215+
CHECK(args[3]->IsString() || args[3]->IsArrayBufferView()); // input
216+
CHECK(args[4]->IsString()); // outputEncoding
217+
CHECK(args[5]->IsUint32() || args[5]->IsUndefined()); // outputEncodingId
218+
219+
const EVP_MD* md = GetDigestImplementation(env, args[0], args[1], args[2]);
220+
if (md == nullptr) {
221+
Utf8Value method(isolate, args[0]);
222+
std::string message =
223+
"Digest method " + method.ToString() + " is not supported";
224+
return ThrowCryptoError(env, ERR_get_error(), message.c_str());
225+
}
226+
227+
enum encoding output_enc = ParseEncoding(isolate, args[4], args[5], HEX);
228+
229+
int md_len = EVP_MD_size(md);
230+
unsigned int result_size;
231+
ByteSource::Builder output(md_len);
232+
int success;
233+
// On smaller inputs, EVP_Digest() can be slower than the
234+
// deprecated helpers, e.g. SHA256_XXX. The speedup may not
235+
// be worth using deprecated APIs, however, so we use
236+
// EVP_Digest(), unless there's a better alternative
237+
// in the future.
238+
// https://github.com/openssl/openssl/issues/19612
239+
if (args[3]->IsString()) {
240+
Utf8Value utf8(isolate, args[3]);
241+
success = EVP_Digest(utf8.out(),
242+
utf8.length(),
243+
output.data<unsigned char>(),
244+
&result_size,
245+
md,
246+
nullptr);
247+
} else {
248+
ArrayBufferViewContents<unsigned char> input(args[3]);
249+
success = EVP_Digest(input.data(),
250+
input.length(),
251+
output.data<unsigned char>(),
252+
&result_size,
253+
md,
254+
nullptr);
255+
}
256+
if (!success) {
257+
return ThrowCryptoError(env, ERR_get_error());
258+
}
259+
260+
Local<Value> error;
261+
MaybeLocal<Value> rc = StringBytes::Encode(
262+
env->isolate(), output.data<char>(), md_len, output_enc, &error);
263+
if (rc.IsEmpty()) {
264+
CHECK(!error.IsEmpty());
265+
env->isolate()->ThrowException(error);
266+
return;
267+
}
268+
args.GetReturnValue().Set(rc.FromMaybe(Local<Value>()));
269+
}
270+
205271
void Hash::Initialize(Environment* env, Local<Object> target) {
206272
Isolate* isolate = env->isolate();
207273
Local<Context> context = env->context();
@@ -216,6 +282,7 @@ void Hash::Initialize(Environment* env, Local<Object> target) {
216282

217283
SetMethodNoSideEffect(context, target, "getHashes", GetHashes);
218284
SetMethodNoSideEffect(context, target, "getCachedAliases", GetCachedAliases);
285+
SetMethodNoSideEffect(context, target, "oneShotDigest", OneShotDigest);
219286

220287
HashJob::Initialize(env, target);
221288

@@ -229,6 +296,7 @@ void Hash::RegisterExternalReferences(ExternalReferenceRegistry* registry) {
229296
registry->Register(HashDigest);
230297
registry->Register(GetHashes);
231298
registry->Register(GetCachedAliases);
299+
registry->Register(OneShotDigest);
232300

233301
HashJob::RegisterExternalReferences(registry);
234302

@@ -294,14 +362,17 @@ bool Hash::HashUpdate(const char* data, size_t len) {
294362
}
295363

296364
void Hash::HashUpdate(const FunctionCallbackInfo<Value>& args) {
297-
Decode<Hash>(args, [](Hash* hash, const FunctionCallbackInfo<Value>& args,
298-
const char* data, size_t size) {
299-
Environment* env = Environment::GetCurrent(args);
300-
if (UNLIKELY(size > INT_MAX))
301-
return THROW_ERR_OUT_OF_RANGE(env, "data is too long");
302-
bool r = hash->HashUpdate(data, size);
303-
args.GetReturnValue().Set(r);
304-
});
365+
Decode<Hash>(args,
366+
[](Hash* hash,
367+
const FunctionCallbackInfo<Value>& args,
368+
const char* data,
369+
size_t size) {
370+
Environment* env = Environment::GetCurrent(args);
371+
if (UNLIKELY(size > INT_MAX))
372+
return THROW_ERR_OUT_OF_RANGE(env, "data is too long");
373+
bool r = hash->HashUpdate(data, size);
374+
args.GetReturnValue().Set(r);
375+
});
305376
}
306377

307378
void Hash::HashDigest(const FunctionCallbackInfo<Value>& args) {

‎src/crypto/crypto_hash.h

+1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ class Hash final : public BaseObject {
2626

2727
static void GetHashes(const v8::FunctionCallbackInfo<v8::Value>& args);
2828
static void GetCachedAliases(const v8::FunctionCallbackInfo<v8::Value>& args);
29+
static void OneShotDigest(const v8::FunctionCallbackInfo<v8::Value>& args);
2930

3031
protected:
3132
static void New(const v8::FunctionCallbackInfo<v8::Value>& args);

‎src/node_internals.h

+4
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,10 @@ v8::HeapProfiler::HeapSnapshotOptions GetHeapSnapshotOptions(
446446
v8::Local<v8::Value> options);
447447
} // namespace heap
448448

449+
enum encoding ParseEncoding(v8::Isolate* isolate,
450+
v8::Local<v8::Value> encoding_v,
451+
v8::Local<v8::Value> encoding_id,
452+
enum encoding default_encoding);
449453
} // namespace node
450454

451455
#endif // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
'use strict';
2+
const common = require('../common');
3+
4+
if (!common.hasCrypto)
5+
common.skip('missing crypto');
6+
7+
const assert = require('assert');
8+
const crypto = require('crypto');
9+
const fixtures = require('../common/fixtures');
10+
const fs = require('fs');
11+
12+
const methods = crypto.getHashes();
13+
assert(methods.length > 0);
14+
15+
function test(input) {
16+
for (const method of methods) {
17+
for (const outputEncoding of ['buffer', 'hex', 'base64', undefined]) {
18+
const oldDigest = crypto.createHash(method).update(input).digest(outputEncoding || 'hex');
19+
const newDigest = crypto.hash(method, input, outputEncoding);
20+
assert.deepStrictEqual(newDigest, oldDigest,
21+
`different result from ${method} with encoding ${outputEncoding}`);
22+
}
23+
}
24+
}
25+
26+
const input = fs.readFileSync(fixtures.path('utf8_test_text.txt'));
27+
test(input);
28+
test(input.toString());

0 commit comments

Comments
 (0)