Skip to content

Commit 8ead9c6

Browse files
nodejs-github-bot authored and aduh95 committed
deps: update simdutf to 5.7.2
PR-URL: #56388
Reviewed-By: Luigi Pinca <luigipinca@gmail.com>
Reviewed-By: Rafael Gonzaga <rafael.nunu@hotmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
1 parent 7e5ea06 commit 8ead9c6

File tree

2 files changed

+55
-30
lines changed

2 files changed

+55
-30
lines changed

deps/simdutf/simdutf.cpp

+52 -27
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
/* auto-generated on 2024-12-17 14:54:59 -0500. Do not edit! */
1+
/* auto-generated on 2024-12-26 12:42:33 -0500. Do not edit! */
22
/* begin file src/simdutf.cpp */
33
#include "simdutf.h"
44
// We include base64_tables once.
@@ -697,6 +697,15 @@ static_assert(to_base64_url_value[uint8_t('_')] == 63,
697697
#include <climits>
698698
#include <type_traits>
699699

700+
static_assert(sizeof(uint8_t) == sizeof(char),
701+
"simdutf requires that uint8_t be a char");
702+
static_assert(sizeof(uint16_t) == sizeof(char16_t),
703+
"simdutf requires that char16_t be 16 bits");
704+
static_assert(sizeof(uint32_t) == sizeof(char32_t),
705+
"simdutf requires that char32_t be 32 bits");
706+
// next line is redundant, but it is kept to catch defective systems.
707+
static_assert(CHAR_BIT == 8, "simdutf requires 8-bit bytes");
708+
700709
// Useful for debugging purposes
701710
namespace simdutf {
702711
namespace {
@@ -9746,24 +9755,23 @@ inline simdutf_warn_unused uint16_t swap_bytes(const uint16_t word) {
97469755
}
97479756

97489757
template <endianness big_endian>
9749-
inline simdutf_warn_unused bool validate(const char16_t *buf,
9758+
inline simdutf_warn_unused bool validate(const char16_t *data,
97509759
size_t len) noexcept {
9751-
const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
97529760
uint64_t pos = 0;
97539761
while (pos < len) {
9754-
uint16_t word =
9762+
char16_t word =
97559763
!match_system(big_endian) ? swap_bytes(data[pos]) : data[pos];
97569764
if ((word & 0xF800) == 0xD800) {
97579765
if (pos + 1 >= len) {
97589766
return false;
97599767
}
9760-
uint16_t diff = uint16_t(word - 0xD800);
9768+
char16_t diff = char16_t(word - 0xD800);
97619769
if (diff > 0x3FF) {
97629770
return false;
97639771
}
9764-
uint16_t next_word =
9772+
char16_t next_word =
97659773
!match_system(big_endian) ? swap_bytes(data[pos + 1]) : data[pos + 1];
9766-
uint16_t diff2 = uint16_t(next_word - 0xDC00);
9774+
char16_t diff2 = char16_t(next_word - 0xDC00);
97679775
if (diff2 > 0x3FF) {
97689776
return false;
97699777
}
@@ -9776,24 +9784,23 @@ inline simdutf_warn_unused bool validate(const char16_t *buf,
97769784
}
97779785

97789786
template <endianness big_endian>
9779-
inline simdutf_warn_unused result validate_with_errors(const char16_t *buf,
9787+
inline simdutf_warn_unused result validate_with_errors(const char16_t *data,
97809788
size_t len) noexcept {
9781-
const uint16_t *data = reinterpret_cast<const uint16_t *>(buf);
97829789
size_t pos = 0;
97839790
while (pos < len) {
9784-
uint16_t word =
9791+
char16_t word =
97859792
!match_system(big_endian) ? swap_bytes(data[pos]) : data[pos];
97869793
if ((word & 0xF800) == 0xD800) {
97879794
if (pos + 1 >= len) {
97889795
return result(error_code::SURROGATE, pos);
97899796
}
9790-
uint16_t diff = uint16_t(word - 0xD800);
9797+
char16_t diff = char16_t(word - 0xD800);
97919798
if (diff > 0x3FF) {
97929799
return result(error_code::SURROGATE, pos);
97939800
}
9794-
uint16_t next_word =
9801+
char16_t next_word =
97959802
!match_system(big_endian) ? swap_bytes(data[pos + 1]) : data[pos + 1];
9796-
uint16_t diff2 = uint16_t(next_word - 0xDC00);
9803+
char16_t diff2 = uint16_t(next_word - 0xDC00);
97979804
if (diff2 > 0x3FF) {
97989805
return result(error_code::SURROGATE, pos);
97999806
}
@@ -9806,24 +9813,22 @@ inline simdutf_warn_unused result validate_with_errors(const char16_t *buf,
98069813
}
98079814

98089815
template <endianness big_endian>
9809-
inline size_t count_code_points(const char16_t *buf, size_t len) {
9816+
inline size_t count_code_points(const char16_t *p, size_t len) {
98109817
// We are not BOM aware.
9811-
const uint16_t *p = reinterpret_cast<const uint16_t *>(buf);
98129818
size_t counter{0};
98139819
for (size_t i = 0; i < len; i++) {
9814-
uint16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i];
9820+
char16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i];
98159821
counter += ((word & 0xFC00) != 0xDC00);
98169822
}
98179823
return counter;
98189824
}
98199825

98209826
template <endianness big_endian>
9821-
inline size_t utf8_length_from_utf16(const char16_t *buf, size_t len) {
9827+
inline size_t utf8_length_from_utf16(const char16_t *p, size_t len) {
98229828
// We are not BOM aware.
9823-
const uint16_t *p = reinterpret_cast<const uint16_t *>(buf);
98249829
size_t counter{0};
98259830
for (size_t i = 0; i < len; i++) {
9826-
uint16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i];
9831+
char16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i];
98279832
counter++; // ASCII
98289833
counter += static_cast<size_t>(
98299834
word >
@@ -9835,25 +9840,22 @@ inline size_t utf8_length_from_utf16(const char16_t *buf, size_t len) {
98359840
}
98369841

98379842
template <endianness big_endian>
9838-
inline size_t utf32_length_from_utf16(const char16_t *buf, size_t len) {
9843+
inline size_t utf32_length_from_utf16(const char16_t *p, size_t len) {
98399844
// We are not BOM aware.
9840-
const uint16_t *p = reinterpret_cast<const uint16_t *>(buf);
98419845
size_t counter{0};
98429846
for (size_t i = 0; i < len; i++) {
9843-
uint16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i];
9847+
char16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i];
98449848
counter += ((word & 0xFC00) != 0xDC00);
98459849
}
98469850
return counter;
98479851
}
98489852

98499853
inline size_t latin1_length_from_utf16(size_t len) { return len; }
98509854

9851-
simdutf_really_inline void change_endianness_utf16(const char16_t *in,
9852-
size_t size, char16_t *out) {
9853-
const uint16_t *input = reinterpret_cast<const uint16_t *>(in);
9854-
uint16_t *output = reinterpret_cast<uint16_t *>(out);
9855+
simdutf_really_inline void
9856+
change_endianness_utf16(const char16_t *input, size_t size, char16_t *output) {
98559857
for (size_t i = 0; i < size; i++) {
9856-
*output++ = uint16_t(input[i] >> 8 | input[i] << 8);
9858+
*output++ = char16_t(input[i] >> 8 | input[i] << 8);
98579859
}
98589860
}
98599861

@@ -21042,6 +21044,9 @@ struct validating_transcoder {
2104221044
uint64_t utf8_continuation_mask =
2104321045
input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in
2104421046
// this case, we also have ASCII to account for.
21047+
if (utf8_continuation_mask & 1) {
21048+
return 0; // error
21049+
}
2104521050
uint64_t utf8_leading_mask = ~utf8_continuation_mask;
2104621051
uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1;
2104721052
// We process in blocks of up to 12 bytes except possibly
@@ -26717,6 +26722,14 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen,
2671726722
}
2671826723

2671926724
if (!ignore_garbage && equalsigns > 0) {
26725+
if (last_chunk_options == last_chunk_handling_options::strict) {
26726+
return {BASE64_INPUT_REMAINDER, size_t(src - srcinit),
26727+
size_t(dst - dstinit)};
26728+
}
26729+
if (last_chunk_options ==
26730+
last_chunk_handling_options::stop_before_partial) {
26731+
return {SUCCESS, size_t(src - srcinit), size_t(dst - dstinit)};
26732+
}
2672026733
if ((size_t(dst - dstinit) % 3 == 0) ||
2672126734
((size_t(dst - dstinit) % 3) + 1 + equalsigns != 4)) {
2672226735
return {INVALID_BASE64_CHARACTER, equallocation, size_t(dst - dstinit)};
@@ -33161,6 +33174,9 @@ struct validating_transcoder {
3316133174
uint64_t utf8_continuation_mask =
3316233175
input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in
3316333176
// this case, we also have ASCII to account for.
33177+
if (utf8_continuation_mask & 1) {
33178+
return 0; // error
33179+
}
3316433180
uint64_t utf8_leading_mask = ~utf8_continuation_mask;
3316533181
uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1;
3316633182
// We process in blocks of up to 12 bytes except possibly
@@ -43013,6 +43029,9 @@ struct validating_transcoder {
4301343029
uint64_t utf8_continuation_mask =
4301443030
input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in
4301543031
// this case, we also have ASCII to account for.
43032+
if (utf8_continuation_mask & 1) {
43033+
return 0; // error
43034+
}
4301643035
uint64_t utf8_leading_mask = ~utf8_continuation_mask;
4301743036
uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1;
4301843037
// We process in blocks of up to 12 bytes except possibly
@@ -48110,6 +48129,9 @@ struct validating_transcoder {
4811048129
uint64_t utf8_continuation_mask =
4811148130
input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in
4811248131
// this case, we also have ASCII to account for.
48132+
if (utf8_continuation_mask & 1) {
48133+
return 0; // error
48134+
}
4811348135
uint64_t utf8_leading_mask = ~utf8_continuation_mask;
4811448136
uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1;
4811548137
// We process in blocks of up to 12 bytes except possibly
@@ -54454,6 +54476,9 @@ struct validating_transcoder {
5445454476
uint64_t utf8_continuation_mask =
5445554477
input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in
5445654478
// this case, we also have ASCII to account for.
54479+
if (utf8_continuation_mask & 1) {
54480+
return 0; // error
54481+
}
5445754482
uint64_t utf8_leading_mask = ~utf8_continuation_mask;
5445854483
uint64_t utf8_end_of_code_point_mask = utf8_leading_mask >> 1;
5445954484
// We process in blocks of up to 12 bytes except possibly

deps/simdutf/simdutf.h

+3 -3
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
/* auto-generated on 2024-12-17 14:54:59 -0500. Do not edit! */
1+
/* auto-generated on 2024-12-26 12:42:33 -0500. Do not edit! */
22
/* begin file include/simdutf.h */
33
#ifndef SIMDUTF_H
44
#define SIMDUTF_H
@@ -675,7 +675,7 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS
675675
#define SIMDUTF_SIMDUTF_VERSION_H
676676

677677
/** The version of simdutf being used (major.minor.revision) */
678-
#define SIMDUTF_VERSION "5.7.0"
678+
#define SIMDUTF_VERSION "5.7.2"
679679

680680
namespace simdutf {
681681
enum {
@@ -690,7 +690,7 @@ enum {
690690
/**
691691
* The revision (major.minor.REVISION) of simdutf being used.
692692
*/
693-
SIMDUTF_VERSION_REVISION = 0
693+
SIMDUTF_VERSION_REVISION = 2
694694
};
695695
} // namespace simdutf
696696

0 commit comments

Comments
 (0)