|
1 |
| -/* auto-generated on 2024-11-14 14:52:31 -0500. Do not edit! */ |
| 1 | +/* auto-generated on 2024-11-21 10:33:28 -0500. Do not edit! */ |
2 | 2 | /* begin file src/simdutf.cpp */
|
3 | 3 | #include "simdutf.h"
|
4 | 4 | // We include base64_tables once.
|
@@ -23495,7 +23495,7 @@ size_t encode_base64(char *dst, const char *src, size_t srclen,
|
23495 | 23495 | }
|
23496 | 23496 |
|
23497 | 23497 | template <bool base64_url>
|
23498 |
| -static inline uint64_t to_base64_mask(block64 *b, bool *error) { |
| 23498 | +static inline uint64_t to_base64_mask(block64 *b, uint64_t *error) { |
23499 | 23499 | __m512i input = b->chunks[0];
|
23500 | 23500 | const __m512i ascii_space_tbl = _mm512_set_epi8(
|
23501 | 23501 | 0, 0, 13, 12, 0, 10, 9, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 13, 12, 0, 10,
|
@@ -23538,7 +23538,7 @@ static inline uint64_t to_base64_mask(block64 *b, bool *error) {
|
23538 | 23538 | if (mask) {
|
23539 | 23539 | const __mmask64 spaces = _mm512_cmpeq_epi8_mask(
|
23540 | 23540 | _mm512_shuffle_epi8(ascii_space_tbl, input), input);
|
23541 |
| - *error |= (mask != spaces); |
| 23541 | + *error = (mask ^ spaces); |
23542 | 23542 | }
|
23543 | 23543 | b->chunks[0] = translated;
|
23544 | 23544 |
|
@@ -23646,16 +23646,13 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen,
|
23646 | 23646 | block64 b;
|
23647 | 23647 | load_block(&b, src);
|
23648 | 23648 | src += 64;
|
23649 |
| - bool error = false; |
| 23649 | + uint64_t error = 0; |
23650 | 23650 | uint64_t badcharmask = to_base64_mask<base64_url>(&b, &error);
|
23651 | 23651 | if (error) {
|
23652 | 23652 | src -= 64;
|
23653 |
| - while (src < srcend && scalar::base64::is_eight_byte(*src) && |
23654 |
| - to_base64[uint8_t(*src)] <= 64) { |
23655 |
| - src++; |
23656 |
| - } |
23657 |
| - return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), |
23658 |
| - size_t(dst - dstinit)}; |
| 23653 | + size_t error_offset = _tzcnt_u64(error); |
| 23654 | + return {error_code::INVALID_BASE64_CHARACTER, |
| 23655 | + size_t(src - srcinit + error_offset), size_t(dst - dstinit)}; |
23659 | 23656 | }
|
23660 | 23657 | if (badcharmask != 0) {
|
23661 | 23658 | // optimization opportunity: check for simple masks like those made of
|
@@ -28240,7 +28237,7 @@ struct block64 {
|
28240 | 28237 | };
|
28241 | 28238 |
|
28242 | 28239 | template <bool base64_url>
|
28243 |
| -static inline uint32_t to_base64_mask(__m256i *src, bool *error) { |
| 28240 | +static inline uint32_t to_base64_mask(__m256i *src, uint32_t *error) { |
28244 | 28241 | const __m256i ascii_space_tbl =
|
28245 | 28242 | _mm256_setr_epi8(0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x9, 0xa,
|
28246 | 28243 | 0x0, 0xc, 0xd, 0x0, 0x0, 0x20, 0x0, 0x0, 0x0, 0x0, 0x0,
|
@@ -28324,17 +28321,19 @@ static inline uint32_t to_base64_mask(__m256i *src, bool *error) {
|
28324 | 28321 | if (mask) {
|
28325 | 28322 | __m256i ascii_space =
|
28326 | 28323 | _mm256_cmpeq_epi8(_mm256_shuffle_epi8(ascii_space_tbl, *src), *src);
|
28327 |
| - *error |= (mask != _mm256_movemask_epi8(ascii_space)); |
| 28324 | + *error = (mask ^ _mm256_movemask_epi8(ascii_space)); |
28328 | 28325 | }
|
28329 | 28326 | *src = out;
|
28330 | 28327 | return (uint32_t)mask;
|
28331 | 28328 | }
|
28332 | 28329 |
|
28333 | 28330 | template <bool base64_url>
|
28334 |
| -static inline uint64_t to_base64_mask(block64 *b, bool *error) { |
28335 |
| - *error = 0; |
28336 |
| - uint64_t m0 = to_base64_mask<base64_url>(&b->chunks[0], error); |
28337 |
| - uint64_t m1 = to_base64_mask<base64_url>(&b->chunks[1], error); |
| 28331 | +static inline uint64_t to_base64_mask(block64 *b, uint64_t *error) { |
| 28332 | + uint32_t err0 = 0; |
| 28333 | + uint32_t err1 = 0; |
| 28334 | + uint64_t m0 = to_base64_mask<base64_url>(&b->chunks[0], &err0); |
| 28335 | + uint64_t m1 = to_base64_mask<base64_url>(&b->chunks[1], &err1); |
| 28336 | + *error = err0 | ((uint64_t)err1 << 32); |
28338 | 28337 | return m0 | (m1 << 32);
|
28339 | 28338 | }
|
28340 | 28339 |
|
@@ -28466,16 +28465,13 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen,
|
28466 | 28465 | block64 b;
|
28467 | 28466 | load_block(&b, src);
|
28468 | 28467 | src += 64;
|
28469 |
| - bool error = false; |
| 28468 | + uint64_t error = 0; |
28470 | 28469 | uint64_t badcharmask = to_base64_mask<base64_url>(&b, &error);
|
28471 | 28470 | if (error) {
|
28472 | 28471 | src -= 64;
|
28473 |
| - while (src < srcend && scalar::base64::is_eight_byte(*src) && |
28474 |
| - to_base64[uint8_t(*src)] <= 64) { |
28475 |
| - src++; |
28476 |
| - } |
28477 |
| - return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), |
28478 |
| - size_t(dst - dstinit)}; |
| 28472 | + size_t error_offset = _tzcnt_u64(error); |
| 28473 | + return {error_code::INVALID_BASE64_CHARACTER, |
| 28474 | + size_t(src - srcinit + error_offset), size_t(dst - dstinit)}; |
28479 | 28475 | }
|
28480 | 28476 | if (badcharmask != 0) {
|
28481 | 28477 | // optimization opportunity: check for simple masks like those made of
|
@@ -37992,7 +37988,7 @@ struct block64 {
|
37992 | 37988 | };
|
37993 | 37989 |
|
37994 | 37990 | template <bool base64_url>
|
37995 |
| -static inline uint16_t to_base64_mask(__m128i *src, bool *error) { |
| 37991 | +static inline uint16_t to_base64_mask(__m128i *src, uint32_t *error) { |
37996 | 37992 | const __m128i ascii_space_tbl =
|
37997 | 37993 | _mm_setr_epi8(0x20, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x9, 0xa, 0x0,
|
37998 | 37994 | 0xc, 0xd, 0x0, 0x0);
|
@@ -38059,22 +38055,42 @@ static inline uint16_t to_base64_mask(__m128i *src, bool *error) {
|
38059 | 38055 | if (mask) {
|
38060 | 38056 | __m128i ascii_space =
|
38061 | 38057 | _mm_cmpeq_epi8(_mm_shuffle_epi8(ascii_space_tbl, *src), *src);
|
38062 |
| - *error |= (mask != _mm_movemask_epi8(ascii_space)); |
| 38058 | + *error = (mask ^ _mm_movemask_epi8(ascii_space)); |
38063 | 38059 | }
|
38064 | 38060 | *src = out;
|
38065 | 38061 | return (uint16_t)mask;
|
38066 | 38062 | }
|
38067 | 38063 |
|
38068 | 38064 | template <bool base64_url>
|
38069 |
| -static inline uint64_t to_base64_mask(block64 *b, bool *error) { |
38070 |
| - *error = 0; |
38071 |
| - uint64_t m0 = to_base64_mask<base64_url>(&b->chunks[0], error); |
38072 |
| - uint64_t m1 = to_base64_mask<base64_url>(&b->chunks[1], error); |
38073 |
| - uint64_t m2 = to_base64_mask<base64_url>(&b->chunks[2], error); |
38074 |
| - uint64_t m3 = to_base64_mask<base64_url>(&b->chunks[3], error); |
| 38065 | +static inline uint64_t to_base64_mask(block64 *b, uint64_t *error) { |
| 38066 | + uint32_t err0 = 0; |
| 38067 | + uint32_t err1 = 0; |
| 38068 | + uint32_t err2 = 0; |
| 38069 | + uint32_t err3 = 0; |
| 38070 | + uint64_t m0 = to_base64_mask<base64_url>(&b->chunks[0], &err0); |
| 38071 | + uint64_t m1 = to_base64_mask<base64_url>(&b->chunks[1], &err1); |
| 38072 | + uint64_t m2 = to_base64_mask<base64_url>(&b->chunks[2], &err2); |
| 38073 | + uint64_t m3 = to_base64_mask<base64_url>(&b->chunks[3], &err3); |
| 38074 | + *error = (err0) | ((uint64_t)err1 << 16) | ((uint64_t)err2 << 32) | |
| 38075 | + ((uint64_t)err3 << 48); |
38075 | 38076 | return m0 | (m1 << 16) | (m2 << 32) | (m3 << 48);
|
38076 | 38077 | }
|
38077 | 38078 |
|
| 38079 | +#if defined(_MSC_VER) && !defined(__clang__) |
| 38080 | +static inline size_t simdutf_tzcnt_u64(uint64_t num) { |
| 38081 | + unsigned long ret; |
| 38082 | + if (num == 0) { |
| 38083 | + return 64; |
| 38084 | + } |
| 38085 | + _BitScanForward64(&ret, num); |
| 38086 | + return ret; |
| 38087 | +} |
| 38088 | +#else // GCC or Clang |
| 38089 | +static inline size_t simdutf_tzcnt_u64(uint64_t num) { |
| 38090 | + return num ? __builtin_ctzll(num) : 64; |
| 38091 | +} |
| 38092 | +#endif |
| 38093 | + |
38078 | 38094 | static inline void copy_block(block64 *b, char *output) {
|
38079 | 38095 | _mm_storeu_si128(reinterpret_cast<__m128i *>(output), b->chunks[0]);
|
38080 | 38096 | _mm_storeu_si128(reinterpret_cast<__m128i *>(output + 16), b->chunks[1]);
|
@@ -38222,16 +38238,13 @@ compress_decode_base64(char *dst, const chartype *src, size_t srclen,
|
38222 | 38238 | block64 b;
|
38223 | 38239 | load_block(&b, src);
|
38224 | 38240 | src += 64;
|
38225 |
| - bool error = false; |
| 38241 | + uint64_t error = 0; |
38226 | 38242 | uint64_t badcharmask = to_base64_mask<base64_url>(&b, &error);
|
38227 | 38243 | if (error) {
|
38228 | 38244 | src -= 64;
|
38229 |
| - while (src < srcend && scalar::base64::is_eight_byte(*src) && |
38230 |
| - to_base64[uint8_t(*src)] <= 64) { |
38231 |
| - src++; |
38232 |
| - } |
38233 |
| - return {error_code::INVALID_BASE64_CHARACTER, size_t(src - srcinit), |
38234 |
| - size_t(dst - dstinit)}; |
| 38245 | + size_t error_offset = simdutf_tzcnt_u64(error); |
| 38246 | + return {error_code::INVALID_BASE64_CHARACTER, |
| 38247 | + size_t(src - srcinit + error_offset), size_t(dst - dstinit)}; |
38235 | 38248 | }
|
38236 | 38249 | if (badcharmask != 0) {
|
38237 | 38250 | // optimization opportunity: check for simple masks like those made of
|
|
0 commit comments