Skip to content

Commit a135027

Browse files
nodejs-github-bot authored and
marco-ippolito committed
deps: update icu to 75.1
PR-URL: #52573 Reviewed-By: Richard Lau <rlau@redhat.com> Reviewed-By: Michaël Zasso <targos@protonmail.com> Reviewed-By: Mohammed Keyvanzadeh <mohammadkeyvanzade94@gmail.com> Reviewed-By: Luigi Pinca <luigipinca@gmail.com>
1 parent b49464b commit a135027

File tree

408 files changed

+25369
-12777
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

408 files changed

+25369
-12777
lines changed

deps/icu-small/LICENSE

+3-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ UNICODE LICENSE V3
22

33
COPYRIGHT AND PERMISSION NOTICE
44

5-
Copyright © 2016-2023 Unicode, Inc.
5+
Copyright © 2016-2024 Unicode, Inc.
66

77
NOTICE TO USER: Carefully read the following legal agreement. BY
88
DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR
@@ -38,6 +38,8 @@ not be used in advertising or otherwise to promote the sale, use or other
3838
dealings in these Data Files or Software without prior written
3939
authorization of the copyright holder.
4040

41+
SPDX-License-Identifier: Unicode-3.0
42+
4143
----------------------------------------------------------------------
4244

4345
Third-Party Software Licenses

deps/icu-small/README-FULL-ICU.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
ICU sources - auto generated by shrink-icu-src.py
22

33
This directory contains the ICU subset used by --with-intl=full-icu
4-
It is a strict subset of ICU 74 source files with the following exception(s):
5-
* deps/icu-small/source/data/in/icudt74l.dat.bz2 : compressed data file
4+
It is a strict subset of ICU 75 source files with the following exception(s):
5+
* deps/icu-small/source/data/in/icudt75l.dat.bz2 : compressed data file
66

77

88
To rebuild this directory, see ../../tools/icu/README.md

deps/icu-small/source/common/brkeng.cpp

+2-4
Original file line numberDiff line numberDiff line change
@@ -114,13 +114,11 @@ UnhandledEngine::handleCharacter(UChar32 c) {
114114
*/
115115

116116
ICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/) {
117-
fEngines = 0;
117+
fEngines = nullptr;
118118
}
119119

120120
ICULanguageBreakFactory::~ICULanguageBreakFactory() {
121-
if (fEngines != 0) {
122-
delete fEngines;
123-
}
121+
delete fEngines;
124122
}
125123

126124
void ICULanguageBreakFactory::ensureEngines(UErrorCode& status) {

deps/icu-small/source/common/brkiter.cpp

+3-6
Original file line numberDiff line numberDiff line change
@@ -438,17 +438,14 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
438438
UTRACE_ENTRY(UTRACE_UBRK_CREATE_LINE);
439439
uprv_strcpy(lb_lw, "line");
440440
UErrorCode kvStatus = U_ZERO_ERROR;
441-
CharString value;
442-
CharStringByteSink valueSink(&value);
443-
loc.getKeywordValue("lb", valueSink, kvStatus);
441+
auto value = loc.getKeywordValue<CharString>("lb", kvStatus);
444442
if (U_SUCCESS(kvStatus) && (value == "strict" || value == "normal" || value == "loose")) {
445443
uprv_strcat(lb_lw, "_");
446444
uprv_strcat(lb_lw, value.data());
447445
}
448446
// lw=phrase is only supported in Japanese and Korean
449447
if (uprv_strcmp(loc.getLanguage(), "ja") == 0 || uprv_strcmp(loc.getLanguage(), "ko") == 0) {
450-
value.clear();
451-
loc.getKeywordValue("lw", valueSink, kvStatus);
448+
value = loc.getKeywordValue<CharString>("lw", kvStatus);
452449
if (U_SUCCESS(kvStatus) && value == "phrase") {
453450
uprv_strcat(lb_lw, "_");
454451
uprv_strcat(lb_lw, value.data());
@@ -500,7 +497,7 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status)
500497
Locale
501498
BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const {
502499
if (type == ULOC_REQUESTED_LOCALE) {
503-
return Locale(requestLocale);
500+
return {requestLocale};
504501
}
505502
U_LOCALE_BASED(locBased, *this);
506503
return locBased.getLocale(type, status);

deps/icu-small/source/common/bytesinkutil.h

+90-22
Original file line numberDiff line numberDiff line change
@@ -7,18 +7,52 @@
77
#ifndef BYTESINKUTIL_H
88
#define BYTESINKUTIL_H
99

10+
#include <type_traits>
11+
1012
#include "unicode/utypes.h"
1113
#include "unicode/bytestream.h"
1214
#include "unicode/edits.h"
15+
#include "charstr.h"
1316
#include "cmemory.h"
1417
#include "uassert.h"
18+
#include "ustr_imp.h"
1519

1620
U_NAMESPACE_BEGIN
1721

1822
class ByteSink;
19-
class CharString;
2023
class Edits;
2124

25+
class U_COMMON_API CharStringByteSink : public ByteSink {
26+
public:
27+
CharStringByteSink(CharString* dest);
28+
~CharStringByteSink() override;
29+
30+
CharStringByteSink() = delete;
31+
CharStringByteSink(const CharStringByteSink&) = delete;
32+
CharStringByteSink& operator=(const CharStringByteSink&) = delete;
33+
34+
void Append(const char* bytes, int32_t n) override;
35+
36+
char* GetAppendBuffer(int32_t min_capacity,
37+
int32_t desired_capacity_hint,
38+
char* scratch,
39+
int32_t scratch_capacity,
40+
int32_t* result_capacity) override;
41+
42+
private:
43+
CharString& dest_;
44+
};
45+
46+
// CharString doesn't provide the public API that StringByteSink requires a
47+
// string class to have so this template specialization replaces the default
48+
// implementation of StringByteSink<CharString> with CharStringByteSink.
49+
template<>
50+
class StringByteSink<CharString> : public CharStringByteSink {
51+
public:
52+
StringByteSink(CharString* dest) : CharStringByteSink(dest) { }
53+
StringByteSink(CharString* dest, int32_t /*initialAppendCapacity*/) : CharStringByteSink(dest) { }
54+
};
55+
2256
class U_COMMON_API ByteSinkUtil {
2357
public:
2458
ByteSinkUtil() = delete; // all static
@@ -57,30 +91,64 @@ class U_COMMON_API ByteSinkUtil {
5791
ByteSink &sink, uint32_t options, Edits *edits,
5892
UErrorCode &errorCode);
5993

60-
private:
61-
static void appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
62-
ByteSink &sink, uint32_t options, Edits *edits);
63-
};
64-
65-
class U_COMMON_API CharStringByteSink : public ByteSink {
66-
public:
67-
CharStringByteSink(CharString* dest);
68-
~CharStringByteSink() override;
69-
70-
CharStringByteSink() = delete;
71-
CharStringByteSink(const CharStringByteSink&) = delete;
72-
CharStringByteSink& operator=(const CharStringByteSink&) = delete;
73-
74-
void Append(const char* bytes, int32_t n) override;
94+
/**
95+
* Calls a lambda that writes to a ByteSink with a CheckedArrayByteSink
96+
* and then returns through u_terminateChars(), in order to implement
97+
* the classic ICU4C C API writing to a fixed-size buffer on top of a
98+
* contemporary C++ API.
99+
*
100+
* @param buffer receiving buffer
101+
* @param capacity capacity of receiving buffer
102+
* @param lambda that gets called with the sink as an argument
103+
* @param status set to U_BUFFER_OVERFLOW_ERROR on overflow
104+
* @return number of bytes written, or needed (in case of overflow)
105+
* @internal
106+
*/
107+
template <typename F,
108+
typename = std::enable_if_t<
109+
std::is_invocable_r_v<void, F, ByteSink&, UErrorCode&>>>
110+
static int32_t viaByteSinkToTerminatedChars(char* buffer, int32_t capacity,
111+
F&& lambda,
112+
UErrorCode& status) {
113+
if (U_FAILURE(status)) { return 0; }
114+
CheckedArrayByteSink sink(buffer, capacity);
115+
lambda(sink, status);
116+
if (U_FAILURE(status)) { return 0; }
117+
118+
int32_t reslen = sink.NumberOfBytesAppended();
119+
120+
if (sink.Overflowed()) {
121+
status = U_BUFFER_OVERFLOW_ERROR;
122+
return reslen;
123+
}
124+
125+
return u_terminateChars(buffer, capacity, reslen, &status);
126+
}
75127

76-
char* GetAppendBuffer(int32_t min_capacity,
77-
int32_t desired_capacity_hint,
78-
char* scratch,
79-
int32_t scratch_capacity,
80-
int32_t* result_capacity) override;
128+
/**
129+
* Calls a lambda that writes to a ByteSink with a CharStringByteSink and
130+
* then returns a CharString, in order to implement a contemporary C++ API
131+
* on top of a C/C++ compatibility ByteSink API.
132+
*
133+
* @param lambda that gets called with the sink as an argument
134+
* @param status to check and report
135+
* @return the resulting string, or an empty string (in case of error)
136+
* @internal
137+
*/
138+
template <typename F,
139+
typename = std::enable_if_t<
140+
std::is_invocable_r_v<void, F, ByteSink&, UErrorCode&>>>
141+
static CharString viaByteSinkToCharString(F&& lambda, UErrorCode& status) {
142+
if (U_FAILURE(status)) { return {}; }
143+
CharString result;
144+
CharStringByteSink sink(&result);
145+
lambda(sink, status);
146+
return result;
147+
}
81148

82149
private:
83-
CharString& dest_;
150+
static void appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
151+
ByteSink &sink, uint32_t options, Edits *edits);
84152
};
85153

86154
U_NAMESPACE_END

deps/icu-small/source/common/caniter.cpp

+29-21
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ U_NAMESPACE_BEGIN
6464

6565
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CanonicalIterator)
6666

67+
6768
/**
6869
*@param source string to get results for
6970
*/
@@ -73,10 +74,10 @@ CanonicalIterator::CanonicalIterator(const UnicodeString &sourceStr, UErrorCode
7374
pieces_lengths(nullptr),
7475
current(nullptr),
7576
current_length(0),
76-
nfd(*Normalizer2::getNFDInstance(status)),
77-
nfcImpl(*Normalizer2Factory::getNFCImpl(status))
77+
nfd(Normalizer2::getNFDInstance(status)),
78+
nfcImpl(Normalizer2Factory::getNFCImpl(status))
7879
{
79-
if(U_SUCCESS(status) && nfcImpl.ensureCanonIterData(status)) {
80+
if(U_SUCCESS(status) && nfcImpl->ensureCanonIterData(status)) {
8081
setSource(sourceStr, status);
8182
}
8283
}
@@ -172,7 +173,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
172173
int32_t i = 0;
173174
UnicodeString *list = nullptr;
174175

175-
nfd.normalize(newSource, source, status);
176+
nfd->normalize(newSource, source, status);
176177
if(U_FAILURE(status)) {
177178
return;
178179
}
@@ -194,7 +195,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
194195
current[0] = 0;
195196
pieces[0] = new UnicodeString[1];
196197
pieces_lengths[0] = 1;
197-
if (pieces[0] == 0) {
198+
if (pieces[0] == nullptr) {
198199
status = U_MEMORY_ALLOCATION_ERROR;
199200
goto CleanPartialInitialization;
200201
}
@@ -203,7 +204,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
203204

204205

205206
list = new UnicodeString[source.length()];
206-
if (list == 0) {
207+
if (list == nullptr) {
207208
status = U_MEMORY_ALLOCATION_ERROR;
208209
goto CleanPartialInitialization;
209210
}
@@ -219,7 +220,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
219220
// on the NFD form - see above).
220221
for (; i < source.length(); i += U16_LENGTH(cp)) {
221222
cp = source.char32At(i);
222-
if (nfcImpl.isCanonSegmentStarter(cp)) {
223+
if (nfcImpl->isCanonSegmentStarter(cp)) {
223224
source.extract(start, i-start, list[list_length++]); // add up to i
224225
start = i;
225226
}
@@ -252,9 +253,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
252253
return;
253254
// Common section to cleanup all local variables and reset object variables.
254255
CleanPartialInitialization:
255-
if (list != nullptr) {
256-
delete[] list;
257-
}
256+
delete[] list;
258257
cleanPieces();
259258
}
260259

@@ -264,10 +263,19 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
264263
* @param source the string to find permutations for
265264
* @return the results in a set.
266265
*/
267-
void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status) {
266+
void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status, int32_t depth) {
268267
if(U_FAILURE(status)) {
269268
return;
270269
}
270+
// To avoid an infinite loop caused by permute, we limit the depth of recursive
271+
// call to permute and return U_UNSUPPORTED_ERROR.
272+
// We know in some unit test we need at least 4. Set to 8 just in case some
273+
// unforeseen use cases.
274+
constexpr int32_t kPermuteDepthLimit = 8;
275+
if (depth > kPermuteDepthLimit) {
276+
status = U_UNSUPPORTED_ERROR;
277+
return;
278+
}
271279
//if (PROGRESS) printf("Permute: %s\n", UToS(Tr(source)));
272280
int32_t i = 0;
273281

@@ -277,7 +285,7 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros
277285
if (source.length() <= 2 && source.countChar32() <= 1) {
278286
UnicodeString *toPut = new UnicodeString(source);
279287
/* test for nullptr */
280-
if (toPut == 0) {
288+
if (toPut == nullptr) {
281289
status = U_MEMORY_ALLOCATION_ERROR;
282290
return;
283291
}
@@ -311,7 +319,7 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros
311319

312320
// see what the permutations of the characters before and after this one are
313321
//Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp)));
314-
permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status);
322+
permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status, depth+1);
315323
/* Test for buffer overflows */
316324
if(U_FAILURE(status)) {
317325
return;
@@ -346,7 +354,7 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
346354
Hashtable permutations(status);
347355
Hashtable basic(status);
348356
if (U_FAILURE(status)) {
349-
return 0;
357+
return nullptr;
350358
}
351359
result.setValueDeleter(uprv_deleteUObject);
352360
permutations.setValueDeleter(uprv_deleteUObject);
@@ -381,7 +389,7 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
381389
//UnicodeString *possible = new UnicodeString(*((UnicodeString *)(ne2->value.pointer)));
382390
UnicodeString possible(*((UnicodeString *)(ne2->value.pointer)));
383391
UnicodeString attempt;
384-
nfd.normalize(possible, attempt, status);
392+
nfd->normalize(possible, attempt, status);
385393

386394
// TODO: check if operator == is semantically the same as attempt.equals(segment)
387395
if (attempt==segment) {
@@ -399,15 +407,15 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
399407

400408
/* Test for buffer overflows */
401409
if(U_FAILURE(status)) {
402-
return 0;
410+
return nullptr;
403411
}
404412
// convert into a String[] to clean up storage
405413
//String[] finalResult = new String[result.size()];
406414
UnicodeString *finalResult = nullptr;
407415
int32_t resultCount;
408416
if((resultCount = result.count()) != 0) {
409417
finalResult = new UnicodeString[resultCount];
410-
if (finalResult == 0) {
418+
if (finalResult == nullptr) {
411419
status = U_MEMORY_ALLOCATION_ERROR;
412420
return nullptr;
413421
}
@@ -448,7 +456,7 @@ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const cha
448456
for (int32_t i = 0; i < segLen; i += U16_LENGTH(cp)) {
449457
// see if any character is at the start of some decomposition
450458
U16_GET(segment, 0, i, segLen, cp);
451-
if (!nfcImpl.getCanonStartSet(cp, starts)) {
459+
if (!nfcImpl->getCanonStartSet(cp, starts)) {
452460
continue;
453461
}
454462
// if so, see which decompositions match
@@ -471,7 +479,7 @@ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const cha
471479
UnicodeString item = *((UnicodeString *)(ne->value.pointer));
472480
UnicodeString *toAdd = new UnicodeString(prefix);
473481
/* test for nullptr */
474-
if (toAdd == 0) {
482+
if (toAdd == nullptr) {
475483
status = U_MEMORY_ALLOCATION_ERROR;
476484
return nullptr;
477485
}
@@ -509,7 +517,7 @@ Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, con
509517
UnicodeString temp(comp);
510518
int32_t inputLen=temp.length();
511519
UnicodeString decompString;
512-
nfd.normalize(temp, decompString, status);
520+
nfd->normalize(temp, decompString, status);
513521
if (U_FAILURE(status)) {
514522
return nullptr;
515523
}
@@ -573,7 +581,7 @@ Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, con
573581
// brute force approach
574582
// check to make sure result is canonically equivalent
575583
UnicodeString trial;
576-
nfd.normalize(temp, trial, status);
584+
nfd->normalize(temp, trial, status);
577585
if(U_FAILURE(status) || trial.compare(segment+segmentPos, segLen - segmentPos) != 0) {
578586
return nullptr;
579587
}

0 commit comments

Comments
 (0)