@@ -64,6 +64,7 @@ U_NAMESPACE_BEGIN
64
64
65
65
UOBJECT_DEFINE_RTTI_IMPLEMENTATION (CanonicalIterator)
66
66
67
+
67
68
/* *
68
69
*@param source string to get results for
69
70
*/
@@ -73,10 +74,10 @@ CanonicalIterator::CanonicalIterator(const UnicodeString &sourceStr, UErrorCode
73
74
pieces_lengths(nullptr ),
74
75
current(nullptr ),
75
76
current_length(0 ),
76
- nfd(* Normalizer2::getNFDInstance (status)),
77
- nfcImpl(* Normalizer2Factory::getNFCImpl (status))
77
+ nfd(Normalizer2::getNFDInstance(status)),
78
+ nfcImpl(Normalizer2Factory::getNFCImpl(status))
78
79
{
79
- if (U_SUCCESS (status) && nfcImpl. ensureCanonIterData (status)) {
80
+ if (U_SUCCESS (status) && nfcImpl-> ensureCanonIterData (status)) {
80
81
setSource (sourceStr, status);
81
82
}
82
83
}
@@ -172,7 +173,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
172
173
int32_t i = 0 ;
173
174
UnicodeString *list = nullptr ;
174
175
175
- nfd. normalize (newSource, source, status);
176
+ nfd-> normalize (newSource, source, status);
176
177
if (U_FAILURE (status)) {
177
178
return ;
178
179
}
@@ -194,7 +195,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
194
195
current[0 ] = 0 ;
195
196
pieces[0 ] = new UnicodeString[1 ];
196
197
pieces_lengths[0 ] = 1 ;
197
- if (pieces[0 ] == 0 ) {
198
+ if (pieces[0 ] == nullptr ) {
198
199
status = U_MEMORY_ALLOCATION_ERROR;
199
200
goto CleanPartialInitialization;
200
201
}
@@ -203,7 +204,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
203
204
204
205
205
206
list = new UnicodeString[source.length ()];
206
- if (list == 0 ) {
207
+ if (list == nullptr ) {
207
208
status = U_MEMORY_ALLOCATION_ERROR;
208
209
goto CleanPartialInitialization;
209
210
}
@@ -219,7 +220,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
219
220
// on the NFD form - see above).
220
221
for (; i < source.length (); i += U16_LENGTH (cp)) {
221
222
cp = source.char32At (i);
222
- if (nfcImpl. isCanonSegmentStarter (cp)) {
223
+ if (nfcImpl-> isCanonSegmentStarter (cp)) {
223
224
source.extract (start, i-start, list[list_length++]); // add up to i
224
225
start = i;
225
226
}
@@ -252,9 +253,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
252
253
return ;
253
254
// Common section to cleanup all local variables and reset object variables.
254
255
CleanPartialInitialization:
255
- if (list != nullptr ) {
256
- delete[] list;
257
- }
256
+ delete[] list;
258
257
cleanPieces ();
259
258
}
260
259
@@ -264,10 +263,19 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
264
263
* @param source the string to find permutations for
265
264
* @return the results in a set.
266
265
*/
267
- void U_EXPORT2 CanonicalIterator::permute (UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status) {
266
+ void U_EXPORT2 CanonicalIterator::permute (UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status, int32_t depth ) {
268
267
if (U_FAILURE (status)) {
269
268
return ;
270
269
}
270
+ // To avoid an infinite loop caused by permute, we limit the depth of recursive
271
+ // calls to permute and return U_UNSUPPORTED_ERROR when the limit is exceeded.
272
+ // We know that some unit tests need a depth of at least 4. Set to 8 just in case of
273
+ // unforeseen use cases.
274
+ constexpr int32_t kPermuteDepthLimit = 8 ;
275
+ if (depth > kPermuteDepthLimit ) {
276
+ status = U_UNSUPPORTED_ERROR;
277
+ return ;
278
+ }
271
279
// if (PROGRESS) printf("Permute: %s\n", UToS(Tr(source)));
272
280
int32_t i = 0 ;
273
281
@@ -277,7 +285,7 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros
277
285
if (source.length () <= 2 && source.countChar32 () <= 1 ) {
278
286
UnicodeString *toPut = new UnicodeString (source);
279
287
/* test for nullptr */
280
- if (toPut == 0 ) {
288
+ if (toPut == nullptr ) {
281
289
status = U_MEMORY_ALLOCATION_ERROR;
282
290
return ;
283
291
}
@@ -311,7 +319,7 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros
311
319
312
320
// see what the permutations of the characters before and after this one are
313
321
// Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp)));
314
- permute (subPermuteString.remove (i, U16_LENGTH (cp)), skipZeros, &subpermute, status);
322
+ permute (subPermuteString.remove (i, U16_LENGTH (cp)), skipZeros, &subpermute, status, depth+ 1 );
315
323
/* Test for buffer overflows */
316
324
if (U_FAILURE (status)) {
317
325
return ;
@@ -346,7 +354,7 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
346
354
Hashtable permutations (status);
347
355
Hashtable basic (status);
348
356
if (U_FAILURE (status)) {
349
- return 0 ;
357
+ return nullptr ;
350
358
}
351
359
result.setValueDeleter (uprv_deleteUObject);
352
360
permutations.setValueDeleter (uprv_deleteUObject);
@@ -381,7 +389,7 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
381
389
// UnicodeString *possible = new UnicodeString(*((UnicodeString *)(ne2->value.pointer)));
382
390
UnicodeString possible (*((UnicodeString *)(ne2->value .pointer )));
383
391
UnicodeString attempt;
384
- nfd. normalize (possible, attempt, status);
392
+ nfd-> normalize (possible, attempt, status);
385
393
386
394
// TODO: check if operator == is semantically the same as attempt.equals(segment)
387
395
if (attempt==segment) {
@@ -399,15 +407,15 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
399
407
400
408
/* Test for buffer overflows */
401
409
if (U_FAILURE (status)) {
402
- return 0 ;
410
+ return nullptr ;
403
411
}
404
412
// convert into a String[] to clean up storage
405
413
// String[] finalResult = new String[result.size()];
406
414
UnicodeString *finalResult = nullptr ;
407
415
int32_t resultCount;
408
416
if ((resultCount = result.count ()) != 0 ) {
409
417
finalResult = new UnicodeString[resultCount];
410
- if (finalResult == 0 ) {
418
+ if (finalResult == nullptr ) {
411
419
status = U_MEMORY_ALLOCATION_ERROR;
412
420
return nullptr ;
413
421
}
@@ -448,7 +456,7 @@ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const cha
448
456
for (int32_t i = 0 ; i < segLen; i += U16_LENGTH (cp)) {
449
457
// see if any character is at the start of some decomposition
450
458
U16_GET (segment, 0 , i, segLen, cp);
451
- if (!nfcImpl. getCanonStartSet (cp, starts)) {
459
+ if (!nfcImpl-> getCanonStartSet (cp, starts)) {
452
460
continue ;
453
461
}
454
462
// if so, see which decompositions match
@@ -471,7 +479,7 @@ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const cha
471
479
UnicodeString item = *((UnicodeString *)(ne->value .pointer ));
472
480
UnicodeString *toAdd = new UnicodeString (prefix);
473
481
/* test for nullptr */
474
- if (toAdd == 0 ) {
482
+ if (toAdd == nullptr ) {
475
483
status = U_MEMORY_ALLOCATION_ERROR;
476
484
return nullptr ;
477
485
}
@@ -509,7 +517,7 @@ Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, con
509
517
UnicodeString temp (comp);
510
518
int32_t inputLen=temp.length ();
511
519
UnicodeString decompString;
512
- nfd. normalize (temp, decompString, status);
520
+ nfd-> normalize (temp, decompString, status);
513
521
if (U_FAILURE (status)) {
514
522
return nullptr ;
515
523
}
@@ -573,7 +581,7 @@ Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, con
573
581
// brute force approach
574
582
// check to make sure result is canonically equivalent
575
583
UnicodeString trial;
576
- nfd. normalize (temp, trial, status);
584
+ nfd-> normalize (temp, trial, status);
577
585
if (U_FAILURE (status) || trial.compare (segment+segmentPos, segLen - segmentPos) != 0 ) {
578
586
return nullptr ;
579
587
}
0 commit comments