Skip to content

Commit 25fe3d4

Browse files
committedMay 2, 2024
Merge pull request #81639 from MJacred/is_letter
Add `is_valid_letter()` to `TextServer`
2 parents b9e0223 + 717513a commit 25fe3d4

11 files changed

+799
-0
lines changed
 

‎core/string/char_range.inc

+663
Large diffs are not rendered by default.

‎core/string/char_utils.h

+4
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,10 @@ static _FORCE_INLINE_ bool is_unicode_lower_case(char32_t c) {
7070
BSEARCH_CHAR_RANGE(lowercase_letter);
7171
}
7272

73+
// Reports whether code point `c` is covered by the `unicode_letter` range table.
// The lookup (and the return) come from the BSEARCH_CHAR_RANGE macro, which is
// defined outside this excerpt and is #undef'd right after this group of helpers.
// NOTE(review): by its name the macro presumably performs a binary search — confirm.
static _FORCE_INLINE_ bool is_unicode_letter(char32_t c) {
74+
BSEARCH_CHAR_RANGE(unicode_letter);
75+
}
76+
7377
#undef BSEARCH_CHAR_RANGE
7478

7579
static _FORCE_INLINE_ bool is_ascii_upper_case(char32_t c) {

‎doc/classes/TextServer.xml

+7
Original file line numberDiff line numberDiff line change
@@ -1103,6 +1103,13 @@
11031103
- May contain Unicode characters of class XID_Continue in the other positions.
11041104
</description>
11051105
</method>
1106+
<method name="is_valid_letter" qualifiers="const">
1107+
<return type="bool" />
1108+
<param index="0" name="unicode" type="int" />
1109+
<description>
1110+
Returns [code]true[/code] if the given code point is a valid letter, i.e. it belongs to the Unicode category "L".
1111+
</description>
1112+
</method>
11061113
<method name="load_support_data">
11071114
<return type="bool" />
11081115
<param index="0" name="filename" type="String" />

‎doc/classes/TextServerExtension.xml

+6
Original file line numberDiff line numberDiff line change
@@ -1217,6 +1217,12 @@
12171217
Returns [code]true[/code] if [param string] is a valid identifier.
12181218
</description>
12191219
</method>
1220+
<method name="_is_valid_letter" qualifiers="virtual const">
1221+
<return type="bool" />
1222+
<param index="0" name="unicode" type="int" />
1223+
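<description>
Returns [code]true[/code] if the given code point is a valid letter, i.e. it belongs to the Unicode category "L". If this method is not overridden, the default [TextServer] check is used.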
1224+
</description>
1225+
</method>
12201226
<method name="_load_support_data" qualifiers="virtual">
12211227
<return type="bool" />
12221228
<param index="0" name="filename" type="String" />

‎modules/text_server_adv/text_server_adv.cpp

+10
Original file line numberDiff line numberDiff line change
@@ -7342,6 +7342,16 @@ bool TextServerAdvanced::_is_valid_identifier(const String &p_string) const {
73427342
return true;
73437343
}
73447344

7345+
// Letter check for the advanced text server.
// p_unicode: the code point to classify.
// Returns the result of u_isalpha() for the code point, except in builds where
// ICU_STATIC_DATA is not defined and `icu_data_loaded` is false: there the call is
// forwarded to TextServer::is_valid_letter() instead.
bool TextServerAdvanced::_is_valid_letter(char32_t p_unicode) const {
7346+
#ifndef ICU_STATIC_DATA
7347+
// No ICU data loaded yet: use the base-class implementation rather than u_isalpha().
if (!icu_data_loaded) {
7348+
return TextServer::is_valid_letter(p_unicode);
7349+
}
7350+
#endif
7351+
7352+
return u_isalpha(p_unicode);
7353+
}
7354+
73457355
TextServerAdvanced::TextServerAdvanced() {
73467356
_insert_num_systems_lang();
73477357
_insert_feature_sets();

‎modules/text_server_adv/text_server_adv.h

+1
Original file line numberDiff line numberDiff line change
@@ -988,6 +988,7 @@ class TextServerAdvanced : public TextServerExtension {
988988

989989
MODBIND1RC(String, strip_diacritics, const String &);
990990
MODBIND1RC(bool, is_valid_identifier, const String &);
991+
MODBIND1RC(bool, is_valid_letter, char32_t);
991992

992993
MODBIND2RC(String, string_to_upper, const String &, const String &);
993994
MODBIND2RC(String, string_to_lower, const String &, const String &);

‎servers/text/text_server_extension.cpp

+9
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,7 @@ void TextServerExtension::_bind_methods() {
332332

333333
GDVIRTUAL_BIND(_strip_diacritics, "string");
334334
GDVIRTUAL_BIND(_is_valid_identifier, "string");
335+
GDVIRTUAL_BIND(_is_valid_letter, "unicode");
335336

336337
GDVIRTUAL_BIND(_string_get_word_breaks, "string", "language", "chars_per_line");
337338
GDVIRTUAL_BIND(_string_get_character_breaks, "string", "language");
@@ -1492,6 +1493,14 @@ bool TextServerExtension::is_valid_identifier(const String &p_string) const {
14921493
return TextServer::is_valid_identifier(p_string);
14931494
}
14941495

1496+
// Letter check for extension-provided text servers.
// Tries the `_is_valid_letter` virtual through GDVIRTUAL_CALL first; when that
// call evaluates to false, falls back to TextServer::is_valid_letter().
// p_unicode: the code point to classify.
bool TextServerExtension::is_valid_letter(char32_t p_unicode) const {
1497+
// Left uninitialized; it is only read when GDVIRTUAL_CALL evaluates to true.
// NOTE(review): presumably the macro stores the override's result in `ret` in that case — confirm.
bool ret;
1498+
if (GDVIRTUAL_CALL(_is_valid_letter, p_unicode, ret)) {
1499+
return ret;
1500+
}
1501+
return TextServer::is_valid_letter(p_unicode);
1502+
}
1503+
14951504
String TextServerExtension::strip_diacritics(const String &p_string) const {
14961505
String ret;
14971506
if (GDVIRTUAL_CALL(_strip_diacritics, p_string, ret)) {

‎servers/text/text_server_extension.h

+2
Original file line numberDiff line numberDiff line change
@@ -563,6 +563,8 @@ class TextServerExtension : public TextServer {
563563

564564
virtual bool is_valid_identifier(const String &p_string) const override;
565565
GDVIRTUAL1RC(bool, _is_valid_identifier, const String &);
566+
virtual bool is_valid_letter(char32_t p_unicode) const override;
567+
GDVIRTUAL1RC(bool, _is_valid_letter, char32_t);
566568

567569
virtual String string_to_upper(const String &p_string, const String &p_language = "") const override;
568570
virtual String string_to_lower(const String &p_string, const String &p_language = "") const override;

‎servers/text_server.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,7 @@ void TextServer::_bind_methods() {
490490

491491
ClassDB::bind_method(D_METHOD("strip_diacritics", "string"), &TextServer::strip_diacritics);
492492
ClassDB::bind_method(D_METHOD("is_valid_identifier", "string"), &TextServer::is_valid_identifier);
493+
ClassDB::bind_method(D_METHOD("is_valid_letter", "unicode"), &TextServer::is_valid_letter);
493494

494495
ClassDB::bind_method(D_METHOD("string_to_upper", "string", "language"), &TextServer::string_to_upper, DEFVAL(""));
495496
ClassDB::bind_method(D_METHOD("string_to_lower", "string", "language"), &TextServer::string_to_lower, DEFVAL(""));
@@ -2182,6 +2183,10 @@ bool TextServer::is_valid_identifier(const String &p_string) const {
21822183
return true;
21832184
}
21842185

2186+
// Default letter check: forwards the code point to is_unicode_letter() and
// returns its result unchanged.
bool TextServer::is_valid_letter(char32_t p_unicode) const {
2187+
return is_unicode_letter(p_unicode);
2188+
}
2189+
21852190
TextServer::TextServer() {
21862191
_init_diacritics_map();
21872192
}

‎servers/text_server.h

+1
Original file line numberDiff line numberDiff line change
@@ -547,6 +547,7 @@ class TextServer : public RefCounted {
547547

548548
virtual String strip_diacritics(const String &p_string) const;
549549
virtual bool is_valid_identifier(const String &p_string) const;
550+
virtual bool is_valid_letter(char32_t p_unicode) const;
550551

551552
// Other string operations.
552553
virtual String string_to_upper(const String &p_string, const String &p_language = "") const = 0;

‎tests/servers/test_text_server.h

+91
Original file line numberDiff line numberDiff line change
@@ -637,6 +637,97 @@ TEST_SUITE("[TextServer]") {
637637
}
638638
}
639639

640+
// Runs is_valid_letter() over a table of code point sequences on every text
// server interface returned by TextServerManager, and checks that the first
// rejected position in each sequence matches the expected one.
SUBCASE("[TextServer] Unicode letters") {
641+
for (int i = 0; i < TextServerManager::get_singleton()->get_interface_count(); i++) {
642+
Ref<TextServer> ts = TextServerManager::get_singleton()->get_interface(i);
643+
CHECK_FALSE_MESSAGE(ts.is_null(), "Invalid TS interface.");
644+
645+
// One table entry: a sequence of up to 10 code points plus the expected outcome.
struct ul_testcase {
646+
int fail_index = -1; // Index of the first code point expected to be rejected; -1 means every code point must be accepted.
647+
char32_t text[10]; // Code points to check; a 0 entry ends the sequence early (all 10 slots may be used).
648+
};
649+
ul_testcase cases[14] = {
650+
{
651+
0,
652+
{ 0x2D, 0x33, 0x30, 0, 0, 0, 0, 0, 0, 0 }, // "-30"
653+
},
654+
{
655+
1,
656+
{ 0x61, 0x2E, 0x31, 0, 0, 0, 0, 0, 0, 0 }, // "a.1"
657+
},
658+
{
659+
1,
660+
{ 0x61, 0x2C, 0x31, 0, 0, 0, 0, 0, 0, 0 }, // "a,1"
661+
},
662+
{
663+
0,
664+
{ 0x31, 0x65, 0x2D, 0x32, 0, 0, 0, 0, 0, 0 }, // "1e-2"
665+
},
666+
{
667+
0,
668+
{ 0xAB, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // "Left-Pointing Double Angle Quotation Mark"
669+
},
670+
{
671+
-1,
672+
{ 0x41, 0x42, 0, 0, 0, 0, 0, 0, 0, 0 }, // "AB"
673+
},
674+
{
675+
4,
676+
{ 0x54, 0x65, 0x73, 0x74, 0x31, 0, 0, 0, 0, 0 }, // "Test1"
677+
},
678+
{
679+
2,
680+
{ 0x54, 0x65, 0x2A, 0x73, 0x74, 0, 0, 0, 0, 0 }, // "Te*st"
681+
},
682+
{
683+
4,
684+
{ 0x74, 0x65, 0x73, 0x74, 0x5F, 0x74, 0x65, 0x73, 0x74, 0x65 }, // "test_teste" (fills all 10 slots, no terminator)
685+
},
686+
{
687+
4,
688+
{ 0x74, 0x65, 0x73, 0x74, 0x20, 0x74, 0x65, 0x73, 0x74, 0 }, // "test test"
689+
},
690+
{
691+
-1,
692+
{ 0x643, 0x402, 0x716, 0xB05, 0, 0, 0, 0, 0, 0 }, // "كЂܖଅ" (letters from four scripts: Arabic, Cyrillic, Syriac, Odia)
693+
},
694+
{
695+
-1,
696+
{ 0x643, 0x402, 0x716, 0xB05, 0x54, 0x65, 0x73, 0x74, 0x30AA, 0x4E21 }, // 0-3 letters from mixed scripts (Arabic, Cyrillic, Syriac, Odia), 4-7 latin letters, 8 Katakana letter, 9 CJK ideograph
697+
},
698+
{
699+
-1,
700+
{ 0x4D2, 0x4D6, 0x4DA, 0x4DC, 0, 0, 0, 0, 0, 0 }, // "ӒӖӚӜ" cyrillic letters
701+
},
702+
{
703+
-1,
704+
{ 0xC2, 0xC3, 0xC4, 0xC5, 0x100, 0x102, 0x104, 0xC7, 0x106, 0x108 }, // "ÂÃÄÅĀĂĄÇĆĈ" rarer latin letters
705+
},
706+
};
707+
708+
for (int j = 0; j < 14; j++) {
709+
ul_testcase test = cases[j];
710+
int failed_on_index = -1;
711+
// Find the first code point the interface rejects; stays -1 if none is rejected.
for (int k = 0; k < 10; k++) {
712+
char32_t character = test.text[k];
713+
if (character == 0) {
714+
break;
715+
}
716+
if (!ts->is_valid_letter(character)) {
717+
failed_on_index = k;
718+
break;
719+
}
720+
}
721+
722+
// Both branches check the same condition; only the failure message differs.
if (test.fail_index == -1) {
723+
CHECK_MESSAGE(test.fail_index == failed_on_index, "In interface ", ts->get_name() + ": In test case ", j, ", the character at index ", failed_on_index, " should have been a letter.");
724+
} else {
725+
CHECK_MESSAGE(test.fail_index == failed_on_index, "In interface ", ts->get_name() + ": In test case ", j, ", expected first non-letter at index ", test.fail_index, ", but found at index ", failed_on_index);
726+
}
727+
}
728+
}
729+
}
730+
640731
SUBCASE("[TextServer] Strip Diacritics") {
641732
for (int i = 0; i < TextServerManager::get_singleton()->get_interface_count(); i++) {
642733
Ref<TextServer> ts = TextServerManager::get_singleton()->get_interface(i);

0 commit comments

Comments
 (0)
Please sign in to comment.