Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Core] Optimize some String methods #92548

Merged
merged 1 commit into from
Aug 26, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 48 additions & 29 deletions core/string/ustring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1658,30 +1658,40 @@ char32_t String::char_lowercase(char32_t p_char) {
}

String String::to_upper() const {
String upper = *this;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This avoids doing double copy by allocating a new string and then filling it up once, instead of copying all the data and then replacing it if it differs

if (is_empty()) {
return *this;
}

for (int i = 0; i < upper.size(); i++) {
const char32_t s = upper[i];
const char32_t t = _find_upper(s);
if (s != t) { // avoid copy on write
upper[i] = t;
}
String upper;
upper.resize(size());
const char32_t *old_ptr = ptr();
char32_t *upper_ptrw = upper.ptrw();

while (*old_ptr) {
*upper_ptrw++ = _find_upper(*old_ptr++);
}

*upper_ptrw = 0;

return upper;
}

String String::to_lower() const {
String lower = *this;
if (is_empty()) {
return *this;
}

for (int i = 0; i < lower.size(); i++) {
const char32_t s = lower[i];
const char32_t t = _find_lower(s);
if (s != t) { // avoid copy on write
lower[i] = t;
}
String lower;
lower.resize(size());
const char32_t *old_ptr = ptr();
char32_t *lower_ptrw = lower.ptrw();

while (*old_ptr) {
*lower_ptrw++ = _find_lower(*old_ptr++);
}

*lower_ptrw = 0;

return lower;
}

Expand Down Expand Up @@ -1919,15 +1929,16 @@ String String::hex_encode_buffer(const uint8_t *p_buffer, int p_len) {
static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };

String ret;
char v[2] = { 0, 0 };
ret.resize(p_len * 2 + 1);
char32_t *ret_ptrw = ret.ptrw();

for (int i = 0; i < p_len; i++) {
v[0] = hex[p_buffer[i] >> 4];
ret += v;
v[0] = hex[p_buffer[i] & 0xF];
ret += v;
*ret_ptrw++ = hex[p_buffer[i] >> 4];
*ret_ptrw++ = hex[p_buffer[i] & 0xF];
}

*ret_ptrw = 0;

return ret;
}

Expand All @@ -1950,11 +1961,12 @@ Vector<uint8_t> String::hex_decode() const {
Vector<uint8_t> out;
int len = length() / 2;
out.resize(len);
uint8_t *out_ptrw = out.ptrw();
for (int i = 0; i < len; i++) {
char32_t c;
HEX_TO_BYTE(first, i * 2);
HEX_TO_BYTE(second, i * 2 + 1);
out.write[i] = first * 16 + second;
out_ptrw[i] = first * 16 + second;
}
return out;
#undef HEX_TO_BYTE
Expand All @@ -1975,14 +1987,16 @@ CharString String::ascii(bool p_allow_extended) const {

CharString cs;
cs.resize(size());
char *cs_ptrw = cs.ptrw();
const char32_t *this_ptr = ptr();

for (int i = 0; i < size(); i++) {
char32_t c = operator[](i);
char32_t c = this_ptr[i];
if ((c <= 0x7f) || (c <= 0xff && p_allow_extended)) {
cs[i] = c;
cs_ptrw[i] = c;
} else {
print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as ASCII/Latin-1", (uint32_t)c));
cs[i] = 0x20; // ascii doesn't have a replacement character like unicode, 0x1a is sometimes used but is kinda arcane
cs_ptrw[i] = 0x20; // ASCII doesn't have a replacement character like unicode, 0x1a is sometimes used but is kinda arcane.
}
}

Expand Down Expand Up @@ -3115,8 +3129,9 @@ Vector<uint8_t> String::md5_buffer() const {

Vector<uint8_t> ret;
ret.resize(16);
uint8_t *ret_ptrw = ret.ptrw();
for (int i = 0; i < 16; i++) {
ret.write[i] = hash[i];
ret_ptrw[i] = hash[i];
}
return ret;
}
Expand All @@ -3128,8 +3143,9 @@ Vector<uint8_t> String::sha1_buffer() const {

Vector<uint8_t> ret;
ret.resize(20);
uint8_t *ret_ptrw = ret.ptrw();
for (int i = 0; i < 20; i++) {
ret.write[i] = hash[i];
ret_ptrw[i] = hash[i];
}

return ret;
Expand All @@ -3142,8 +3158,9 @@ Vector<uint8_t> String::sha256_buffer() const {

Vector<uint8_t> ret;
ret.resize(32);
uint8_t *ret_ptrw = ret.ptrw();
for (int i = 0; i < 32; i++) {
ret.write[i] = hash[i];
ret_ptrw[i] = hash[i];
}
return ret;
}
Expand Down Expand Up @@ -3871,8 +3888,9 @@ Vector<String> String::bigrams() const {
return b;
}
b.resize(n_pairs);
String *b_ptrw = b.ptrw();
for (int i = 0; i < n_pairs; i++) {
b.write[i] = substr(i, 2);
b_ptrw[i] = substr(i, 2);
}
return b;
}
Expand Down Expand Up @@ -4757,8 +4775,9 @@ String String::xml_unescape() const {
return String();
}
str.resize(len + 1);
_xml_unescape(get_data(), l, str.ptrw());
str[len] = 0;
char32_t *str_ptrw = str.ptrw();
_xml_unescape(get_data(), l, str_ptrw);
str_ptrw[len] = 0;
return str;
}

Expand Down
Loading