Skip to content

Commit a3aa71a

Browse files
addaleax authored and BridgeAR committed
util,readline: NFC-normalize strings before getStringWidth
The assumption here is that decomposed characters render like their composed character equivalents, and that working with the former comes with a risk of over-estimating string widths given that we compute them on a per-code-point basis. The regression test added here (한글 vs 한글, i.e. the decomposed and composed forms of the same text) is an example of that happening.

PR-URL: #33052
Reviewed-By: Gus Caplan <me@gus.host>
Reviewed-By: Michaël Zasso <targos@protonmail.com>
Reviewed-By: Anto Aravinth <anto.aravinth.cse@gmail.com>
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
1 parent 98a2c67 commit a3aa71a

File tree

2 files changed

+19
-5
lines changed

2 files changed

+19
-5
lines changed

lib/internal/util/inspect.js

+10-5
Original file line number | Diff line number | Diff line change
@@ -1917,6 +1917,13 @@ function formatWithOptionsInternal(inspectOptions, ...args) {
19171917
return str;
19181918
}
19191919

1920+
/**
 * Prepares a string for display-width measurement.
 *
 * The string is always converted to Unicode Normalization Form C. Per the
 * commit description, widths are computed on a per-code-point basis, so a
 * decomposed sequence risks being over-estimated compared with its composed
 * equivalent.
 *
 * @param {string} str The string that is about to be measured.
 * @param {boolean} removeControlChars When truthy, the normalized string is
 *   additionally passed through `stripVTControlCharacters()`.
 * @returns {string} The normalized (and optionally stripped) string.
 */
function prepareStringForGetStringWidth(str, removeControlChars) {
1921+
// Compose before anything else so later per-code-point logic sees the
// composed form.
str = str.normalize('NFC');
1922+
if (removeControlChars)
1923+
str = stripVTControlCharacters(str);
1924+
return str;
1925+
}
1926+
19201927
if (internalBinding('config').hasIntl) {
19211928
const icu = internalBinding('icu');
19221929
// icu.getStringWidth(string, ambiguousAsFullWidth, expandEmojiSequence)
@@ -1926,8 +1933,8 @@ if (internalBinding('config').hasIntl) {
19261933
// the receiving end supports.
19271934
getStringWidth = function getStringWidth(str, removeControlChars = true) {
19281935
let width = 0;
1929-
if (removeControlChars)
1930-
str = stripVTControlCharacters(str);
1936+
1937+
str = prepareStringForGetStringWidth(str, removeControlChars);
19311938
for (let i = 0; i < str.length; i++) {
19321939
// Try to avoid calling into C++ by first handling the ASCII portion of
19331940
// the string. If it is fully ASCII, we skip the C++ part.
@@ -1947,9 +1954,7 @@ if (internalBinding('config').hasIntl) {
19471954
getStringWidth = function getStringWidth(str, removeControlChars = true) {
19481955
let width = 0;
19491956

1950-
if (removeControlChars)
1951-
str = stripVTControlCharacters(str);
1952-
1957+
str = prepareStringForGetStringWidth(str, removeControlChars);
19531958
for (const char of str) {
19541959
const code = char.codePointAt(0);
19551960
if (isFullWidthCodePoint(code)) {

test/parallel/test-icu-stringwidth.js

+9
Original file line number | Diff line number | Diff line change
@@ -87,3 +87,12 @@ for (let i = 0; i < 256; i++) {
8787
assert.strictEqual(getStringWidth(char), 1);
8888
}
8989
}
90+
91+
// Regression test: the same text in decomposed (NFD) and composed (NFC) form
// must report the same display width.
{
92+
const a = '한글'.normalize('NFD'); // 한글
93+
const b = '한글'.normalize('NFC'); // 한글
94+
// The two forms differ in UTF-16 length (6 code units vs. 2) ...
assert.strictEqual(a.length, 6);
95+
assert.strictEqual(b.length, 2);
96+
// ... but both are expected to measure 4 columns wide.
assert.strictEqual(getStringWidth(a), 4);
97+
assert.strictEqual(getStringWidth(b), 4);
98+
}

0 commit comments

Comments
 (0)