Skip to content

Commit 894d711

Browse files
mertcanaltin authored and juanarbol committed
test_runner: parse non-ascii character correctly
PR-URL: #45736
Backport-PR-URL: #46839
Reviewed-By: Yagiz Nizipli <yagiz@nizipli.com>
Reviewed-By: Moshe Atlow <moshe@atlow.co.il>
1 parent 5b3c606 commit 894d711

File tree

3 files changed

+74
-24
lines changed

3 files changed

+74
-24
lines changed

lib/internal/test_runner/tap_lexer.js

+25-13
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,15 @@ const {
55
ArrayPrototypePush,
66
MathMax,
77
SafeSet,
8-
StringPrototypeIncludes,
8+
StringPrototypeCodePointAt,
99
StringPrototypeTrim,
1010
} = primordials;
1111
const {
1212
codes: { ERR_TAP_LEXER_ERROR },
1313
} = require('internal/errors');
1414

15+
const { isZeroWidthCodePoint } = require('internal/util/inspect');
16+
1517
const kEOL = '';
1618
const kEOF = '';
1719

@@ -474,18 +476,28 @@ class TapLexer {
474476
}
475477

476478
#isLiteralSymbol(char) {
477-
return (
478-
(char >= 'a' && char <= 'z') ||
479-
(char >= 'A' && char <= 'Z') ||
480-
this.#isSpecialCharacterSymbol(char)
481-
);
482-
}
483-
484-
#isSpecialCharacterSymbol(char) {
485-
// We deliberately do not include "# \ + -"" in this list
486-
// these are used for comments/reasons explanations, pragma and escape characters
487-
// whitespace is not included because it is handled separately
488-
return StringPrototypeIncludes('!"$%&\'()*,./:;<=>?@[]^_`{|}~', char);
479+
if (typeof char !== 'string') return false;
480+
const charCode = StringPrototypeCodePointAt(char);
481+
482+
if (isZeroWidthCodePoint(charCode)) return false;
483+
if (this.#isWhitespaceSymbol(char)) return false;
484+
const MAX_ASCII_CHAR_CODE = 0b111_1111; // ASCII is 7-bit long
485+
// Allow all non-latin characters.
486+
if (charCode > MAX_ASCII_CHAR_CODE) return true;
487+
const ZERO = 48; // 0
488+
const NINE = 58; // 9
489+
// Disallow numeric values
490+
if (charCode >= ZERO && char <= NINE) return false;
491+
492+
// Disallow characters with special meaning in TAP
493+
const HASH = 35; // #
494+
const BACKSLASH = 92; // \
495+
const PLUS = 43; // +
496+
const DASH = 45; // -
497+
498+
// Disallow characters with special meaning in TAP
499+
return charCode !== HASH && charCode !== BACKSLASH &&
500+
charCode !== PLUS && charCode !== DASH;
489501
}
490502

491503
#isWhitespaceSymbol(char) {

lib/internal/util/inspect.js

+13-11
Original file line numberDiff line numberDiff line change
@@ -2277,6 +2277,18 @@ function formatWithOptionsInternal(inspectOptions, args) {
22772277
return str;
22782278
}
22792279

2280+
function isZeroWidthCodePoint(code) {
2281+
return code <= 0x1F || // C0 control codes
2282+
(code >= 0x7F && code <= 0x9F) || // DEL and C1 control codes
2283+
(code >= 0x300 && code <= 0x36F) || // Combining Diacritical Marks
2284+
(code >= 0x200B && code <= 0x200F) || // Modifying Invisible Characters
2285+
// Combining Diacritical Marks for Symbols
2286+
(code >= 0x20D0 && code <= 0x20FF) ||
2287+
(code >= 0xFE00 && code <= 0xFE0F) || // Variation Selectors
2288+
(code >= 0xFE20 && code <= 0xFE2F) || // Combining Half Marks
2289+
(code >= 0xE0100 && code <= 0xE01EF); // Variation Selectors Supplement
2290+
}
2291+
22802292
if (internalBinding('config').hasIntl) {
22812293
const icu = internalBinding('icu');
22822294
// icu.getStringWidth(string, ambiguousAsFullWidth, expandEmojiSequence)
@@ -2366,17 +2378,6 @@ if (internalBinding('config').hasIntl) {
23662378
);
23672379
};
23682380

2369-
const isZeroWidthCodePoint = (code) => {
2370-
return code <= 0x1F || // C0 control codes
2371-
(code >= 0x7F && code <= 0x9F) || // C1 control codes
2372-
(code >= 0x300 && code <= 0x36F) || // Combining Diacritical Marks
2373-
(code >= 0x200B && code <= 0x200F) || // Modifying Invisible Characters
2374-
// Combining Diacritical Marks for Symbols
2375-
(code >= 0x20D0 && code <= 0x20FF) ||
2376-
(code >= 0xFE00 && code <= 0xFE0F) || // Variation Selectors
2377-
(code >= 0xFE20 && code <= 0xFE2F) || // Combining Half Marks
2378-
(code >= 0xE0100 && code <= 0xE01EF); // Variation Selectors
2379-
};
23802381
}
23812382

23822383
/**
@@ -2396,4 +2397,5 @@ module.exports = {
23962397
formatWithOptions,
23972398
getStringWidth,
23982399
stripVTControlCharacters,
2400+
isZeroWidthCodePoint,
23992401
};

test/parallel/test-runner-tap-lexer.js

+36
Original file line numberDiff line numberDiff line change
@@ -444,3 +444,39 @@ ok 1
444444
assert.strictEqual(tokens[index].value, token.value);
445445
});
446446
}
447+
448+
// isLiteralSymbol: a non-ASCII run (Arabic, CJK, emoji) in a test description is expected to lex as one LITERAL token
449+
{
450+
const tokens = TAPLexer('ok 1 - description أتث讲演講👍🔥');
451+
452+
[
453+
{ kind: TokenKind.TAP_TEST_OK, value: 'ok' },
454+
{ kind: TokenKind.WHITESPACE, value: ' ' },
455+
{ kind: TokenKind.NUMERIC, value: '1' },
456+
{ kind: TokenKind.WHITESPACE, value: ' ' },
457+
{ kind: TokenKind.DASH, value: '-' },
458+
{ kind: TokenKind.WHITESPACE, value: ' ' },
459+
{ kind: TokenKind.LITERAL, value: 'description' },
460+
{ kind: TokenKind.WHITESPACE, value: ' ' },
461+
{ kind: TokenKind.LITERAL, value: 'أتث讲演講👍🔥' },
462+
{ kind: TokenKind.EOL, value: '' },
463+
].forEach((token, index) => {
464+
assert.strictEqual(tokens[index].kind, token.kind);
465+
assert.strictEqual(tokens[index].value, token.value);
466+
});
467+
}
468+
469+
// A non-ASCII run (Arabic, CJK, emoji) following a '#' comment marker is expected to lex as one LITERAL token
{
470+
const tokens = TAPLexer('# comment أتث讲演講👍🔥');
471+
[
472+
{ kind: TokenKind.COMMENT, value: '#' },
473+
{ kind: TokenKind.WHITESPACE, value: ' ' },
474+
{ kind: TokenKind.LITERAL, value: 'comment' },
475+
{ kind: TokenKind.WHITESPACE, value: ' ' },
476+
{ kind: TokenKind.LITERAL, value: 'أتث讲演講👍🔥' },
477+
{ kind: TokenKind.EOL, value: '' },
478+
].forEach((token, index) => {
479+
assert.strictEqual(tokens[index].kind, token.kind);
480+
assert.strictEqual(tokens[index].value, token.value);
481+
});
482+
}

0 commit comments

Comments
 (0)