Skip to content

Commit c387361

Browse files
committed
Auto merge of rust-lang#15744 - pvalletbo:15395/character-byte-literals-diagnose, r=Veykril
fix: add diagnostics messages for chars and byte literal errors. This PR adds error messages for different invalid byte or character literals. Fixes rust-lang#15395
2 parents 8a23314 + f58a825 commit c387361

9 files changed

+337
-12
lines changed

crates/parser/src/lexed_str.rs

+55
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,11 @@
99
//! include info about comments and whitespace.
1010
1111
use rustc_dependencies::lexer as rustc_lexer;
12+
1213
use std::ops;
1314

15+
use rustc_lexer::unescape::{EscapeError, Mode};
16+
1417
use crate::{
1518
SyntaxKind::{self, *},
1619
T,
@@ -254,13 +257,28 @@ impl<'a> Converter<'a> {
254257
rustc_lexer::LiteralKind::Char { terminated } => {
255258
if !terminated {
256259
err = "Missing trailing `'` symbol to terminate the character literal";
260+
} else {
261+
let text = &self.res.text[self.offset + 1..][..len - 1];
262+
let i = text.rfind('\'').unwrap();
263+
let text = &text[..i];
264+
if let Err(e) = rustc_lexer::unescape::unescape_char(text) {
265+
err = error_to_diagnostic_message(e, Mode::Char);
266+
}
257267
}
258268
CHAR
259269
}
260270
rustc_lexer::LiteralKind::Byte { terminated } => {
261271
if !terminated {
262272
err = "Missing trailing `'` symbol to terminate the byte literal";
273+
} else {
274+
let text = &self.res.text[self.offset + 2..][..len - 2];
275+
let i = text.rfind('\'').unwrap();
276+
let text = &text[..i];
277+
if let Err(e) = rustc_lexer::unescape::unescape_char(text) {
278+
err = error_to_diagnostic_message(e, Mode::Byte);
279+
}
263280
}
281+
264282
BYTE
265283
}
266284
rustc_lexer::LiteralKind::Str { terminated } => {
@@ -305,3 +323,40 @@ impl<'a> Converter<'a> {
305323
self.push(syntax_kind, len, err);
306324
}
307325
}
326+
327+
fn error_to_diagnostic_message(error: EscapeError, mode: Mode) -> &'static str {
328+
match error {
329+
EscapeError::ZeroChars => "empty character literal",
330+
EscapeError::MoreThanOneChar => "character literal may only contain one codepoint",
331+
EscapeError::LoneSlash => "",
332+
EscapeError::InvalidEscape if mode == Mode::Byte || mode == Mode::ByteStr => {
333+
"unknown byte escape"
334+
}
335+
EscapeError::InvalidEscape => "unknown character escape",
336+
EscapeError::BareCarriageReturn => "",
337+
EscapeError::BareCarriageReturnInRawString => "",
338+
EscapeError::EscapeOnlyChar if mode == Mode::Byte => "byte constant must be escaped",
339+
EscapeError::EscapeOnlyChar => "character constant must be escaped",
340+
EscapeError::TooShortHexEscape => "numeric character escape is too short",
341+
EscapeError::InvalidCharInHexEscape => "invalid character in numeric character escape",
342+
EscapeError::OutOfRangeHexEscape => "out of range hex escape",
343+
EscapeError::NoBraceInUnicodeEscape => "incorrect unicode escape sequence",
344+
EscapeError::InvalidCharInUnicodeEscape => "invalid character in unicode escape",
345+
EscapeError::EmptyUnicodeEscape => "empty unicode escape",
346+
EscapeError::UnclosedUnicodeEscape => "unterminated unicode escape",
347+
EscapeError::LeadingUnderscoreUnicodeEscape => "invalid start of unicode escape",
348+
EscapeError::OverlongUnicodeEscape => "overlong unicode escape",
349+
EscapeError::LoneSurrogateUnicodeEscape => "invalid unicode character escape",
350+
EscapeError::OutOfRangeUnicodeEscape => "invalid unicode character escape",
351+
EscapeError::UnicodeEscapeInByte => "unicode escape in byte string",
352+
EscapeError::NonAsciiCharInByte if mode == Mode::Byte => {
353+
"non-ASCII character in byte literal"
354+
}
355+
EscapeError::NonAsciiCharInByte if mode == Mode::ByteStr => {
356+
"non-ASCII character in byte string literal"
357+
}
358+
EscapeError::NonAsciiCharInByte => "non-ASCII character in raw byte string literal",
359+
EscapeError::UnskippedWhitespaceWarning => "",
360+
EscapeError::MultipleSkippedLinesWarning => "",
361+
}
362+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
BYTE "b''" error: empty character literal
2+
WHITESPACE "\n"
3+
BYTE "b'\\'" error: Missing trailing `'` symbol to terminate the byte literal
4+
WHITESPACE "\n"
5+
BYTE "b'\n'" error: byte constant must be escaped
6+
WHITESPACE "\n"
7+
BYTE "b'spam'" error: character literal may only contain one codepoint
8+
WHITESPACE "\n"
9+
BYTE "b'\\x0ff'" error: character literal may only contain one codepoint
10+
WHITESPACE "\n"
11+
BYTE "b'\\\"a'" error: character literal may only contain one codepoint
12+
WHITESPACE "\n"
13+
BYTE "b'\\na'" error: character literal may only contain one codepoint
14+
WHITESPACE "\n"
15+
BYTE "b'\\ra'" error: character literal may only contain one codepoint
16+
WHITESPACE "\n"
17+
BYTE "b'\\ta'" error: character literal may only contain one codepoint
18+
WHITESPACE "\n"
19+
BYTE "b'\\\\a'" error: character literal may only contain one codepoint
20+
WHITESPACE "\n"
21+
BYTE "b'\\'a'" error: character literal may only contain one codepoint
22+
WHITESPACE "\n"
23+
BYTE "b'\\0a'" error: character literal may only contain one codepoint
24+
WHITESPACE "\n"
25+
BYTE "b'\\u{0}x'" error: character literal may only contain one codepoint
26+
WHITESPACE "\n"
27+
BYTE "b'\\u{1F63b}}'" error: character literal may only contain one codepoint
28+
WHITESPACE "\n"
29+
BYTE "b'\\v'" error: unknown byte escape
30+
WHITESPACE "\n"
31+
BYTE "b'\\💩'" error: unknown byte escape
32+
WHITESPACE "\n"
33+
BYTE "b'\\●'" error: unknown byte escape
34+
WHITESPACE "\n"
35+
BYTE "b'\\\\\\r'" error: character literal may only contain one codepoint
36+
WHITESPACE "\n"
37+
BYTE "b'\\x'" error: numeric character escape is too short
38+
WHITESPACE "\n"
39+
BYTE "b'\\x0'" error: numeric character escape is too short
40+
WHITESPACE "\n"
41+
BYTE "b'\\xf'" error: numeric character escape is too short
42+
WHITESPACE "\n"
43+
BYTE "b'\\xa'" error: numeric character escape is too short
44+
WHITESPACE "\n"
45+
BYTE "b'\\xx'" error: invalid character in numeric character escape
46+
WHITESPACE "\n"
47+
BYTE "b'\\xы'" error: invalid character in numeric character escape
48+
WHITESPACE "\n"
49+
BYTE "b'\\x🦀'" error: invalid character in numeric character escape
50+
WHITESPACE "\n"
51+
BYTE "b'\\xtt'" error: invalid character in numeric character escape
52+
WHITESPACE "\n"
53+
BYTE "b'\\xff'" error: out of range hex escape
54+
WHITESPACE "\n"
55+
BYTE "b'\\xFF'" error: out of range hex escape
56+
WHITESPACE "\n"
57+
BYTE "b'\\x80'" error: out of range hex escape
58+
WHITESPACE "\n"
59+
BYTE "b'\\u'" error: incorrect unicode escape sequence
60+
WHITESPACE "\n"
61+
BYTE "b'\\u[0123]'" error: incorrect unicode escape sequence
62+
WHITESPACE "\n"
63+
BYTE "b'\\u{0x}'" error: invalid character in unicode escape
64+
WHITESPACE "\n"
65+
BYTE "b'\\u{'" error: unterminated unicode escape
66+
WHITESPACE "\n"
67+
BYTE "b'\\u{0000'" error: unterminated unicode escape
68+
WHITESPACE "\n"
69+
BYTE "b'\\u{}'" error: empty unicode escape
70+
WHITESPACE "\n"
71+
BYTE "b'\\u{_0000}'" error: invalid start of unicode escape
72+
WHITESPACE "\n"
73+
BYTE "b'\\u{0000000}'" error: overlong unicode escape
74+
WHITESPACE "\n"
75+
BYTE "b'\\u{FFFFFF}'" error: invalid unicode character escape
76+
WHITESPACE "\n"
77+
BYTE "b'\\u{ffffff}'" error: invalid unicode character escape
78+
WHITESPACE "\n"
79+
BYTE "b'\\u{ffffff}'" error: invalid unicode character escape
80+
WHITESPACE "\n"
81+
BYTE "b'\\u{DC00}'" error: invalid unicode character escape
82+
WHITESPACE "\n"
83+
BYTE "b'\\u{DDDD}'" error: invalid unicode character escape
84+
WHITESPACE "\n"
85+
BYTE "b'\\u{DFFF}'" error: invalid unicode character escape
86+
WHITESPACE "\n"
87+
BYTE "b'\\u{D800}'" error: invalid unicode character escape
88+
WHITESPACE "\n"
89+
BYTE "b'\\u{DAAA}'" error: invalid unicode character escape
90+
WHITESPACE "\n"
91+
BYTE "b'\\u{DBFF}'" error: invalid unicode character escape
92+
WHITESPACE "\n"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
b''
2+
b'\'
3+
b'
4+
'
5+
b'spam'
6+
b'\x0ff'
7+
b'\"a'
8+
b'\na'
9+
b'\ra'
10+
b'\ta'
11+
b'\\a'
12+
b'\'a'
13+
b'\0a'
14+
b'\u{0}x'
15+
b'\u{1F63b}}'
16+
b'\v'
17+
b'\💩'
18+
b'\●'
19+
b'\\\r'
20+
b'\x'
21+
b'\x0'
22+
b'\xf'
23+
b'\xa'
24+
b'\xx'
25+
b'\xы'
26+
b'\x🦀'
27+
b'\xtt'
28+
b'\xff'
29+
b'\xFF'
30+
b'\x80'
31+
b'\u'
32+
b'\u[0123]'
33+
b'\u{0x}'
34+
b'\u{'
35+
b'\u{0000'
36+
b'\u{}'
37+
b'\u{_0000}'
38+
b'\u{0000000}'
39+
b'\u{FFFFFF}'
40+
b'\u{ffffff}'
41+
b'\u{ffffff}'
42+
b'\u{DC00}'
43+
b'\u{DDDD}'
44+
b'\u{DFFF}'
45+
b'\u{D800}'
46+
b'\u{DAAA}'
47+
b'\u{DBFF}'
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
CHAR "'hello'" error: character literal may only contain one codepoint
2+
WHITESPACE "\n"
3+
CHAR "''" error: empty character literal
4+
WHITESPACE "\n"
5+
CHAR "'\n'" error: character constant must be escaped
6+
WHITESPACE "\n"
7+
CHAR "'spam'" error: character literal may only contain one codepoint
8+
WHITESPACE "\n"
9+
CHAR "'\\x0ff'" error: character literal may only contain one codepoint
10+
WHITESPACE "\n"
11+
CHAR "'\\\"a'" error: character literal may only contain one codepoint
12+
WHITESPACE "\n"
13+
CHAR "'\\na'" error: character literal may only contain one codepoint
14+
WHITESPACE "\n"
15+
CHAR "'\\ra'" error: character literal may only contain one codepoint
16+
WHITESPACE "\n"
17+
CHAR "'\\ta'" error: character literal may only contain one codepoint
18+
WHITESPACE "\n"
19+
CHAR "'\\\\a'" error: character literal may only contain one codepoint
20+
WHITESPACE "\n"
21+
CHAR "'\\'a'" error: character literal may only contain one codepoint
22+
WHITESPACE "\n"
23+
CHAR "'\\0a'" error: character literal may only contain one codepoint
24+
WHITESPACE "\n"
25+
CHAR "'\\u{0}x'" error: character literal may only contain one codepoint
26+
WHITESPACE "\n"
27+
CHAR "'\\u{1F63b}}'" error: character literal may only contain one codepoint
28+
WHITESPACE "\n"
29+
CHAR "'\\v'" error: unknown character escape
30+
WHITESPACE "\n"
31+
CHAR "'\\💩'" error: unknown character escape
32+
WHITESPACE "\n"
33+
CHAR "'\\●'" error: unknown character escape
34+
WHITESPACE "\n"
35+
CHAR "'\\\\\\r'" error: character literal may only contain one codepoint
36+
WHITESPACE "\n"
37+
CHAR "'\\x'" error: numeric character escape is too short
38+
WHITESPACE "\n"
39+
CHAR "'\\x0'" error: numeric character escape is too short
40+
WHITESPACE "\n"
41+
CHAR "'\\xf'" error: numeric character escape is too short
42+
WHITESPACE "\n"
43+
CHAR "'\\xa'" error: numeric character escape is too short
44+
WHITESPACE "\n"
45+
CHAR "'\\xx'" error: invalid character in numeric character escape
46+
WHITESPACE "\n"
47+
CHAR "'\\xы'" error: invalid character in numeric character escape
48+
WHITESPACE "\n"
49+
CHAR "'\\x🦀'" error: invalid character in numeric character escape
50+
WHITESPACE "\n"
51+
CHAR "'\\xtt'" error: invalid character in numeric character escape
52+
WHITESPACE "\n"
53+
CHAR "'\\xff'" error: out of range hex escape
54+
WHITESPACE "\n"
55+
CHAR "'\\xFF'" error: out of range hex escape
56+
WHITESPACE "\n"
57+
CHAR "'\\x80'" error: out of range hex escape
58+
WHITESPACE "\n"
59+
CHAR "'\\u'" error: incorrect unicode escape sequence
60+
WHITESPACE "\n"
61+
CHAR "'\\u[0123]'" error: incorrect unicode escape sequence
62+
WHITESPACE "\n"
63+
CHAR "'\\u{0x}'" error: invalid character in unicode escape
64+
WHITESPACE "\n"
65+
CHAR "'\\u{'" error: unterminated unicode escape
66+
WHITESPACE "\n"
67+
CHAR "'\\u{0000'" error: unterminated unicode escape
68+
WHITESPACE "\n"
69+
CHAR "'\\u{}'" error: empty unicode escape
70+
WHITESPACE "\n"
71+
CHAR "'\\u{_0000}'" error: invalid start of unicode escape
72+
WHITESPACE "\n"
73+
CHAR "'\\u{0000000}'" error: overlong unicode escape
74+
WHITESPACE "\n"
75+
CHAR "'\\u{FFFFFF}'" error: invalid unicode character escape
76+
WHITESPACE "\n"
77+
CHAR "'\\u{ffffff}'" error: invalid unicode character escape
78+
WHITESPACE "\n"
79+
CHAR "'\\u{ffffff}'" error: invalid unicode character escape
80+
WHITESPACE "\n"
81+
CHAR "'\\u{DC00}'" error: invalid unicode character escape
82+
WHITESPACE "\n"
83+
CHAR "'\\u{DDDD}'" error: invalid unicode character escape
84+
WHITESPACE "\n"
85+
CHAR "'\\u{DFFF}'" error: invalid unicode character escape
86+
WHITESPACE "\n"
87+
CHAR "'\\u{D800}'" error: invalid unicode character escape
88+
WHITESPACE "\n"
89+
CHAR "'\\u{DAAA}'" error: invalid unicode character escape
90+
WHITESPACE "\n"
91+
CHAR "'\\u{DBFF}'" error: invalid unicode character escape
92+
WHITESPACE "\n"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
'hello'
2+
''
3+
'
4+
'
5+
'spam'
6+
'\x0ff'
7+
'\"a'
8+
'\na'
9+
'\ra'
10+
'\ta'
11+
'\\a'
12+
'\'a'
13+
'\0a'
14+
'\u{0}x'
15+
'\u{1F63b}}'
16+
'\v'
17+
'\💩'
18+
'\●'
19+
'\\\r'
20+
'\x'
21+
'\x0'
22+
'\xf'
23+
'\xa'
24+
'\xx'
25+
'\xы'
26+
'\x🦀'
27+
'\xtt'
28+
'\xff'
29+
'\xFF'
30+
'\x80'
31+
'\u'
32+
'\u[0123]'
33+
'\u{0x}'
34+
'\u{'
35+
'\u{0000'
36+
'\u{}'
37+
'\u{_0000}'
38+
'\u{0000000}'
39+
'\u{FFFFFF}'
40+
'\u{ffffff}'
41+
'\u{ffffff}'
42+
'\u{DC00}'
43+
'\u{DDDD}'
44+
'\u{DFFF}'
45+
'\u{D800}'
46+
'\u{DAAA}'
47+
'\u{DBFF}'
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,9 @@
1-
BYTE "b''"
2-
WHITESPACE " "
31
BYTE "b'x'"
42
WHITESPACE " "
53
BYTE_STRING "b\"foo\""
64
WHITESPACE " "
75
BYTE_STRING "br\"\""
86
WHITESPACE "\n"
9-
BYTE "b''suf"
10-
WHITESPACE " "
117
BYTE_STRING "b\"\"ix"
128
WHITESPACE " "
139
BYTE_STRING "br\"\"br"
@@ -17,6 +13,4 @@ WHITESPACE " "
1713
BYTE "b'\\\\'"
1814
WHITESPACE " "
1915
BYTE "b'\\''"
20-
WHITESPACE " "
21-
BYTE "b'hello'"
2216
WHITESPACE "\n"
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
b'' b'x' b"foo" br""
2-
b''suf b""ix br""br
3-
b'\n' b'\\' b'\'' b'hello'
1+
b'x' b"foo" br""
2+
b""ix br""br
3+
b'\n' b'\\' b'\''

0 commit comments

Comments
 (0)