Skip to content

Commit f3e1136

Browse files
authored
Merge pull request #756 from byroot/utf8-snippets
Ensure parser error snippets are valid UTF-8
2 parents b86a47d + e144793 commit f3e1136

File tree

3 files changed

+30
-3
lines changed

3 files changed

+30
-3
lines changed

CHANGES.md

+2
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
# Changes
22

3+
* Ensure document snippets that are included in parser errors don't include truncated multibyte characters.
4+
35
### 2025-02-10 (2.10.1)
46

57
* Fix a compatibility issue with `MultiJson.dump(obj, pretty: true)`: `no implicit conversion of false into Proc (TypeError)`.

ext/json/ext/parser/parser.c

+12-3
Original file line number | Diff line number | Diff line change
@@ -454,15 +454,24 @@ RBIMPL_ATTR_NORETURN()
454454
#endif
455455
static void raise_parse_error(const char *format, const char *start)
456456
{
457-
char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
457+
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
458458

459459
size_t len = start ? strnlen(start, PARSE_ERROR_FRAGMENT_LEN) : 0;
460460
const char *ptr = start;
461461

462462
if (len == PARSE_ERROR_FRAGMENT_LEN) {
463463
MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN);
464-
buffer[PARSE_ERROR_FRAGMENT_LEN] = '\0';
465-
ptr = buffer;
464+
465+
while (buffer[len - 1] >= 0x80 && buffer[len - 1] < 0xC0) { // Is continuation byte
466+
len--;
467+
}
468+
469+
if (buffer[len - 1] >= 0xC0) { // multibyte character start
470+
len--;
471+
}
472+
473+
buffer[len] = '\0';
474+
ptr = (const char *)buffer;
466475
}
467476

468477
rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr);

test/json/json_parser_test.rb

+16
Original file line number | Diff line number | Diff line change
@@ -645,6 +645,22 @@ def test_parse_error_incomplete_hash
645645
end
646646
end
647647

648+
def test_parse_error_snippet
649+
omit "C ext only test" unless RUBY_ENGINE == "ruby"
650+
651+
error = assert_raise(JSON::ParserError) { JSON.parse("あああああああああああああああああああああああ") }
652+
assert_equal "unexpected character: 'ああああああああああ'", error.message
653+
654+
error = assert_raise(JSON::ParserError) { JSON.parse("aあああああああああああああああああああああああ") }
655+
assert_equal "unexpected character: 'aああああああああああ'", error.message
656+
657+
error = assert_raise(JSON::ParserError) { JSON.parse("abあああああああああああああああああああああああ") }
658+
assert_equal "unexpected character: 'abあああああああああ'", error.message
659+
660+
error = assert_raise(JSON::ParserError) { JSON.parse("abcあああああああああああああああああああああああ") }
661+
assert_equal "unexpected character: 'abcあああああああああ'", error.message
662+
end
663+
648664
def test_parse_leading_slash
649665
# ref: https://github.com/ruby/ruby/pull/12598
650666
assert_raise(JSON::ParserError) do

0 commit comments

Comments
 (0)