Skip to content

Commit 57911f1

Browse files
authored
Merge pull request #762 from byroot/invalid-escape
Raise a ParserError on all incomplete unicode escape sequences.
2 parents c079793 + 7d0637b commit 57911f1

File tree

3 files changed

+48
-43
lines changed

3 files changed

+48
-43
lines changed

CHANGES.md

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# Changes
22

3+
* Raise a ParserError on all incomplete unicode escape sequences. This was the behavior until `2.10.0` inadvertently changed it.
34
* Ensure document snippets that are included in parser errors don't include truncated multibyte characters.
45

56
### 2025-02-10 (2.10.1)

ext/json/ext/parser/parser.c

+42-43
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,44 @@ static void rvalue_stack_eagerly_release(VALUE handle)
341341
}
342342
}
343343

344+
345+
#ifndef HAVE_STRNLEN
346+
static size_t strnlen(const char *s, size_t maxlen)
347+
{
348+
char *p;
349+
return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen);
350+
}
351+
#endif
352+
353+
#define PARSE_ERROR_FRAGMENT_LEN 32
354+
#ifdef RBIMPL_ATTR_NORETURN
355+
RBIMPL_ATTR_NORETURN()
356+
#endif
357+
static void raise_parse_error(const char *format, const char *start)
358+
{
359+
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
360+
361+
size_t len = start ? strnlen(start, PARSE_ERROR_FRAGMENT_LEN) : 0;
362+
const char *ptr = start;
363+
364+
if (len == PARSE_ERROR_FRAGMENT_LEN) {
365+
MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN);
366+
367+
while (buffer[len - 1] >= 0x80 && buffer[len - 1] < 0xC0) { // Is continuation byte
368+
len--;
369+
}
370+
371+
if (buffer[len - 1] >= 0xC0) { // multibyte character start
372+
len--;
373+
}
374+
375+
buffer[len] = '\0';
376+
ptr = (const char *)buffer;
377+
}
378+
379+
rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr);
380+
}
381+
344382
/* unicode */
345383

346384
static const signed char digit_values[256] = {
@@ -362,21 +400,19 @@ static const signed char digit_values[256] = {
362400

363401
static uint32_t unescape_unicode(const unsigned char *p)
364402
{
365-
const uint32_t replacement_char = 0xFFFD;
366-
367403
signed char b;
368404
uint32_t result = 0;
369405
b = digit_values[p[0]];
370-
if (b < 0) return replacement_char;
406+
if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2);
371407
result = (result << 4) | (unsigned char)b;
372408
b = digit_values[p[1]];
373-
if (b < 0) return replacement_char;
409+
if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2);
374410
result = (result << 4) | (unsigned char)b;
375411
b = digit_values[p[2]];
376-
if (b < 0) return replacement_char;
412+
if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2);
377413
result = (result << 4) | (unsigned char)b;
378414
b = digit_values[p[3]];
379-
if (b < 0) return replacement_char;
415+
if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2);
380416
result = (result << 4) | (unsigned char)b;
381417
return result;
382418
}
@@ -440,43 +476,6 @@ typedef struct JSON_ParserStateStruct {
440476

441477
static const rb_data_type_t JSON_ParserConfig_type;
442478

443-
#ifndef HAVE_STRNLEN
444-
static size_t strnlen(const char *s, size_t maxlen)
445-
{
446-
char *p;
447-
return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen);
448-
}
449-
#endif
450-
451-
#define PARSE_ERROR_FRAGMENT_LEN 32
452-
#ifdef RBIMPL_ATTR_NORETURN
453-
RBIMPL_ATTR_NORETURN()
454-
#endif
455-
static void raise_parse_error(const char *format, const char *start)
456-
{
457-
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
458-
459-
size_t len = start ? strnlen(start, PARSE_ERROR_FRAGMENT_LEN) : 0;
460-
const char *ptr = start;
461-
462-
if (len == PARSE_ERROR_FRAGMENT_LEN) {
463-
MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN);
464-
465-
while (buffer[len - 1] >= 0x80 && buffer[len - 1] < 0xC0) { // Is continuation byte
466-
len--;
467-
}
468-
469-
if (buffer[len - 1] >= 0xC0) { // multibyte character start
470-
len--;
471-
}
472-
473-
buffer[len] = '\0';
474-
ptr = (const char *)buffer;
475-
}
476-
477-
rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr);
478-
}
479-
480479
static const bool whitespace[256] = {
481480
[' '] = 1,
482481
['\t'] = 1,

test/json/json_parser_test.rb

+5
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,11 @@ def test_invalid_unicode_escape
311311
assert_raise(JSON::ParserError) { parse('"\uaa"') }
312312
assert_raise(JSON::ParserError) { parse('"\uaaa"') }
313313
assert_equal "\uaaaa", parse('"\uaaaa"')
314+
315+
assert_raise(JSON::ParserError) { parse('"\u______"') }
316+
assert_raise(JSON::ParserError) { parse('"\u1_____"') }
317+
assert_raise(JSON::ParserError) { parse('"\u11____"') }
318+
assert_raise(JSON::ParserError) { parse('"\u111___"') }
314319
end
315320

316321
def test_parse_big_integers

0 commit comments

Comments
 (0)