@@ -101,6 +101,7 @@ static void raise_generator_error(VALUE invalid_object, const char *fmt, ...)
101
101
// 0 - single byte char that doesn't need to be escaped.
102
102
// (x | 8) - char that needs to be escaped.
103
103
/* Low three bits of an escape-table entry: the byte length of the character. */
static const unsigned char CHAR_LENGTH_MASK = 7;
/* Bit set in an escape-table entry when the character needs to be escaped. */
static const unsigned char ESCAPE_MASK = 8;
104
105
105
106
static const unsigned char escape_table [256 ] = {
106
107
// ASCII Control Characters
@@ -165,6 +166,84 @@ static const unsigned char script_safe_escape_table[256] = {
165
166
4 , 4 , 4 , 4 , 4 , 4 , 4 , 4 , 5 , 5 , 5 , 5 , 6 , 6 , 9 , 9 ,
166
167
};
167
168
169
+
170
+ typedef struct _search_state {
171
+ const char * ptr ;
172
+ const char * end ;
173
+ const char * cursor ;
174
+ FBuffer * buffer ;
175
+ } search_state ;
176
+
177
+ static inline void search_flush (search_state * search )
178
+ {
179
+ fbuffer_append (search -> buffer , search -> cursor , search -> ptr - search -> cursor );
180
+ search -> cursor = search -> ptr ;
181
+ }
182
+
183
+ static inline unsigned char search_escape (search_state * search , const unsigned char escape_table [256 ])
184
+ {
185
+ while (search -> ptr < search -> end ) {
186
+ unsigned char ch = (unsigned char )* search -> ptr ;
187
+ unsigned char ch_len = escape_table [ch ];
188
+
189
+ if (RB_UNLIKELY (ch_len )) {
190
+ if (ch_len & ESCAPE_MASK ) {
191
+ if (RB_UNLIKELY (ch_len == 11 )) {
192
+ const unsigned char * uptr = (const unsigned char * )search -> ptr ;
193
+ if (!(uptr [1 ] == 0x80 && (uptr [2 ] >> 1 ) == 0x54 )) {
194
+ search -> ptr += 3 ;
195
+ continue ;
196
+ }
197
+ }
198
+ search_flush (search );
199
+ return ch_len & CHAR_LENGTH_MASK ;
200
+ } else {
201
+ search -> ptr += ch_len ;
202
+ }
203
+ } else {
204
+ search -> ptr ++ ;
205
+ }
206
+ }
207
+ search_flush (search );
208
+ return 0 ;
209
+ }
210
+
211
+ static inline void fast_escape_UTF8_char (search_state * search , unsigned char ch_len ) {
212
+ const unsigned char ch = (unsigned char )* search -> ptr ;
213
+ switch (ch_len ) {
214
+ case 1 : {
215
+ switch (ch ) {
216
+ case '"' : fbuffer_append (search -> buffer , "\\\"" , 2 ); break ;
217
+ case '\\' : fbuffer_append (search -> buffer , "\\\\" , 2 ); break ;
218
+ case '/' : fbuffer_append (search -> buffer , "\\/" , 2 ); break ;
219
+ case '\b' : fbuffer_append (search -> buffer , "\\b" , 2 ); break ;
220
+ case '\f' : fbuffer_append (search -> buffer , "\\f" , 2 ); break ;
221
+ case '\n' : fbuffer_append (search -> buffer , "\\n" , 2 ); break ;
222
+ case '\r' : fbuffer_append (search -> buffer , "\\r" , 2 ); break ;
223
+ case '\t' : fbuffer_append (search -> buffer , "\\t" , 2 ); break ;
224
+ default : {
225
+ const char * hexdig = "0123456789abcdef" ;
226
+ char scratch [6 ] = { '\\' , 'u' , '0' , '0' , 0 , 0 };
227
+ scratch [4 ] = hexdig [(ch >> 4 ) & 0xf ];
228
+ scratch [5 ] = hexdig [ch & 0xf ];
229
+ fbuffer_append (search -> buffer , scratch , 6 );
230
+ break ;
231
+ }
232
+ }
233
+ break ;
234
+ }
235
+ case 3 : {
236
+ if (search -> ptr [2 ] & 1 ) {
237
+ fbuffer_append (search -> buffer , "\\u2029" , 6 );
238
+ } else {
239
+ fbuffer_append (search -> buffer , "\\u2028" , 6 );
240
+ }
241
+ break ;
242
+ }
243
+ }
244
+ search -> cursor = (search -> ptr += ch_len );
245
+ }
246
+
168
247
/* Converts in_string to a JSON string (without the wrapping '"'
169
248
* characters) in FBuffer out_buffer.
170
249
*
@@ -181,182 +260,114 @@ static const unsigned char script_safe_escape_table[256] = {
181
260
* Everything else (should be UTF-8) is just passed through and
182
261
* appended to the result.
183
262
*/
184
- static inline void convert_UTF8_to_JSON (FBuffer * out_buffer , VALUE str , const unsigned char escape_table [256 ])
263
+ static inline void convert_UTF8_to_JSON (search_state * search , const unsigned char escape_table [256 ])
185
264
{
186
- const char * hexdig = "0123456789abcdef" ;
187
- char scratch [12 ] = { '\\' , 'u' , 0 , 0 , 0 , 0 , '\\' , 'u' };
188
-
189
- const char * ptr = RSTRING_PTR (str );
190
- unsigned long len = RSTRING_LEN (str );
191
-
192
- unsigned long beg = 0 , pos = 0 ;
193
-
194
- #define FLUSH_POS (bytes ) if (pos > beg) { fbuffer_append(out_buffer, &ptr[beg], pos - beg); } pos += bytes; beg = pos;
265
+ unsigned char ch_len ;
266
+ while ((ch_len = search_escape (search , escape_table ))) {
267
+ fast_escape_UTF8_char (search , ch_len );
268
+ }
269
+ }
195
270
196
- while (pos < len ) {
197
- unsigned char ch = ptr [pos ];
271
+ static inline unsigned char search_ascii_only_escape (search_state * search , const unsigned char escape_table [256 ])
272
+ {
273
+ while (search -> ptr < search -> end ) {
274
+ unsigned char ch = (unsigned char )* search -> ptr ;
198
275
unsigned char ch_len = escape_table [ch ];
199
- /* JSON encoding */
200
276
201
277
if (RB_UNLIKELY (ch_len )) {
202
- switch (ch_len ) {
203
- case 9 : {
204
- FLUSH_POS (1 );
205
- switch (ch ) {
206
- case '"' : fbuffer_append (out_buffer , "\\\"" , 2 ); break ;
207
- case '\\' : fbuffer_append (out_buffer , "\\\\" , 2 ); break ;
208
- case '/' : fbuffer_append (out_buffer , "\\/" , 2 ); break ;
209
- case '\b' : fbuffer_append (out_buffer , "\\b" , 2 ); break ;
210
- case '\f' : fbuffer_append (out_buffer , "\\f" , 2 ); break ;
211
- case '\n' : fbuffer_append (out_buffer , "\\n" , 2 ); break ;
212
- case '\r' : fbuffer_append (out_buffer , "\\r" , 2 ); break ;
213
- case '\t' : fbuffer_append (out_buffer , "\\t" , 2 ); break ;
214
- default : {
215
- scratch [2 ] = '0' ;
216
- scratch [3 ] = '0' ;
217
- scratch [4 ] = hexdig [(ch >> 4 ) & 0xf ];
218
- scratch [5 ] = hexdig [ch & 0xf ];
219
- fbuffer_append (out_buffer , scratch , 6 );
220
- break ;
221
- }
222
- }
223
- break ;
224
- }
225
- case 11 : {
226
- unsigned char b2 = ptr [pos + 1 ];
227
- if (RB_UNLIKELY (b2 == 0x80 )) {
228
- unsigned char b3 = ptr [pos + 2 ];
229
- if (b3 == 0xA8 ) {
230
- FLUSH_POS (3 );
231
- fbuffer_append (out_buffer , "\\u2028" , 6 );
232
- break ;
233
- } else if (b3 == 0xA9 ) {
234
- FLUSH_POS (3 );
235
- fbuffer_append (out_buffer , "\\u2029" , 6 );
236
- break ;
237
- }
238
- }
239
- ch_len = 3 ;
240
- // fallthrough
241
- }
242
- default :
243
- pos += ch_len ;
244
- break ;
245
- }
278
+ search_flush (search );
279
+ return ch_len & CHAR_LENGTH_MASK ;
246
280
} else {
247
- pos ++ ;
281
+ search -> ptr ++ ;
248
282
}
249
283
}
250
- #undef FLUSH_POS
251
-
252
- if (beg < len ) {
253
- fbuffer_append (out_buffer , & ptr [beg ], len - beg );
254
- }
255
-
256
- RB_GC_GUARD (str );
284
+ search_flush (search );
285
+ return 0 ;
257
286
}
258
287
259
- static void convert_UTF8_to_ASCII_only_JSON (FBuffer * out_buffer , VALUE str , const unsigned char escape_table [256 ])
260
- {
261
- const char * hexdig = "0123456789abcdef" ;
262
- char scratch [12 ] = { '\\' , 'u' , 0 , 0 , 0 , 0 , '\\' , 'u' };
263
-
264
- const char * ptr = RSTRING_PTR (str );
265
- unsigned long len = RSTRING_LEN (str );
266
-
267
- unsigned long beg = 0 , pos = 0 ;
268
-
269
- #define FLUSH_POS (bytes ) if (pos > beg) { fbuffer_append(out_buffer, &ptr[beg], pos - beg); } pos += bytes; beg = pos;
270
-
271
- while (pos < len ) {
272
- unsigned char ch = ptr [pos ];
273
- unsigned char ch_len = escape_table [ch ];
274
-
275
- if (RB_UNLIKELY (ch_len )) {
276
- switch (ch_len ) {
277
- case 9 : {
278
- FLUSH_POS (1 );
279
- switch (ch ) {
280
- case '"' : fbuffer_append (out_buffer , "\\\"" , 2 ); break ;
281
- case '\\' : fbuffer_append (out_buffer , "\\\\" , 2 ); break ;
282
- case '/' : fbuffer_append (out_buffer , "\\/" , 2 ); break ;
283
- case '\b' : fbuffer_append (out_buffer , "\\b" , 2 ); break ;
284
- case '\f' : fbuffer_append (out_buffer , "\\f" , 2 ); break ;
285
- case '\n' : fbuffer_append (out_buffer , "\\n" , 2 ); break ;
286
- case '\r' : fbuffer_append (out_buffer , "\\r" , 2 ); break ;
287
- case '\t' : fbuffer_append (out_buffer , "\\t" , 2 ); break ;
288
- default : {
289
- scratch [2 ] = '0' ;
290
- scratch [3 ] = '0' ;
291
- scratch [4 ] = hexdig [(ch >> 4 ) & 0xf ];
292
- scratch [5 ] = hexdig [ch & 0xf ];
293
- fbuffer_append (out_buffer , scratch , 6 );
294
- break ;
295
- }
296
- }
288
+ static inline void full_escape_UTF8_char (search_state * search , unsigned char ch_len ) {
289
+ const unsigned char ch = (unsigned char )* search -> ptr ;
290
+ switch (ch_len ) {
291
+ case 1 : {
292
+ switch (ch ) {
293
+ case '"' : fbuffer_append (search -> buffer , "\\\"" , 2 ); break ;
294
+ case '\\' : fbuffer_append (search -> buffer , "\\\\" , 2 ); break ;
295
+ case '/' : fbuffer_append (search -> buffer , "\\/" , 2 ); break ;
296
+ case '\b' : fbuffer_append (search -> buffer , "\\b" , 2 ); break ;
297
+ case '\f' : fbuffer_append (search -> buffer , "\\f" , 2 ); break ;
298
+ case '\n' : fbuffer_append (search -> buffer , "\\n" , 2 ); break ;
299
+ case '\r' : fbuffer_append (search -> buffer , "\\r" , 2 ); break ;
300
+ case '\t' : fbuffer_append (search -> buffer , "\\t" , 2 ); break ;
301
+ default : {
302
+ const char * hexdig = "0123456789abcdef" ;
303
+ char scratch [6 ] = { '\\' , 'u' , '0' , '0' , 0 , 0 };
304
+ scratch [4 ] = hexdig [(ch >> 4 ) & 0xf ];
305
+ scratch [5 ] = hexdig [ch & 0xf ];
306
+ fbuffer_append (search -> buffer , scratch , 6 );
297
307
break ;
298
308
}
299
- default : {
300
- uint32_t wchar = 0 ;
301
- ch_len = ch_len & CHAR_LENGTH_MASK ;
302
-
303
- switch (ch_len ) {
304
- case 2 :
305
- wchar = ptr [pos ] & 0x1F ;
306
- break ;
307
- case 3 :
308
- wchar = ptr [pos ] & 0x0F ;
309
- break ;
310
- case 4 :
311
- wchar = ptr [pos ] & 0x07 ;
312
- break ;
313
- }
309
+ }
310
+ break ;
311
+ }
312
+ default : {
313
+ const char * hexdig = "0123456789abcdef" ;
314
+ char scratch [12 ] = { '\\' , 'u' , 0 , 0 , 0 , 0 , '\\' , 'u' };
314
315
315
- for (short i = 1 ; i < ch_len ; i ++ ) {
316
- wchar = (wchar << 6 ) | (ptr [pos + i ] & 0x3F );
317
- }
316
+ uint32_t wchar = 0 ;
318
317
319
- FLUSH_POS (ch_len );
318
+ switch (ch_len ) {
319
+ case 2 :
320
+ wchar = ch & 0x1F ;
321
+ break ;
322
+ case 3 :
323
+ wchar = ch & 0x0F ;
324
+ break ;
325
+ case 4 :
326
+ wchar = ch & 0x07 ;
327
+ break ;
328
+ }
320
329
321
- if (wchar <= 0xFFFF ) {
322
- scratch [2 ] = hexdig [wchar >> 12 ];
323
- scratch [3 ] = hexdig [(wchar >> 8 ) & 0xf ];
324
- scratch [4 ] = hexdig [(wchar >> 4 ) & 0xf ];
325
- scratch [5 ] = hexdig [wchar & 0xf ];
326
- fbuffer_append (out_buffer , scratch , 6 );
327
- } else {
328
- uint16_t hi , lo ;
329
- wchar -= 0x10000 ;
330
- hi = 0xD800 + (uint16_t )(wchar >> 10 );
331
- lo = 0xDC00 + (uint16_t )(wchar & 0x3FF );
332
-
333
- scratch [2 ] = hexdig [hi >> 12 ];
334
- scratch [3 ] = hexdig [(hi >> 8 ) & 0xf ];
335
- scratch [4 ] = hexdig [(hi >> 4 ) & 0xf ];
336
- scratch [5 ] = hexdig [hi & 0xf ];
337
-
338
- scratch [8 ] = hexdig [lo >> 12 ];
339
- scratch [9 ] = hexdig [(lo >> 8 ) & 0xf ];
340
- scratch [10 ] = hexdig [(lo >> 4 ) & 0xf ];
341
- scratch [11 ] = hexdig [lo & 0xf ];
342
-
343
- fbuffer_append (out_buffer , scratch , 12 );
344
- }
330
+ for (short i = 1 ; i < ch_len ; i ++ ) {
331
+ wchar = (wchar << 6 ) | (search -> ptr [i ] & 0x3F );
332
+ }
345
333
346
- break ;
347
- }
334
+ if (wchar <= 0xFFFF ) {
335
+ scratch [2 ] = hexdig [wchar >> 12 ];
336
+ scratch [3 ] = hexdig [(wchar >> 8 ) & 0xf ];
337
+ scratch [4 ] = hexdig [(wchar >> 4 ) & 0xf ];
338
+ scratch [5 ] = hexdig [wchar & 0xf ];
339
+ fbuffer_append (search -> buffer , scratch , 6 );
340
+ } else {
341
+ uint16_t hi , lo ;
342
+ wchar -= 0x10000 ;
343
+ hi = 0xD800 + (uint16_t )(wchar >> 10 );
344
+ lo = 0xDC00 + (uint16_t )(wchar & 0x3FF );
345
+
346
+ scratch [2 ] = hexdig [hi >> 12 ];
347
+ scratch [3 ] = hexdig [(hi >> 8 ) & 0xf ];
348
+ scratch [4 ] = hexdig [(hi >> 4 ) & 0xf ];
349
+ scratch [5 ] = hexdig [hi & 0xf ];
350
+
351
+ scratch [8 ] = hexdig [lo >> 12 ];
352
+ scratch [9 ] = hexdig [(lo >> 8 ) & 0xf ];
353
+ scratch [10 ] = hexdig [(lo >> 4 ) & 0xf ];
354
+ scratch [11 ] = hexdig [lo & 0xf ];
355
+
356
+ fbuffer_append (search -> buffer , scratch , 12 );
348
357
}
349
- } else {
350
- pos ++ ;
358
+
359
+ break ;
351
360
}
352
361
}
353
- #undef FLUSH_POS
362
+ search -> cursor = (search -> ptr += ch_len );
363
+ }
354
364
355
- if (beg < len ) {
356
- fbuffer_append (out_buffer , & ptr [beg ], len - beg );
365
+ static void convert_UTF8_to_ASCII_only_JSON (search_state * search , const unsigned char escape_table [256 ])
366
+ {
367
+ unsigned char ch_len ;
368
+ while ((ch_len = search_ascii_only_escape (search , escape_table ))) {
369
+ full_escape_UTF8_char (search , ch_len );
357
370
}
358
-
359
- RB_GC_GUARD (str );
360
371
}
361
372
362
373
/*
@@ -911,13 +922,20 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat
911
922
912
923
fbuffer_append_char (buffer , '"' );
913
924
925
+ long len ;
926
+ search_state search ;
927
+ search .buffer = buffer ;
928
+ RSTRING_GETMEM (obj , search .ptr , len );
929
+ search .cursor = search .ptr ;
930
+ search .end = search .ptr + len ;
931
+
914
932
switch (rb_enc_str_coderange (obj )) {
915
933
case ENC_CODERANGE_7BIT :
916
934
case ENC_CODERANGE_VALID :
917
935
if (RB_UNLIKELY (state -> ascii_only )) {
918
- convert_UTF8_to_ASCII_only_JSON (buffer , obj , state -> script_safe ? script_safe_escape_table : ascii_only_escape_table );
936
+ convert_UTF8_to_ASCII_only_JSON (& search , state -> script_safe ? script_safe_escape_table : ascii_only_escape_table );
919
937
} else {
920
- convert_UTF8_to_JSON (buffer , obj , state -> script_safe ? script_safe_escape_table : escape_table );
938
+ convert_UTF8_to_JSON (& search , state -> script_safe ? script_safe_escape_table : escape_table );
921
939
}
922
940
break ;
923
941
default :
0 commit comments