-
Notifications
You must be signed in to change notification settings - Fork 113
/
Copy pathIonReaderTextUserX.java
426 lines (376 loc) · 14.4 KB
/
IonReaderTextUserX.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
package com.amazon.ion.impl;
import static com.amazon.ion.SystemSymbols.ION_1_0;
import static com.amazon.ion.SystemSymbols.ION_SYMBOL_TABLE;
import com.amazon.ion.IonCatalog;
import com.amazon.ion.IonType;
import com.amazon.ion.OffsetSpan;
import com.amazon.ion.SeekableReader;
import com.amazon.ion.Span;
import com.amazon.ion.SpanProvider;
import com.amazon.ion.SymbolTable;
import com.amazon.ion.SymbolToken;
import com.amazon.ion.TextSpan;
import com.amazon.ion.UnknownSymbolException;
import com.amazon.ion.UnsupportedIonVersionException;
import java.util.regex.Pattern;
/**
* The text user reader adds support for symbols. It recognizes,
* consumes, and processes the system values $ion_1_0 and
* local symbol tables (tagged with $ion_symbol_table).
*
* Should this materialize and "symbolate" all the symbol
* values as they come through? - No.
*
* Probably if we want the symbol id's to be the same for this
* reader as it is for other variants. Hmmm, that's expensive
* when you don't need it (which is most of the time).
*
* This will not auto-populate a symbol table. In the event
* a symbol is a '$<digits>' symbol id symbol this will return
* that value. If the string is present in the current symbol
* table it will return the id, which would be true if the
* symbol is a system symbol or if there is a local symbol
* table in the input stream. Otherwise it returns the
* undefined symbol value.
*
*/
class IonReaderTextUserX
extends IonReaderTextSystemX
implements _Private_ReaderWriter
{
// Matches text of the form $ion_<digits>_<digits>; used by isIonVersionMarker
// to recognize candidate Ion version markers.
private static final Pattern ION_VERSION_MARKER_REGEX = Pattern.compile("^\\$ion_[0-9]+_[0-9]+$");
/**
* This is the physical start-of-stream offset when this reader was created.
* It is subtracted from the reader's value start offset to produce the
* logical offset stored in an {@link OffsetSpan}, and added back when a
* span is hoisted.
*/
private final int _physical_start_offset;
// Factory used to build a local symbol table when a top-level struct
// annotated with $ion_symbol_table is encountered.
private final _Private_LocalSymbolTableFactory _lstFactory;
// IonSystem _system; now in IonReaderTextSystemX where it could be null
// Catalog handed to _lstFactory when a local symbol table is built.
IonCatalog _catalog;
// Symbol table currently in effect; this is what getSymbolTable() returns.
// Initialized to the system symbol table by the constructor.
SymbolTable _symbols;
/**
 * Creates a user-level text reader over the given input.
 *
 * @param catalog catalog handed to the local symbol table factory
 *                whenever a local symbol table is read
 * @param lstFactory factory used to build local symbol tables
 * @param uis the input this reader consumes
 * @param physicalStartOffset physical offset of the start of the stream;
 *                it is removed from value offsets when spans are created
 */
protected IonReaderTextUserX(IonCatalog catalog,
                             _Private_LocalSymbolTableFactory lstFactory,
                             UnifiedInputStreamX uis,
                             int physicalStartOffset)
{
    super(uis);
    this._catalog = catalog;
    this._lstFactory = lstFactory;
    this._physical_start_offset = physicalStartOffset;
    // The system symbol table applies until the input replaces it.
    this._symbols = _system_symtab;
}
/**
 * Creates a user-level text reader whose physical start offset is zero.
 *
 * @param catalog catalog handed to the local symbol table factory
 * @param lstFactory factory used to build local symbol tables
 * @param uis the input this reader consumes
 */
protected IonReaderTextUserX(IonCatalog catalog,
                             _Private_LocalSymbolTableFactory lstFactory,
                             UnifiedInputStreamX uis)
{
    // Delegate with a physical start offset of 0.
    this(catalog, lstFactory, uis, 0);
}
/**
 * Looks ahead to determine whether another user value is available.
 * Doing so may require cleaning up a value that is only partially
 * consumed (for example a collection whose annotations have been read
 * but which the user has chosen not to step into).
 *
 * The user reader variant also processes system values along the way:
 * the Ion version marker ($ion_1_0) and local symbol tables. When one
 * of these is encountered the symbol table handling is performed and
 * the value itself is skipped.
 *
 * @return true if more data remains, false on eof
 */
@Override
public boolean hasNext()
{
    return has_next_user_value();
}
/**
 * Advances past any system values (Ion version markers and local symbol
 * tables) at the datagram level, recording the symbol tables they
 * establish, until a user value or eof is reached.
 *
 * @return true when a value is available, false at eof
 * @throws UnsupportedIonVersionException if an unannotated top-level symbol
 *         looks like a version marker but is not $ion_1_0
 */
private final boolean has_next_user_value()
{
    // Forget the symbol tables recorded while producing the previous value.
    clear_system_value_stack();

    // A loop rather than a single step: each consumed system value resets
    // _has_next_called, so another pass is needed to find the next value.
    while (!_has_next_called)
    {
        // Move to the next value, whether it is a system or a user value.
        has_next_raw_value();

        // System values only exist at the datagram level; anything nested
        // in another value is ignored. _value_type is null at eof and for
        // numeric types that are not yet determined (never system values).
        boolean topLevelCandidate = _value_type != null
                                 && !isNullValue()
                                 && IonType.DATAGRAM.equals(getContainerType());
        if (!topLevelCandidate) {
            continue;
        }

        switch (_value_type) {
        case STRUCT:
        {
            boolean isLocalSymtab = _annotation_count > 0
                                 && ION_SYMBOL_TABLE.equals(_annotations[0].getText());
            if (isLocalSymtab) {
                _symbols = _lstFactory.newLocalSymtab(_catalog, this, true);
                push_symbol_table(_symbols);
                _has_next_called = false;
            }
            break;
        }
        case SYMBOL:
        {
            // $ion_1_0 is read as an IVM only if it is not annotated
            if (_annotation_count != 0) {
                break;
            }
            String marker = symbolValue().getText();
            if (!isIonVersionMarker(marker)) {
                break;
            }
            if (!ION_1_0.equals(marker)) {
                throw new UnsupportedIonVersionException(marker);
            }
            // The reset is skipped when the symbol was written in
            // symbol-id keyword form.
            if (_value_keyword != IonTokenConstsX.KEYWORD_sid) {
                symbol_table_reset();
                push_symbol_table(_system_symtab);
            }
            _has_next_called = false;
            break;
        }
        default:
            break;
        }
    }
    return !_eof;
}
/**
 * Tests whether the given text has the shape of an Ion version marker,
 * as defined by ION_VERSION_MARKER_REGEX.
 *
 * @param text symbol text to test; may be null
 * @return true if text is non-null and matches the marker pattern
 */
private static boolean isIonVersionMarker(String text)
{
    if (text == null) {
        return false;
    }
    return ION_VERSION_MARKER_REGEX.matcher(text).matches();
}
/**
 * Consumes the current value via next() (asserted to be a symbol) and
 * makes the system symbol table the current symbol table again.
 */
private final void symbol_table_reset()
{
    IonType consumed = next();
    assert IonType.SYMBOL.equals(consumed);
    _symbols = _system_symtab;
}
/**
 * Rejects a symbol token that has no text and whose symbol id lies beyond
 * the maximum id of the current symbol table.
 *
 * @param symbol token to check; null is accepted and ignored
 * @throws UnknownSymbolException if the token has no text and its sid
 *         exceeds the current symbol table's max id
 */
private void validateSymbolToken(SymbolToken symbol) {
    if (symbol == null) {
        return;
    }
    boolean outOfRange = symbol.getText() == null
                      && symbol.getSid() > getSymbolTable().getMaxId();
    if (outOfRange) {
        throw new UnknownSymbolException(symbol.getSid());
    }
}
/**
 * Returns the annotations reported by the superclass after checking each
 * one with validateSymbolToken.
 *
 * @throws UnknownSymbolException if any annotation fails validation
 */
@Override
public SymbolToken[] getTypeAnnotationSymbols() {
    final SymbolToken[] tokens = super.getTypeAnnotationSymbols();
    for (int i = 0; i < tokens.length; i++) {
        validateSymbolToken(tokens[i]);
    }
    return tokens;
}
/**
 * Returns the field name token reported by the superclass after checking
 * it with validateSymbolToken.
 *
 * @throws UnknownSymbolException if the field name fails validation
 */
@Override
public final SymbolToken getFieldNameSymbol() {
    final SymbolToken token = super.getFieldNameSymbol();
    validateSymbolToken(token);
    return token;
}
/**
 * Returns the symbol token reported by the superclass after checking it
 * with validateSymbolToken.
 *
 * @throws UnknownSymbolException if the symbol fails validation
 */
@Override
public final SymbolToken symbolValue() {
    final SymbolToken token = super.symbolValue();
    validateSymbolToken(token);
    return token;
}
/**
 * Returns the symbol table currently in effect for this reader.
 */
@Override
public SymbolTable getSymbolTable()
{
    return this._symbols;
}
//
// This code handles the skipped symbol table
// support - it is cloned in IonReaderTreeUserX
// and IonReaderBinaryUserX
//
// SO ANY FIXES HERE WILL BE NEEDED IN THOSE
// TWO LOCATIONS AS WELL.
//
// Number of entries currently held in _symbol_table_stack; it is also the
// index of the next free slot.
private int _symbol_table_top = 0;
// Symbol tables pushed while system values were consumed; push_symbol_table
// doubles the array when it is full.
private SymbolTable[] _symbol_table_stack = new SymbolTable[3]; // 3 is rare, IVM followed by a local sym tab with open content
/**
 * Empties the stack of passed symbol tables, nulling each occupied slot
 * from the top down.
 */
private void clear_system_value_stack()
{
    while (_symbol_table_top > 0) {
        _symbol_table_stack[--_symbol_table_top] = null;
    }
}
/**
 * Pushes a symbol table onto the stack of passed symbol tables, doubling
 * the backing array first when it is full.
 *
 * @param symbols the table to record; asserted to be non-null
 */
private void push_symbol_table(SymbolTable symbols)
{
    assert symbols != null;

    final int capacity = _symbol_table_stack.length;
    if (_symbol_table_top >= capacity) {
        SymbolTable[] grown = new SymbolTable[capacity * 2];
        System.arraycopy(_symbol_table_stack, 0, grown, 0, capacity);
        _symbol_table_stack = grown;
    }

    _symbol_table_stack[_symbol_table_top] = symbols;
    _symbol_table_top++;
}
/**
 * Removes and returns the most recently pushed symbol table.
 *
 * @return the table on top of the stack, or null when the stack is empty
 */
@Override
public SymbolTable pop_passed_symbol_table()
{
    if (_symbol_table_top <= 0) {
        return null;
    }
    final int slot = --_symbol_table_top;
    final SymbolTable popped = _symbol_table_stack[slot];
    // Clear the slot so the stack does not keep a reference to the table.
    _symbol_table_stack[slot] = null;
    return popped;
}
/**
 * Span capturing where the reader's current value starts: the data page,
 * the symbol table and container type in effect, and the start offset,
 * line and column. No finish position is recorded.
 */
private static final class IonReaderTextSpan
    extends DowncastingFaceted
    implements Span, TextSpan, OffsetSpan
{
    private final UnifiedDataPageX _data_page;
    private final SymbolTable _symbols;
    private final IonType _container_type;
    private final long _start_offset;
    private final long _start_line;
    private final long _start_column;

    /**
     * Snapshots the position of the given reader's current value.
     *
     * @param reader the reader whose current position is captured
     */
    IonReaderTextSpan(IonReaderTextUserX reader)
    {
        // TODO: convert _start_char_offset from a long and data page
        //       to be an abstract reference into the Unified* data source
        //
        // TODO: this page isn't safe, except where we have only a single
        //       page of buffered input. Which is the case for the time
        //       being. Later, when this is stream aware, this needs to change.
        this._data_page = reader._scanner.getSourceStream()._buffer.getCurrentPage();

        this._symbols = reader.getSymbolTable();
        this._container_type = reader.getContainerType();

        // Stored offsets are logical: the physical start of the stream is removed.
        this._start_offset = reader._value_start_offset - reader._physical_start_offset;
        this._start_line = reader._value_start_line;
        this._start_column = reader._value_start_column;
    }

    /**
     * @return the line on which the value starts
     * @throws IllegalStateException if the recorded line is less than 1
     */
    public long getStartLine()
    {
        if (_start_line >= 1) {
            return _start_line;
        }
        throw new IllegalStateException("not positioned on a reader");
    }

    /**
     * @return the column at which the value starts
     * @throws IllegalStateException if the recorded column is negative
     */
    public long getStartColumn()
    {
        if (_start_column >= 0) {
            return _start_column;
        }
        throw new IllegalStateException("not positioned on a reader");
    }

    /** @return always -1; this span records no finish line */
    public long getFinishLine()
    {
        return -1;
    }

    /** @return always -1; this span records no finish column */
    public long getFinishColumn()
    {
        return -1;
    }

    /** @return the logical start offset of the value */
    public long getStartOffset()
    {
        return _start_offset;
    }

    /** @return always -1; this span records no finish offset */
    public long getFinishOffset()
    {
        return -1;
    }

    /** @return the container type the reader was in when the span was taken */
    IonType getContainerType() {
        return _container_type;
    }

    /** @return the data page that was current when the span was taken */
    UnifiedDataPageX getDataPage() {
        return _data_page;
    }
}
/**
 * Builds a span describing the start of the current value.
 *
 * @return a new span for the reader's current position
 * @throws IllegalStateException if the reader is not on a value
 */
public Span currentSpanImpl()
{
    if (getType() == null) {
        throw new IllegalStateException("must be on a value");
    }
    return new IonReaderTextSpan(this);
}
/**
 * Re-initializes this reader over a new stream that begins at the value
 * the span was taken on, and restores the span's symbol table.
 *
 * @param span a span previously produced by this kind of reader
 * @throws IllegalArgumentException if span is not an IonReaderTextSpan
 */
private void hoistImpl(Span span)
{
    if (!(span instanceof IonReaderTextSpan)) {
        throw new IllegalArgumentException("position must match the reader");
    }
    final IonReaderTextSpan target = (IonReaderTextSpan) span;

    final UnifiedInputStreamX source = _scanner.getSourceStream();
    final UnifiedDataPageX page = target.getDataPage();

    // Span offsets are logical; add the physical start offset back to
    // obtain an index into the backing array.
    final int start = (int) target._start_offset + _physical_start_offset;
    final int length = page._page_limit - start;

    // we're going to cast this value down.  Since we only support
    // in memory single buffered chars here this is ok.
    assert target.getStartOffset() <= Integer.MAX_VALUE;

    // Now - create a new stream
    // TODO: this is a pretty expensive way to do this. UnifiedInputStreamX
    //       needs to have a reset method added that can reset the position
    //       and length of the input to be some subset of the original source.
    //       This would avoid a lot of object creation (and wasted destruction).
    //       But this is a time-to-market solution here.  The change can be
    //       made as support for streams is added.
    final UnifiedInputStreamX hoisted;
    if (!source._is_byte_data) {
        char[] chars = source.getCharArray();
        assert chars != null;
        hoisted = UnifiedInputStreamX.makeStream(chars, start, length);
    }
    else {
        byte[] bytes = source.getByteArray();
        assert bytes != null;
        hoisted = UnifiedInputStreamX.makeStream(bytes, start, length);
    }

    final IonType container = target.getContainerType();
    re_init(hoisted, container, target._start_line, target._start_column);

    // Restore the symbol table that was current when the span was taken.
    _symbols = target._symbols;
}
//========================================================================
/**
 * Returns a facet of this reader for the requested type. SpanProvider is
 * always offered; SeekableReader is offered only when the scanner reports
 * buffered input. Any other request is passed to the superclass.
 *
 * @param facetType the facet interface being requested
 * @return the facet, or whatever the superclass returns for other types
 */
@Override
public <T> T asFacet(Class<T> facetType)
{
    if (facetType == SpanProvider.class) {
        SpanProviderFacet provider = new SpanProviderFacet();
        return facetType.cast(provider);
    }

    boolean seekable = facetType == SeekableReader.class
                    && _scanner.isBufferedInput();
    if (seekable) {
        SeekableReaderFacet seeker = new SeekableReaderFacet();
        return facetType.cast(seeker);
    }

    return super.asFacet(facetType);
}
/**
 * SpanProvider facet that delegates to the enclosing reader's
 * currentSpanImpl().
 */
private class SpanProviderFacet
    implements SpanProvider
{
    /** @return a span for the enclosing reader's current value */
    public Span currentSpan()
    {
        return IonReaderTextUserX.this.currentSpanImpl();
    }
}
/**
 * SeekableReader facet that adds hoisting to the span provider facet by
 * delegating to the enclosing reader's hoistImpl().
 */
private final class SeekableReaderFacet
    extends SpanProviderFacet
    implements SeekableReader
{
    /** @param span the span to reposition the enclosing reader on */
    public void hoist(Span span)
    {
        IonReaderTextUserX.this.hoistImpl(span);
    }
}
}