40
40
import java .util .List ;
41
41
import java .util .Map ;
42
42
43
+ import org .apache .arrow .memory .RootAllocator ;
43
44
import org .apache .arrow .vector .BaseFixedWidthVector ;
44
45
import org .apache .arrow .vector .BigIntVector ;
45
46
import org .apache .arrow .vector .BitVector ;
@@ -92,6 +93,21 @@ public class JdbcToArrowUtils {
92
93
private static final int DEFAULT_STREAM_BUFFER_SIZE = 1024 ;
93
94
private static final int DEFAULT_CLOB_SUBSTRING_READ_SIZE = 256 ;
94
95
96
+ /**
97
+ * Create Arrow {@link Schema} object for the given JDBC {@link ResultSetMetaData}.
98
+ *
99
+ * @param rsmd The ResultSetMetaData containing the results, to read the JDBC metadata from.
100
+ * @param calendar The calendar to use the time zone field of, to construct Timestamp fields from.
101
+ * @return {@link Schema}
102
+ * @throws SQLException on error
103
+ */
104
+ public static Schema jdbcToArrowSchema (ResultSetMetaData rsmd , Calendar calendar ) throws SQLException {
105
+ Preconditions .checkNotNull (rsmd , "JDBC ResultSetMetaData object can't be null" );
106
+ Preconditions .checkNotNull (calendar , "Calendar object can't be null" );
107
+
108
+ return jdbcToArrowSchema (rsmd , new JdbcToArrowConfig (new RootAllocator (0 ), calendar ));
109
+ }
110
+
95
111
/**
96
112
* Create Arrow {@link Schema} object for the given JDBC {@link ResultSetMetaData}.
97
113
*
@@ -122,15 +138,15 @@ public class JdbcToArrowUtils {
122
138
* CLOB --> ArrowType.Utf8
123
139
* BLOB --> ArrowType.Binary
124
140
*
125
- * @param rsmd ResultSetMetaData
141
+ * @param rsmd The ResultSetMetaData to read the JDBC column metadata from.
142
+ * @param config The configuration to use when constructing the schema.
126
143
* @return {@link Schema}
127
144
* @throws SQLException on error
128
145
*/
129
- public static Schema jdbcToArrowSchema (ResultSetMetaData rsmd , Calendar calendar , boolean includeMetadata )
130
- throws SQLException {
131
-
146
+ public static Schema jdbcToArrowSchema (ResultSetMetaData rsmd , JdbcToArrowConfig config ) throws SQLException {
132
147
Preconditions .checkNotNull (rsmd , "JDBC ResultSetMetaData object can't be null" );
133
- Preconditions .checkNotNull (calendar , "Calendar object can't be null" );
148
+ Preconditions .checkNotNull (config , "The configuration object must not be null" );
149
+ Preconditions .checkArgument (config .isValid (), "The configuration object must be valid" );
134
150
135
151
List <Field > fields = new ArrayList <>();
136
152
int columnCount = rsmd .getColumnCount ();
@@ -139,7 +155,7 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar
139
155
final FieldType fieldType ;
140
156
141
157
final Map <String , String > metadata ;
142
- if (includeMetadata ) {
158
+ if (config . includeMetadata () ) {
143
159
metadata = new HashMap <String , String >();
144
160
metadata .put (JdbcToArrow .SQL_CATALOG_NAME_KEY , rsmd .getCatalogName (i ));
145
161
metadata .put (JdbcToArrow .SQL_TABLE_NAME_KEY , rsmd .getTableName (i ));
@@ -196,8 +212,12 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar
196
212
fieldType = new FieldType (true , new ArrowType .Time (TimeUnit .MILLISECOND , 32 ), null , metadata );
197
213
break ;
198
214
case Types .TIMESTAMP :
199
- fieldType = new FieldType (true , new ArrowType .Timestamp (TimeUnit .MILLISECOND , calendar .getTimeZone ().getID ()),
200
- null , metadata );
215
+ fieldType =
216
+ new FieldType (
217
+ true ,
218
+ new ArrowType .Timestamp (TimeUnit .MILLISECOND , config .getCalendar ().getTimeZone ().getID ()),
219
+ null ,
220
+ metadata );
201
221
break ;
202
222
case Types .BINARY :
203
223
case Types .VARBINARY :
@@ -239,17 +259,38 @@ private static void allocateVectors(VectorSchemaRoot root, int size) {
239
259
* Iterate the given JDBC {@link ResultSet} object to fetch the data and transpose it to populate
240
260
* the given Arrow Vector objects.
241
261
*
242
- * @param rs ResultSet to use to fetch the data from underlying database
243
- * @param root Arrow {@link VectorSchemaRoot} object to populate
262
+ * @param rs ResultSet to use to fetch the data from underlying database
263
+ * @param root Arrow {@link VectorSchemaRoot} object to populate
264
+ * @param calendar The calendar to use when reading time-based data.
244
265
* @throws SQLException on error
245
266
*/
246
267
public static void jdbcToArrowVectors (ResultSet rs , VectorSchemaRoot root , Calendar calendar )
247
268
throws SQLException , IOException {
248
269
249
270
Preconditions .checkNotNull (rs , "JDBC ResultSet object can't be null" );
250
- Preconditions .checkNotNull (root , "JDBC ResultSet object can't be null" );
271
+ Preconditions .checkNotNull (root , "Vector Schema cannot be null" );
251
272
Preconditions .checkNotNull (calendar , "Calendar object can't be null" );
252
273
274
+ jdbcToArrowVectors (rs , root , new JdbcToArrowConfig (new RootAllocator (0 ), calendar ));
275
+ }
276
+
277
+ /**
278
+ * Iterate the given JDBC {@link ResultSet} object to fetch the data and transpose it to populate
279
+ * the given Arrow Vector objects.
280
+ *
281
+ * @param rs ResultSet to use to fetch the data from underlying database
282
+ * @param root Arrow {@link VectorSchemaRoot} object to populate
283
+ * @param config The configuration to use when reading the data.
284
+ * @throws SQLException on error
285
+ */
286
+ public static void jdbcToArrowVectors (ResultSet rs , VectorSchemaRoot root , JdbcToArrowConfig config )
287
+ throws SQLException , IOException {
288
+
289
+ Preconditions .checkNotNull (rs , "JDBC ResultSet object can't be null" );
290
+ Preconditions .checkNotNull (root , "JDBC ResultSet object can't be null" );
291
+ Preconditions .checkNotNull (config , "JDBC-to-Arrow configuration cannot be null" );
292
+ Preconditions .checkArgument (config .isValid (), "JDBC-to-Arrow configuration must be valid" );
293
+
253
294
ResultSetMetaData rsmd = rs .getMetaData ();
254
295
int columnCount = rsmd .getColumnCount ();
255
296
@@ -306,16 +347,16 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calen
306
347
break ;
307
348
case Types .DATE :
308
349
updateVector ((DateMilliVector ) root .getVector (columnName ),
309
- rs .getDate (i , calendar ), !rs .wasNull (), rowCount );
350
+ rs .getDate (i , config . getCalendar () ), !rs .wasNull (), rowCount );
310
351
break ;
311
352
case Types .TIME :
312
353
updateVector ((TimeMilliVector ) root .getVector (columnName ),
313
- rs .getTime (i , calendar ), !rs .wasNull (), rowCount );
354
+ rs .getTime (i , config . getCalendar () ), !rs .wasNull (), rowCount );
314
355
break ;
315
356
case Types .TIMESTAMP :
316
357
// TODO: Need to handle precision such as milli, micro, nano
317
358
updateVector ((TimeStampVector ) root .getVector (columnName ),
318
- rs .getTimestamp (i , calendar ), !rs .wasNull (), rowCount );
359
+ rs .getTimestamp (i , config . getCalendar () ), !rs .wasNull (), rowCount );
319
360
break ;
320
361
case Types .BINARY :
321
362
case Types .VARBINARY :
0 commit comments