Skip to content

Commit 5bfd6a2

Browse files
author
Mike Pigott
committed
Merge branch 'jdbc-to-arrow-config' into jdbc-column-metadata
2 parents b5b0cb1 + 68c91e7 commit 5bfd6a2

File tree

1 file changed

+55
-14
lines changed

1 file changed

+55
-14
lines changed

java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java

+55-14
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
import java.util.List;
4141
import java.util.Map;
4242

43+
import org.apache.arrow.memory.RootAllocator;
4344
import org.apache.arrow.vector.BaseFixedWidthVector;
4445
import org.apache.arrow.vector.BigIntVector;
4546
import org.apache.arrow.vector.BitVector;
@@ -92,6 +93,21 @@ public class JdbcToArrowUtils {
9293
private static final int DEFAULT_STREAM_BUFFER_SIZE = 1024;
9394
private static final int DEFAULT_CLOB_SUBSTRING_READ_SIZE = 256;
9495

96+
/**
97+
* Create Arrow {@link Schema} object for the given JDBC {@link ResultSetMetaData}.
98+
*
99+
* @param rsmd The ResultSetMetaData to read the JDBC column metadata from.
100+
* @param calendar The calendar whose time zone is used when constructing Timestamp fields.
101+
* @return {@link Schema}
102+
* @throws SQLException on error
103+
*/
104+
public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar) throws SQLException {
105+
Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null");
106+
Preconditions.checkNotNull(calendar, "Calendar object can't be null");
107+
108+
return jdbcToArrowSchema(rsmd, new JdbcToArrowConfig(new RootAllocator(0), calendar));
109+
}
110+
95111
/**
96112
* Create Arrow {@link Schema} object for the given JDBC {@link ResultSetMetaData}.
97113
*
@@ -122,15 +138,15 @@ public class JdbcToArrowUtils {
122138
* CLOB --> ArrowType.Utf8
123139
* BLOB --> ArrowType.Binary
124140
*
125-
* @param rsmd ResultSetMetaData
141+
* @param rsmd The ResultSetMetaData to read the JDBC column metadata from.
142+
* @param config The configuration to use when constructing the schema.
126143
* @return {@link Schema}
127144
* @throws SQLException on error
128145
*/
129-
public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar, boolean includeMetadata)
130-
throws SQLException {
131-
146+
public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig config) throws SQLException {
132147
Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null");
133-
Preconditions.checkNotNull(calendar, "Calendar object can't be null");
148+
Preconditions.checkNotNull(config, "The configuration object must not be null");
149+
Preconditions.checkArgument(config.isValid(), "The configuration object must be valid");
134150

135151
List<Field> fields = new ArrayList<>();
136152
int columnCount = rsmd.getColumnCount();
@@ -139,7 +155,7 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar
139155
final FieldType fieldType;
140156

141157
final Map<String, String> metadata;
142-
if (includeMetadata) {
158+
if (config.includeMetadata()) {
143159
metadata = new HashMap<String, String>();
144160
metadata.put(JdbcToArrow.SQL_CATALOG_NAME_KEY, rsmd.getCatalogName(i));
145161
metadata.put(JdbcToArrow.SQL_TABLE_NAME_KEY, rsmd.getTableName(i));
@@ -196,8 +212,12 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar
196212
fieldType = new FieldType(true, new ArrowType.Time(TimeUnit.MILLISECOND, 32), null, metadata);
197213
break;
198214
case Types.TIMESTAMP:
199-
fieldType = new FieldType(true, new ArrowType.Timestamp(TimeUnit.MILLISECOND, calendar.getTimeZone().getID()),
200-
null, metadata);
215+
fieldType =
216+
new FieldType(
217+
true,
218+
new ArrowType.Timestamp(TimeUnit.MILLISECOND, config.getCalendar().getTimeZone().getID()),
219+
null,
220+
metadata);
201221
break;
202222
case Types.BINARY:
203223
case Types.VARBINARY:
@@ -239,17 +259,38 @@ private static void allocateVectors(VectorSchemaRoot root, int size) {
239259
* Iterate the given JDBC {@link ResultSet} object to fetch the data and transpose it to populate
240260
* the given Arrow Vector objects.
241261
*
242-
* @param rs ResultSet to use to fetch the data from underlying database
243-
* @param root Arrow {@link VectorSchemaRoot} object to populate
262+
* @param rs ResultSet to use to fetch the data from underlying database
263+
* @param root Arrow {@link VectorSchemaRoot} object to populate
264+
* @param calendar The calendar to use when reading time-based data.
244265
* @throws SQLException on error
* @throws IOException if an I/O error occurs
245266
*/
246267
public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calendar calendar)
247268
throws SQLException, IOException {
248269

249270
Preconditions.checkNotNull(rs, "JDBC ResultSet object can't be null");
250-
Preconditions.checkNotNull(root, "JDBC ResultSet object can't be null");
271+
Preconditions.checkNotNull(root, "Vector Schema cannot be null");
251272
Preconditions.checkNotNull(calendar, "Calendar object can't be null");
252273

274+
jdbcToArrowVectors(rs, root, new JdbcToArrowConfig(new RootAllocator(0), calendar));
275+
}
276+
277+
/**
278+
* Iterate the given JDBC {@link ResultSet} object to fetch the data and transpose it to populate
279+
* the given Arrow Vector objects.
280+
*
281+
* @param rs ResultSet to use to fetch the data from underlying database
282+
* @param root Arrow {@link VectorSchemaRoot} object to populate
283+
* @param config The configuration to use when reading the data.
284+
* @throws SQLException on error
* @throws IOException if an I/O error occurs
285+
*/
286+
public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, JdbcToArrowConfig config)
287+
throws SQLException, IOException {
288+
289+
Preconditions.checkNotNull(rs, "JDBC ResultSet object can't be null");
290+
Preconditions.checkNotNull(root, "JDBC ResultSet object can't be null");
291+
Preconditions.checkNotNull(config, "JDBC-to-Arrow configuration cannot be null");
292+
Preconditions.checkArgument(config.isValid(), "JDBC-to-Arrow configuration must be valid");
293+
253294
ResultSetMetaData rsmd = rs.getMetaData();
254295
int columnCount = rsmd.getColumnCount();
255296

@@ -306,16 +347,16 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calen
306347
break;
307348
case Types.DATE:
308349
updateVector((DateMilliVector) root.getVector(columnName),
309-
rs.getDate(i, calendar), !rs.wasNull(), rowCount);
350+
rs.getDate(i, config.getCalendar()), !rs.wasNull(), rowCount);
310351
break;
311352
case Types.TIME:
312353
updateVector((TimeMilliVector) root.getVector(columnName),
313-
rs.getTime(i, calendar), !rs.wasNull(), rowCount);
354+
rs.getTime(i, config.getCalendar()), !rs.wasNull(), rowCount);
314355
break;
315356
case Types.TIMESTAMP:
316357
// TODO: Need to handle precision such as milli, micro, nano
317358
updateVector((TimeStampVector) root.getVector(columnName),
318-
rs.getTimestamp(i, calendar), !rs.wasNull(), rowCount);
359+
rs.getTimestamp(i, config.getCalendar()), !rs.wasNull(), rowCount);
319360
break;
320361
case Types.BINARY:
321362
case Types.VARBINARY:

0 commit comments

Comments
 (0)