Skip to content

Commit 9e1897f

Browse files
styling etc ...
1 parent f66fa80 commit 9e1897f

10 files changed

+115
-47
lines changed

r/DESCRIPTION

+1
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ Collate:
6161
'memory_pool.R'
6262
'message.R'
6363
'on_exit.R'
64+
'parquet.R'
6465
'read_record_batch.R'
6566
'read_table.R'
6667
'reexports-bit64.R'

r/NAMESPACE

+1
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ export(print.integer64)
113113
export(read_arrow)
114114
export(read_feather)
115115
export(read_message)
116+
export(read_parquet)
116117
export(read_record_batch)
117118
export(read_schema)
118119
export(read_table)

r/R/RcppExports.R

+4
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

r/R/parquet.R

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
#' Read parquet files from disk
#'
#' Each element of `files` is read with `read_parquet_file()`, wrapped as an
#' `arrow::Table`, converted with `as_tibble()`, and the per-file results are
#' row-bound by [purrr::map_dfr()].
#'
#' @param files a character vector of parquet file paths
#'
#' @return a data frame holding the rows of every file, in the order the
#'   files were given
#'
#' @importFrom purrr map_dfr
#'
#' @export
read_parquet <- function(files) {
  # An explicit one-argument function is used instead of a `~` lambda: inside
  # a formula lambda only `.x` / `.` are bound, so any other name would be
  # resolved in the enclosing environments rather than to the current file.
  map_dfr(files, function(file) {
    as_tibble(shared_ptr(`arrow::Table`, read_parquet_file(file)))
  })
}

r/README.Rmd

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ git clone https://github.com/apache/arrow.git
2525
cd arrow/cpp && mkdir release && cd release
2626

2727
# It is important to statically link to boost libraries
28-
cmake .. -DCMAKE_BUILD_TYPE=Release -DARROW_BOOST_USE_SHARED:BOOL=Off
28+
cmake .. -DARROW_PARQUET=ON -DCMAKE_BUILD_TYPE=Release -DARROW_BOOST_USE_SHARED:BOOL=Off
2929
make install
3030
```
3131

r/README.md

+16-45
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ git clone https://github.com/apache/arrow.git
1414
cd arrow/cpp && mkdir release && cd release
1515

1616
# It is important to statically link to boost libraries
17-
cmake .. -DCMAKE_BUILD_TYPE=Release -DARROW_BOOST_USE_SHARED:BOOL=Off
17+
cmake .. -DARROW_PARQUET=ON -DCMAKE_BUILD_TYPE=Release -DARROW_BOOST_USE_SHARED:BOOL=Off
1818
make install
1919
```
2020

@@ -38,48 +38,19 @@ tf <- tempfile()
3838
#> # A tibble: 10 x 2
3939
#> x y
4040
#> <int> <dbl>
41-
#> 1 1 -0.255
42-
#> 2 2 -0.162
43-
#> 3 3 -0.614
44-
#> 4 4 -0.322
45-
#> 5 5 0.0693
46-
#> 6 6 -0.920
47-
#> 7 7 -1.08
48-
#> 8 8 0.658
49-
#> 9 9 0.821
50-
#> 10 10 0.539
51-
arrow::write_arrow(tib, tf)
52-
53-
# read it back with pyarrow
54-
pa <- import("pyarrow")
55-
as_tibble(pa$open_file(tf)$read_pandas())
56-
#> # A tibble: 10 x 2
57-
#> x y
58-
#> <int> <dbl>
59-
#> 1 1 -0.255
60-
#> 2 2 -0.162
61-
#> 3 3 -0.614
62-
#> 4 4 -0.322
63-
#> 5 5 0.0693
64-
#> 6 6 -0.920
65-
#> 7 7 -1.08
66-
#> 8 8 0.658
67-
#> 9 9 0.821
68-
#> 10 10 0.539
69-
```
70-
71-
## Development
72-
73-
### Code style
74-
75-
We use Google C++ style in our C++ code. Check for style errors with
76-
77-
```
78-
./lint.sh
79-
```
80-
81-
You can fix the style issues with
82-
41+
#> 1 1 0.0855
42+
#> 2 2 -1.68
43+
#> 3 3 -0.0294
44+
#> 4 4 -0.124
45+
#> 5 5 0.0675
46+
#> 6 6 1.64
47+
#> 7 7 1.54
48+
#> 8 8 -0.0209
49+
#> 9 9 -0.982
50+
#> 10 10 0.349
51+
# arrow::write_arrow(tib, tf)
52+
53+
# # read it back with pyarrow
54+
# pa <- import("pyarrow")
55+
# as_tibble(pa$open_file(tf)$read_pandas())
8356
```
84-
./lint.sh --fix
85-
```

r/configure

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
# R CMD INSTALL --configure-vars='INCLUDE_DIR=/.../include LIB_DIR=/.../lib'
2727

2828
# Library settings
29-
PKG_CONFIG_NAME="arrow"
29+
PKG_CONFIG_NAME="arrow parquet"
3030
PKG_DEB_NAME="arrow"
3131
PKG_RPM_NAME="arrow"
3232
PKG_CSW_NAME="arrow"

r/man/read_parquet.Rd

+14
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

r/src/RcppExports.cpp

+12
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

r/src/parquetfilereader.cpp

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// // Licensed to the Apache Software Foundation (ASF) under one
2+
// // or more contributor license agreements. See the NOTICE file
3+
// // distributed with this work for additional information
4+
// // regarding copyright ownership. The ASF licenses this file
5+
// // to you under the Apache License, Version 2.0 (the
6+
// // "License"); you may not use this file except in compliance
7+
// // with the License. You may obtain a copy of the License at
8+
// //
9+
// // http://www.apache.org/licenses/LICENSE-2.0
10+
// //
11+
// // Unless required by applicable law or agreed to in writing,
12+
// // software distributed under the License is distributed on an
13+
// // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// // KIND, either express or implied. See the License for the
15+
// // specific language governing permissions and limitations
16+
// // under the License.
17+
//
18+
//
19+
#include <arrow/api.h>
20+
#include <arrow/io/api.h>
21+
#include <parquet/arrow/reader.h>
22+
#include <parquet/arrow/writer.h>
23+
#include <parquet/exception.h>
24+
25+
// [[Rcpp::export]]
26+
std::shared_ptr<arrow::Table> read_parquet_file(std::string filename) {
27+
std::shared_ptr<arrow::io::ReadableFile> infile;
28+
PARQUET_THROW_NOT_OK(
29+
arrow::io::ReadableFile::Open(filename, arrow::default_memory_pool(), &infile));
30+
31+
std::unique_ptr<parquet::arrow::FileReader> reader;
32+
PARQUET_THROW_NOT_OK(
33+
parquet::arrow::OpenFile(infile, arrow::default_memory_pool(), &reader));
34+
std::shared_ptr<arrow::Table> table;
35+
PARQUET_THROW_NOT_OK(reader->ReadTable(&table));
36+
37+
return table;
38+
}

0 commit comments

Comments
 (0)