Skip to content

Commit 5a5b70e

Browse files
authored
Merge pull request #4 from apache/master
Syncing from original
2 parents 8e4c892 + bc9f9e5 commit 5a5b70e

File tree

11 files changed

+161
-50
lines changed

11 files changed

+161
-50
lines changed

c_glib/arrow-glib/Makefile.am

+6-1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
# under the License.
1717

1818
CLEANFILES =
19+
DISTCLEANFILES =
1920

2021
EXTRA_DIST = \
2122
meson.build
@@ -169,6 +170,10 @@ BUILT_SOURCES = \
169170
stamp-enums.c \
170171
stamp-enums.h
171172

173+
DISTCLEANFILES += \
174+
stamp-enums.c \
175+
stamp-enums.h
176+
172177
EXTRA_DIST += \
173178
enums.c.template \
174179
enums.h.template
@@ -214,7 +219,7 @@ INTROSPECTION_SCANNER_ARGS =
214219
INTROSPECTION_SCANNER_ENV =
215220
if USE_ARROW_BUILD_DIR
216221
INTROSPECTION_SCANNER_ENV += \
217-
LD_LIBRARY_PATH=$(ARROW_LIB_DIR):$${PKG_CONFIG_PATH}
222+
LD_LIBRARY_PATH=$(ARROW_LIB_DIR):$${LD_LIBRARY_PATH}
218223
endif
219224
if OS_MACOS
220225
INTROSPECTION_SCANNER_ENV += \

cpp/apidoc/HDFS.md

+4
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ export CLASSPATH=`$HADOOP_HOME/bin/hadoop classpath --glob`
5050
* `ARROW_LIBHDFS_DIR` (optional): explicit location of `libhdfs.so` if it is
5151
installed somewhere other than `$HADOOP_HOME/lib/native`.
5252

53+
To accommodate distribution-specific nuances, the `JAVA_HOME` variable may be
54+
set to the root path for the Java SDK, the JRE path itself, or to the directory
55+
containing the `libjvm` library.
56+
5357
### Mac Specifics
5458

5559
The installed location of Java on OS X can vary, however the following snippet

cpp/cmake_modules/FindParquet.cmake

+16-2
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,22 @@ if(PARQUET_HOME)
6060
PATHS ${PARQUET_HOME} NO_DEFAULT_PATH
6161
PATH_SUFFIXES "lib")
6262
get_filename_component(PARQUET_LIBS ${PARQUET_LIBRARIES} PATH )
63-
set(PARQUET_ABI_VERSION "1.0.0")
64-
set(PARQUET_SO_VERSION "1")
63+
64+
# Try to autodiscover the Parquet ABI version
65+
get_filename_component(PARQUET_LIB_REALPATH ${PARQUET_LIBRARIES} REALPATH)
66+
get_filename_component(PARQUET_EXT_REALPATH ${PARQUET_LIB_REALPATH} EXT)
67+
string(REGEX MATCH ".([0-9]+.[0-9]+.[0-9]+)" HAS_ABI_VERSION ${PARQUET_EXT_REALPATH})
68+
if (HAS_ABI_VERSION)
69+
if (APPLE)
70+
string(REGEX REPLACE ".([0-9]+.[0-9]+.[0-9]+).dylib" "\\1" PARQUET_ABI_VERSION ${PARQUET_EXT_REALPATH})
71+
else()
72+
string(REGEX REPLACE ".so.([0-9]+.[0-9]+.[0-9]+)" "\\1" PARQUET_ABI_VERSION ${PARQUET_EXT_REALPATH})
73+
endif()
74+
string(REGEX REPLACE "([0-9]+).[0-9]+.[0-9]+" "\\1" PARQUET_SO_VERSION ${PARQUET_ABI_VERSION})
75+
else()
76+
set(PARQUET_ABI_VERSION "1.0.0")
77+
set(PARQUET_SO_VERSION "1")
78+
endif()
6579
else()
6680
pkg_check_modules(PARQUET parquet)
6781
if (PARQUET_FOUND)

cpp/src/arrow/buffer-test.cc

+25
Original file line numberDiff line numberDiff line change
@@ -194,4 +194,29 @@ TEST(TestBuffer, SliceMutableBuffer) {
194194
ASSERT_TRUE(slice->Equals(expected));
195195
}
196196

197+
TEST(TestBufferBuilder, ResizeReserve) {
198+
const std::string data = "some data";
199+
auto data_ptr = data.c_str();
200+
201+
BufferBuilder builder;
202+
203+
ASSERT_OK(builder.Append(data_ptr, 9));
204+
ASSERT_EQ(9, builder.length());
205+
206+
ASSERT_OK(builder.Resize(128));
207+
ASSERT_EQ(128, builder.capacity());
208+
209+
// Do not shrink to fit
210+
ASSERT_OK(builder.Resize(64, false));
211+
ASSERT_EQ(128, builder.capacity());
212+
213+
// Shrink to fit
214+
ASSERT_OK(builder.Resize(64));
215+
ASSERT_EQ(64, builder.capacity());
216+
217+
// Reserve elements
218+
ASSERT_OK(builder.Reserve(60));
219+
ASSERT_EQ(128, builder.capacity());
220+
}
221+
197222
} // namespace arrow

cpp/src/arrow/buffer.h

+30-11
Original file line numberDiff line numberDiff line change
@@ -25,20 +25,20 @@
2525
#include <string>
2626
#include <type_traits>
2727

28+
#include "arrow/memory_pool.h"
2829
#include "arrow/status.h"
2930
#include "arrow/util/bit-util.h"
3031
#include "arrow/util/macros.h"
3132
#include "arrow/util/visibility.h"
3233

3334
namespace arrow {
3435

35-
class MemoryPool;
36-
3736
// ----------------------------------------------------------------------
3837
// Buffer classes
3938

40-
/// Immutable API for a chunk of bytes which may or may not be owned by the
41-
/// class instance.
39+
/// \class Buffer
40+
/// \brief Object containing a pointer to a piece of contiguous memory with a
41+
/// particular size. Base class does not own its memory
4242
///
4343
/// Buffers have two related notions of length: size and capacity. Size is
4444
/// the number of bytes that might have valid data. Capacity is the number
@@ -133,7 +133,8 @@ ARROW_EXPORT
133133
std::shared_ptr<Buffer> SliceMutableBuffer(const std::shared_ptr<Buffer>& buffer,
134134
const int64_t offset, const int64_t length);
135135

136-
/// A Buffer whose contents can be mutated. May or may not own its data.
136+
/// \class MutableBuffer
137+
/// \brief A Buffer whose contents can be mutated. May or may not own its data.
137138
class ARROW_EXPORT MutableBuffer : public Buffer {
138139
public:
139140
MutableBuffer(uint8_t* data, const int64_t size) : Buffer(data, size) {
@@ -148,6 +149,8 @@ class ARROW_EXPORT MutableBuffer : public Buffer {
148149
MutableBuffer() : Buffer(NULLPTR, 0) {}
149150
};
150151

152+
/// \class ResizableBuffer
153+
/// \brief A mutable buffer that can be resized
151154
class ARROW_EXPORT ResizableBuffer : public MutableBuffer {
152155
public:
153156
/// Change buffer reported size to indicated size, allocating memory if
@@ -190,13 +193,22 @@ class ARROW_EXPORT PoolBuffer : public ResizableBuffer {
190193
MemoryPool* pool_;
191194
};
192195

196+
/// \class BufferBuilder
197+
/// \brief A class for incrementally building a contiguous chunk of in-memory data
193198
class ARROW_EXPORT BufferBuilder {
194199
public:
195-
explicit BufferBuilder(MemoryPool* pool)
200+
explicit BufferBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT)
196201
: pool_(pool), data_(NULLPTR), capacity_(0), size_(0) {}
197202

198-
/// Resizes the buffer to the nearest multiple of 64 bytes per Layout.md
199-
Status Resize(const int64_t elements) {
203+
/// \brief Resizes the buffer to the nearest multiple of 64 bytes
204+
///
205+
/// \param elements the new capacity of the builder. Will be rounded
206+
/// up to a multiple of 64 bytes for padding
207+
/// \param shrink_to_fit if new capacity is smaller than existing size,
208+
/// reallocate internal buffer. Set to false to avoid reallocations when
209+
/// shrinking the builder
210+
/// \return Status
211+
Status Resize(const int64_t elements, bool shrink_to_fit = true) {
200212
// Resize(0) is a no-op
201213
if (elements == 0) {
202214
return Status::OK();
@@ -205,7 +217,7 @@ class ARROW_EXPORT BufferBuilder {
205217
buffer_ = std::make_shared<PoolBuffer>(pool_);
206218
}
207219
int64_t old_capacity = capacity_;
208-
RETURN_NOT_OK(buffer_->Resize(elements));
220+
RETURN_NOT_OK(buffer_->Resize(elements, shrink_to_fit));
209221
capacity_ = buffer_->capacity();
210222
data_ = buffer_->mutable_data();
211223
if (capacity_ > old_capacity) {
@@ -214,7 +226,14 @@ class ARROW_EXPORT BufferBuilder {
214226
return Status::OK();
215227
}
216228

217-
Status Append(const uint8_t* data, int64_t length) {
229+
/// \brief Ensure that builder can accommodate the additional number of bytes
230+
/// without the need to perform allocations
231+
///
232+
/// \param size number of additional bytes to make space for
233+
/// \return Status
234+
Status Reserve(const int64_t size) { return Resize(size_ + size, false); }
235+
236+
Status Append(const void* data, int64_t length) {
218237
if (capacity_ < length + size_) {
219238
int64_t new_capacity = BitUtil::NextPower2(length + size_);
220239
RETURN_NOT_OK(Resize(new_capacity));
@@ -248,7 +267,7 @@ class ARROW_EXPORT BufferBuilder {
248267
}
249268

250269
// Unsafe methods don't check existing size
251-
void UnsafeAppend(const uint8_t* data, int64_t length) {
270+
void UnsafeAppend(const void* data, int64_t length) {
252271
memcpy(data_ + size_, data, static_cast<size_t>(length));
253272
size_ += length;
254273
}

cpp/src/arrow/io/hdfs-internal.cc

+2-2
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ static std::vector<fs::path> get_potential_libjvm_paths() {
147147
file_name = "jvm.dll";
148148
#elif __APPLE__
149149
search_prefixes = {""};
150-
search_suffixes = {"", "/jre/lib/server"};
150+
search_suffixes = {"", "/jre/lib/server", "/lib/server"};
151151
file_name = "libjvm.dylib";
152152

153153
// SFrame uses /usr/libexec/java_home to find JAVA_HOME; for now we are
@@ -175,7 +175,7 @@ static std::vector<fs::path> get_potential_libjvm_paths() {
175175
"/usr/lib/jvm/default", // alt centos
176176
"/usr/java/latest", // alt centos
177177
};
178-
search_suffixes = {"/jre/lib/amd64/server"};
178+
search_suffixes = {"", "/jre/lib/amd64/server", "/lib/amd64/server"};
179179
file_name = "libjvm.so";
180180
#endif
181181
// From direct environment variable

dev/docker-compose.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
version: '3'
1818
services:
1919
gen_apidocs:
20-
build:
20+
build:
2121
context: gen_apidocs
2222
volumes:
2323
- ../..:/apache-arrow
@@ -29,7 +29,7 @@ services:
2929
volumes:
3030
- ../..:/apache-arrow
3131
dask_integration:
32-
build:
32+
build:
3333
context: dask_integration
3434
volumes:
3535
- ../..:/apache-arrow

dev/gen_apidocs/Dockerfile

+13-7
Original file line numberDiff line numberDiff line change
@@ -14,19 +14,24 @@
1414
# See the License for the specific language governing permissions and
1515
# limitations under the License.
1616
#
17-
FROM ubuntu:14.04
18-
# Prerequisites for apt-add-repository
19-
RUN apt-get update && apt-get install -y \
20-
software-properties-common python-software-properties
17+
FROM ubuntu:16.04
18+
2119
# Basic OS dependencies
22-
RUN apt-add-repository -y ppa:ubuntu-toolchain-r/test && \
23-
apt-get update && apt-get install -y \
20+
RUN apt-get update && apt-get install -y \
2421
wget \
2522
rsync \
2623
git \
2724
gcc-4.9 \
2825
g++-4.9 \
29-
build-essential
26+
build-essential \
27+
software-properties-common
28+
29+
# Java build fails with default JDK8
30+
RUN add-apt-repository ppa:openjdk-r/ppa &&\
31+
apt-get update &&\
32+
apt-get install -y openjdk-7-jdk &&\
33+
update-java-alternatives -s java-1.7.0-openjdk-amd64
34+
3035
# This will install conda in /home/ubuntu/miniconda
3136
RUN wget -O /tmp/miniconda.sh \
3237
https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
@@ -73,6 +78,7 @@ RUN /home/ubuntu/miniconda/bin/conda create -y -q -n pyarrow-dev \
7378
doxygen \
7479
maven \
7580
-c conda-forge
81+
7682
ADD . /apache-arrow
7783
WORKDIR /apache-arrow
7884
CMD arrow/dev/gen_apidocs/create_documents.sh

dev/gen_apidocs/create_documents.sh

+48-25
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ export ARROW_HOME=$(pwd)/dist
2727
export PARQUET_HOME=$(pwd)/dist
2828
CONDA_BASE=/home/ubuntu/miniconda
2929
export LD_LIBRARY_PATH=$(pwd)/dist/lib:${CONDA_BASE}/lib:${LD_LIBRARY_PATH}
30+
export PKG_CONFIG_PATH=$(pwd)/dist/lib/pkgconfig:${PKG_CONFIG_PATH}
3031
export PATH=${CONDA_BASE}/bin:${PATH}
3132

3233
# Prepare the asf-site before copying api docs
@@ -38,16 +39,38 @@ git clone --branch=asf-site \
3839
https://git-wip-us.apache.org/repos/asf/arrow-site.git asf-site
3940
popd
4041

42+
# Make Java documentation
43+
export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64
44+
wget http://mirrors.gigenet.com/apache/maven/maven-3/3.5.2/binaries/apache-maven-3.5.2-bin.tar.gz
45+
tar xvf apache-maven-3.5.2-bin.tar.gz
46+
export PATH=$(pwd)/apache-maven-3.5.2/bin:$PATH
47+
48+
pushd arrow/java
49+
rm -rf target/site/apidocs/*
50+
mvn -Drat.skip=true install
51+
mvn -Drat.skip=true site
52+
mkdir -p ../site/asf-site/docs/java/
53+
rsync -r target/site/apidocs/ ../site/asf-site/docs/java/
54+
popd
55+
4156
# Make Python documentation (Depends on C++ )
4257
# Build Arrow C++
4358
source activate pyarrow-dev
4459

4560
export ARROW_BUILD_TOOLCHAIN=$CONDA_PREFIX
61+
export BOOST_ROOT=$CONDA_PREFIX
4662
export PARQUET_BUILD_TOOLCHAIN=$CONDA_PREFIX
63+
export LD_LIBRARY_PATH=$CONDA_PREFIX/lib:${LD_LIBRARY_PATH}
64+
export PKG_CONFIG_PATH=$CONDA_PREFIX/lib/pkgconfig:${PKG_CONFIG_PATH}
65+
66+
export CC=gcc-4.9
67+
export CXX=g++-4.9
4768

48-
rm -rf arrow/cpp/build_docs
49-
mkdir arrow/cpp/build_docs
50-
pushd arrow/cpp/build_docs
69+
CPP_BUILD_DIR=$(pwd)/arrow/cpp/build_docs
70+
71+
rm -rf $CPP_BUILD_DIR
72+
mkdir $CPP_BUILD_DIR
73+
pushd $CPP_BUILD_DIR
5174
cmake -DCMAKE_BUILD_TYPE=$ARROW_BUILD_TYPE \
5275
-DCMAKE_INSTALL_PREFIX=$ARROW_HOME \
5376
-DARROW_PYTHON=on \
@@ -58,6 +81,28 @@ make -j4
5881
make install
5982
popd
6083

84+
# Build c_glib documentation
85+
pushd arrow/c_glib
86+
if [ -f Makefile ]; then
87+
# Refresh timestamps to prevent an automatic re-configure
88+
touch configure **/Makefile
89+
make distclean
90+
# Workaround: 'make distclean' removes doc/reference/xml/
91+
git checkout doc/reference/xml
92+
fi
93+
./autogen.sh
94+
rm -rf build_docs
95+
mkdir build_docs
96+
pushd build_docs
97+
../configure \
98+
--prefix=${AROW_HOME} \
99+
--enable-gtk-doc
100+
make -j4 GTK_DOC_V_XREF=": "
101+
mkdir -p ../../site/asf-site/docs/c_glib
102+
rsync -r doc/reference/html/ ../../site/asf-site/docs/c_glib
103+
popd
104+
popd
105+
61106
# Build Parquet C++
62107
rm -rf parquet-cpp/build_docs
63108
mkdir parquet-cpp/build_docs
@@ -83,32 +128,10 @@ mkdir -p ../site/asf-site/docs/python
83128
rsync -r doc/_build/html/ ../site/asf-site/docs/python
84129
popd
85130

86-
# Build c_glib documentation
87-
pushd arrow/c_glib
88-
rm -rf doc/reference/html/*
89-
./autogen.sh
90-
./configure \
91-
--with-arrow-cpp-build-dir=$(pwd)/../cpp/build \
92-
--with-arrow-cpp-build-type=$ARROW_BUILD_TYPE \
93-
--enable-gtk-doc
94-
LD_LIBRARY_PATH=$(pwd)/../cpp/build/$ARROW_BUILD_TYPE make GTK_DOC_V_XREF=": "
95-
mkdir -p ../site/asf-site/docs/c_glib
96-
rsync -r doc/reference/html/ ../site/asf-site/docs/c_glib
97-
popd
98-
99131
# Make C++ documentation
100132
pushd arrow/cpp/apidoc
101133
rm -rf html/*
102134
doxygen Doxyfile
103135
mkdir -p ../../site/asf-site/docs/cpp
104136
rsync -r html/ ../../site/asf-site/docs/cpp
105137
popd
106-
107-
# Make Java documentation
108-
pushd arrow/java
109-
rm -rf target/site/apidocs/*
110-
mvn -Drat.skip=true install
111-
mvn -Drat.skip=true site
112-
mkdir -p ../site/asf-site/docs/java/
113-
rsync -r target/site/apidocs/ ../site/asf-site/docs/java/
114-
popd

0 commit comments

Comments
 (0)