Skip to content

Commit 5bf6ae4

Browse files
committed
ARROW-456: Add jemalloc based MemoryPool
Runtimes of the `builder-benchmark`: ``` BM_BuildPrimitiveArrayNoNulls/repeats:3 901 ms 889 ms 1 576.196MB/s BM_BuildPrimitiveArrayNoNulls/repeats:3 833 ms 829 ms 1 617.6MB/s BM_BuildPrimitiveArrayNoNulls/repeats:3 825 ms 821 ms 1 623.855MB/s BM_BuildPrimitiveArrayNoNulls/repeats:3_mean 853 ms 846 ms 1 605.884MB/s BM_BuildPrimitiveArrayNoNulls/repeats:3_stddev 34 ms 30 ms 0 21.147MB/s BM_BuildVectorNoNulls/repeats:3 712 ms 701 ms 1 729.866MB/s BM_BuildVectorNoNulls/repeats:3 671 ms 670 ms 1 764.464MB/s BM_BuildVectorNoNulls/repeats:3 688 ms 681 ms 1 751.285MB/s BM_BuildVectorNoNulls/repeats:3_mean 690 ms 684 ms 1 748.538MB/s BM_BuildVectorNoNulls/repeats:3_stddev 17 ms 13 ms 0 14.2578MB/s ``` With an aligned `Reallocate`, the jemalloc version is 50% faster and even outperforms `std::vector`: ``` BM_BuildPrimitiveArrayNoNulls/repeats:3 565 ms 559 ms 1 916.516MB/s BM_BuildPrimitiveArrayNoNulls/repeats:3 540 ms 537 ms 1 952.727MB/s BM_BuildPrimitiveArrayNoNulls/repeats:3 544 ms 543 ms 1 942.948MB/s BM_BuildPrimitiveArrayNoNulls/repeats:3_mean 550 ms 546 ms 1 937.397MB/s BM_BuildPrimitiveArrayNoNulls/repeats:3_stddev 11 ms 9 ms 0 15.2949MB/s ``` Author: Uwe L. Korn <uwelk@xhochy.com> Closes #270 from xhochy/ARROW-456 and squashes the following commits: d3ce3bf [Uwe L. Korn] Zero arrays for now 831399d [Uwe L. Korn] cpplint #2 e6e251b [Uwe L. Korn] cpplint 52b3c76 [Uwe L. Korn] Add Reallocate implementation to PyArrowMemoryPool 113e650 [Uwe L. Korn] Add missing file d331cd9 [Uwe L. Korn] Add tests for Reallocate c2be086 [Uwe L. Korn] Add JEMALLOC_HOME to the Readme bd47f51 [Uwe L. Korn] Add missing return value 5142ac3 [Uwe L. Korn] Don't use deprecated GBenchmark interfaces b6bff98 [Uwe L. Korn] Add missing (win) include 6f08e19 [Uwe L. Korn] Don't build jemalloc on AppVeyor 834c3b2 [Uwe L. Korn] Add jemalloc to Travis builds 10c6839 [Uwe L. Korn] Implement Reallocate function a17b313 [Uwe L. Korn] ARROW-456: C++: Add jemalloc based MemoryPool
1 parent 320f587 commit 5bf6ae4

25 files changed

+704
-29
lines changed

.travis.yml

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ addons:
1515
- libboost-dev
1616
- libboost-filesystem-dev
1717
- libboost-system-dev
18+
- libjemalloc-dev
1819

1920
matrix:
2021
fast_finish: true

appveyor.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ build_script:
3232
- cd build
3333
# A lot of features are still deactivated as they do not build on Windows
3434
# * gbenchmark doesn't build with MSVC
35-
- cmake -G "%GENERATOR%" -DARROW_BOOST_USE_SHARED=OFF -DARROW_IPC=OFF -DARROW_HDFS=OFF -DARROW_BUILD_BENCHMARKS=OFF ..
35+
- cmake -G "%GENERATOR%" -DARROW_BOOST_USE_SHARED=OFF -DARROW_IPC=OFF -DARROW_HDFS=OFF -DARROW_BUILD_BENCHMARKS=OFF -DARROW_JEMALLOC=OFF ..
3636
- cmake --build . --config Debug
3737

3838
# test_script:

ci/travis_before_script_cpp.sh

+5
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@ set -ex
1717

1818
: ${CPP_BUILD_DIR=$TRAVIS_BUILD_DIR/cpp-build}
1919

20+
# Install jemalloc through Homebrew when running on the osx workers.
# The variable is quoted so that an empty/unset TRAVIS_OS_NAME evaluates to
# "not osx" instead of producing a malformed test expression.
if [ "$TRAVIS_OS_NAME" = "osx" ]; then
21+
brew update > /dev/null
22+
brew install jemalloc
23+
fi
24+
2025
mkdir $CPP_BUILD_DIR
2126
pushd $CPP_BUILD_DIR
2227

cpp/CMakeLists.txt

+29-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ set(THIRDPARTY_DIR "${CMAKE_SOURCE_DIR}/thirdparty")
2828

2929
set(GFLAGS_VERSION "2.1.2")
3030
set(GTEST_VERSION "1.7.0")
31-
set(GBENCHMARK_VERSION "1.0.0")
31+
set(GBENCHMARK_VERSION "1.1.0")
3232
set(FLATBUFFERS_VERSION "1.3.0")
3333

3434
find_package(ClangTools)
@@ -74,6 +74,10 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
7474
"Build the Arrow IPC extensions"
7575
ON)
7676

77+
option(ARROW_JEMALLOC
78+
"Build the Arrow jemalloc-based allocator"
79+
ON)
80+
7781
option(ARROW_BOOST_USE_SHARED
7882
"Rely on boost shared libraries where relevant"
7983
ON)
@@ -238,6 +242,16 @@ function(ADD_ARROW_BENCHMARK_DEPENDENCIES REL_BENCHMARK_NAME)
238242
add_dependencies(${BENCHMARK_NAME} ${ARGN})
239243
endfunction()
240244

245+
# A wrapper for target_link_libraries() that is compatible with NO_BENCHMARKS: it returns without linking anything when NO_BENCHMARKS is set.
246+
function(ARROW_BENCHMARK_LINK_LIBRARIES REL_BENCHMARK_NAME)
247+
if(NO_BENCHMARKS)
248+
return()
249+
endif()
250+
get_filename_component(BENCHMARK_NAME ${REL_BENCHMARK_NAME} NAME_WE)
251+
252+
target_link_libraries(${BENCHMARK_NAME} ${ARGN})
253+
endfunction()
254+
241255

242256
############################################################
243257
# Testing
@@ -526,7 +540,11 @@ if(ARROW_BUILD_BENCHMARKS)
526540
set(GBENCHMARK_CMAKE_ARGS
527541
"-DCMAKE_BUILD_TYPE=Release"
528542
"-DCMAKE_INSTALL_PREFIX:PATH=${GBENCHMARK_PREFIX}"
543+
"-DBENCHMARK_ENABLE_TESTING=OFF"
529544
"-DCMAKE_CXX_FLAGS=-fPIC ${GBENCHMARK_CMAKE_CXX_FLAGS}")
545+
if (APPLE)
546+
set(GBENCHMARK_CMAKE_ARGS ${GBENCHMARK_CMAKE_ARGS} "-DBENCHMARK_USE_LIBCXX=ON")
547+
endif()
530548
if (CMAKE_VERSION VERSION_GREATER "3.2")
531549
# BUILD_BYPRODUCTS is a 3.2+ feature
532550
ExternalProject_Add(gbenchmark_ep
@@ -575,6 +593,12 @@ endif()
575593
message(STATUS "RapidJSON include dir: ${RAPIDJSON_INCLUDE_DIR}")
576594
include_directories(SYSTEM ${RAPIDJSON_INCLUDE_DIR})
577595

596+
if (ARROW_JEMALLOC)
597+
find_package(jemalloc REQUIRED)
598+
ADD_THIRDPARTY_LIB(jemalloc
599+
SHARED_LIB ${JEMALLOC_SHARED_LIB})
600+
endif()
601+
578602
## Google PerfTools
579603
##
580604
## Disabled with TSAN/ASAN as well as with gold+dynamic linking (see comment
@@ -737,6 +761,10 @@ add_subdirectory(src/arrow)
737761
add_subdirectory(src/arrow/io)
738762
add_subdirectory(src/arrow/util)
739763

764+
if(ARROW_JEMALLOC)
765+
add_subdirectory(src/arrow/jemalloc)
766+
endif()
767+
740768
#----------------------------------------------------------------------
741769
# IPC library
742770

cpp/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ variables
6060
* Google Benchmark: `GBENCHMARK_HOME` (only required if building benchmarks)
6161
* Flatbuffers: `FLATBUFFERS_HOME` (only required for the IPC extensions)
6262
* Hadoop: `HADOOP_HOME` (only required for the HDFS I/O extensions)
63+
* jemalloc: `JEMALLOC_HOME` (only required for the jemalloc-based memory pool)
6364

6465
## Continuous Integration
6566

cpp/cmake_modules/Findjemalloc.cmake

+86
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
#
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
#
14+
# Tries to find jemalloc headers and libraries.
15+
#
16+
# Usage of this module as follows:
17+
#
18+
# find_package(jemalloc)
19+
#
20+
# Variables used by this module, they can change the default behaviour and need
21+
# to be set before calling find_package:
22+
#
23+
# JEMALLOC_HOME -
24+
# When set, this path is inspected instead of standard library locations as
25+
# the root of the jemalloc installation. The environment variable
26+
# JEMALLOC_HOME overrides this variable.
27+
#
28+
# This module defines
29+
# JEMALLOC_INCLUDE_DIR, directory containing headers
30+
# JEMALLOC_SHARED_LIB, path to libjemalloc.so/dylib
31+
# JEMALLOC_FOUND, whether jemalloc has been found
32+
33+
# Collect candidate installation roots. The JEMALLOC_HOME environment variable
# is checked first; the CMake variable JEMALLOC_HOME is only used when the
# environment variable is empty.
if( NOT "$ENV{JEMALLOC_HOME}" STREQUAL "")
34+
file( TO_CMAKE_PATH "$ENV{JEMALLOC_HOME}" _native_path )
35+
list( APPEND _jemalloc_roots ${_native_path} )
36+
elseif ( JEMALLOC_HOME )
37+
list( APPEND _jemalloc_roots ${JEMALLOC_HOME} )
38+
endif()
39+
40+
# Library names handed to find_library() below.
set(LIBJEMALLOC_NAMES jemalloc libjemalloc.so.1 libjemalloc.so.2 libjemalloc.dylib)
41+
42+
# Try the parameterized roots, if they exist; NO_DEFAULT_PATH restricts the
# search to those roots (with the "include" / "lib" suffixes).
43+
if ( _jemalloc_roots )
44+
find_path( JEMALLOC_INCLUDE_DIR NAMES jemalloc/jemalloc.h
45+
PATHS ${_jemalloc_roots} NO_DEFAULT_PATH
46+
PATH_SUFFIXES "include" )
47+
find_library( JEMALLOC_SHARED_LIB NAMES ${LIBJEMALLOC_NAMES}
48+
PATHS ${_jemalloc_roots} NO_DEFAULT_PATH
49+
PATH_SUFFIXES "lib" )
50+
else ()
51+
# No root was given: use CMake's default search locations and print the
# value of each result variable.
find_path( JEMALLOC_INCLUDE_DIR NAMES jemalloc/jemalloc.h )
52+
message(STATUS ${JEMALLOC_INCLUDE_DIR})
53+
find_library( JEMALLOC_SHARED_LIB NAMES ${LIBJEMALLOC_NAMES})
54+
message(STATUS ${JEMALLOC_SHARED_LIB})
55+
endif ()
56+
57+
# jemalloc counts as found only when both the header directory and the shared
# library were located.
if (JEMALLOC_INCLUDE_DIR AND JEMALLOC_SHARED_LIB)
58+
set(JEMALLOC_FOUND TRUE)
59+
else ()
60+
set(JEMALLOC_FOUND FALSE)
61+
endif ()
62+
63+
# Report the outcome of the search. Messages are suppressed when the caller
# passed QUIET; when jemalloc is missing the message is a FATAL_ERROR if the
# package was REQUIRED and a plain STATUS line otherwise.
if (JEMALLOC_FOUND)
64+
if (NOT jemalloc_FIND_QUIETLY)
65+
message(STATUS "Found the jemalloc library: ${JEMALLOC_SHARED_LIB}")
66+
endif ()
67+
else ()
68+
if (NOT jemalloc_FIND_QUIETLY)
69+
set(JEMALLOC_ERR_MSG "Could not find the jemalloc library. Looked in")
70+
if ( _jemalloc_roots )
71+
set(JEMALLOC_ERR_MSG "${JEMALLOC_ERR_MSG} ${_jemalloc_roots}.")
72+
else ()
73+
set(JEMALLOC_ERR_MSG "${JEMALLOC_ERR_MSG} system search paths.")
74+
endif ()
75+
if (jemalloc_FIND_REQUIRED)
76+
message(FATAL_ERROR "${JEMALLOC_ERR_MSG}")
77+
else (jemalloc_FIND_REQUIRED)
78+
message(STATUS "${JEMALLOC_ERR_MSG}")
79+
endif (jemalloc_FIND_REQUIRED)
80+
endif ()
81+
endif ()
82+
83+
mark_as_advanced(
84+
JEMALLOC_INCLUDE_DIR
85+
JEMALLOC_SHARED_LIB
86+
)

cpp/src/arrow/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -59,4 +59,5 @@ ADD_ARROW_TEST(schema-test)
5959
ADD_ARROW_TEST(status-test)
6060
ADD_ARROW_TEST(table-test)
6161

62+
ADD_ARROW_BENCHMARK(builder-benchmark)
6263
ADD_ARROW_BENCHMARK(column-benchmark)

cpp/src/arrow/buffer.cc

+2-4
Original file line numberDiff line numberDiff line change
@@ -80,13 +80,11 @@ Status PoolBuffer::Reserve(int64_t new_capacity) {
8080
uint8_t* new_data;
8181
new_capacity = BitUtil::RoundUpToMultipleOf64(new_capacity);
8282
if (mutable_data_) {
83-
RETURN_NOT_OK(pool_->Allocate(new_capacity, &new_data));
84-
memcpy(new_data, mutable_data_, size_);
85-
pool_->Free(mutable_data_, capacity_);
83+
RETURN_NOT_OK(pool_->Reallocate(capacity_, new_capacity, &mutable_data_));
8684
} else {
8785
RETURN_NOT_OK(pool_->Allocate(new_capacity, &new_data));
86+
mutable_data_ = new_data;
8887
}
89-
mutable_data_ = new_data;
9088
data_ = mutable_data_;
9189
capacity_ = new_capacity;
9290
}

cpp/src/arrow/builder-benchmark.cc

+64
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#include "benchmark/benchmark.h"
19+
20+
#include "arrow/builder.h"
21+
#include "arrow/memory_pool.h"
22+
#include "arrow/test-util.h"
23+
24+
namespace arrow {
25+
26+
constexpr int64_t kFinalSize = 256;
27+
28+
// Benchmark: each iteration appends the same int64 block kFinalSize times to a
// fresh Int64Builder created on default_memory_pool(), then calls Finish().
static void BM_BuildPrimitiveArrayNoNulls(
29+
benchmark::State& state) { // NOLINT non-const reference
30+
// 2 MiB block: 256 * 1024 int64_t values, each initialised to 100
31+
std::vector<int64_t> data(256 * 1024, 100);
32+
while (state.KeepRunning()) {
33+
Int64Builder builder(default_memory_pool(), arrow::int64());
34+
for (int i = 0; i < kFinalSize; i++) {
35+
// Build up an array of 512 MiB in size (kFinalSize appends of the 2 MiB block).
// The trailing nullptr presumably means "no validity bitmap" -- TODO confirm.
36+
builder.Append(data.data(), data.size(), nullptr);
37+
}
38+
std::shared_ptr<Array> out;
39+
builder.Finish(&out);
40+
}
41+
// Bytes processed = iterations * bytes per block * blocks appended per iteration.
state.SetBytesProcessed(
42+
state.iterations() * data.size() * sizeof(int64_t) * kFinalSize);
43+
}
44+
45+
BENCHMARK(BM_BuildPrimitiveArrayNoNulls)->Repetitions(3)->Unit(benchmark::kMillisecond);
46+
47+
// Benchmark: the std::vector counterpart of the builder benchmark. Each
// iteration appends the same int64 block kFinalSize times to a fresh,
// unreserved std::vector<int64_t>.
static void BM_BuildVectorNoNulls(
48+
benchmark::State& state) { // NOLINT non-const reference
49+
// 2 MiB block: 256 * 1024 int64_t values, each initialised to 100
50+
std::vector<int64_t> data(256 * 1024, 100);
51+
while (state.KeepRunning()) {
52+
std::vector<int64_t> builder;
53+
for (int i = 0; i < kFinalSize; i++) {
54+
// Build up an array of 512 MiB in size (kFinalSize appends of the 2 MiB block)
55+
builder.insert(builder.end(), data.cbegin(), data.cend());
56+
}
57+
}
58+
// Bytes processed = iterations * bytes per block * blocks appended per iteration.
state.SetBytesProcessed(
59+
state.iterations() * data.size() * sizeof(int64_t) * kFinalSize);
60+
}
61+
62+
BENCHMARK(BM_BuildVectorNoNulls)->Repetitions(3)->Unit(benchmark::kMillisecond);
63+
64+
} // namespace arrow

cpp/src/arrow/builder.cc

+1
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ Status PrimitiveBuilder<T>::Resize(int32_t capacity) {
156156
const int64_t new_bytes = TypeTraits<T>::bytes_required(capacity);
157157
RETURN_NOT_OK(data_->Resize(new_bytes));
158158
raw_data_ = reinterpret_cast<value_type*>(data_->mutable_data());
159+
// TODO(emkornfield) valgrind complains without this
159160
memset(data_->mutable_data() + old_bytes, 0, new_bytes - old_bytes);
160161
}
161162
return Status::OK();

cpp/src/arrow/column-benchmark.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ std::shared_ptr<Array> MakePrimitive(int32_t length, int32_t null_count = 0) {
3737
static void BM_BuildInt32ColumnByChunk(
3838
benchmark::State& state) { // NOLINT non-const reference
3939
ArrayVector arrays;
40-
for (int chunk_n = 0; chunk_n < state.range_x(); ++chunk_n) {
40+
for (int chunk_n = 0; chunk_n < state.range(0); ++chunk_n) {
4141
arrays.push_back(MakePrimitive<Int32Array>(100, 10));
4242
}
4343
const auto INT32 = std::make_shared<Int32Type>();

cpp/src/arrow/io/interfaces.cc

+2-2
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,8 @@ Status ReadableFileInterface::ReadAt(
4545
}
4646

4747
Status Writeable::Write(const std::string& data) {
48-
return Write(reinterpret_cast<const uint8_t*>(data.c_str()),
49-
static_cast<int64_t>(data.size()));
48+
return Write(
49+
reinterpret_cast<const uint8_t*>(data.c_str()), static_cast<int64_t>(data.size()));
5050
}
5151

5252
} // namespace io

cpp/src/arrow/io/io-file-test.cc

+13
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,19 @@ class MyMemoryPool : public MemoryPool {
292292

293293
void Free(uint8_t* buffer, int64_t size) override { std::free(buffer); }
294294

295+
// Resizes the allocation at *ptr to new_size bytes with std::realloc.
// On success *ptr is updated to the (possibly moved) block and OK is returned.
// On failure OutOfMemory is returned and *ptr is left untouched: realloc keeps
// the original block valid when it fails, so overwriting *ptr with the null
// result would leak that block and hand the caller a null buffer.
// old_size is not used by this implementation.
Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) override {
296+
uint8_t* new_ptr = reinterpret_cast<uint8_t*>(std::realloc(*ptr, new_size));
297+
298+
if (new_ptr == nullptr) {
299+
std::stringstream ss;
300+
ss << "realloc of size " << new_size << " failed";
301+
return Status::OutOfMemory(ss.str());
302+
}
303+
*ptr = new_ptr;
304+
305+
return Status::OK();
306+
}
307+
295308
int64_t bytes_allocated() const override { return -1; }
296309

297310
int64_t num_allocations() const { return num_allocations_; }

0 commit comments

Comments
 (0)