Skip to content

Commit 676c32c

Browse files
committed
ARROW-317: Add Slice, Copy methods to Buffer
There's also a little bit of naming cleanup in `bit-util.h`; pardon the diff noise.

Author: Wes McKinney <wes.mckinney@twosigma.com>

Closes #177 from wesm/ARROW-317 and squashes the following commits:

0666b22 [Wes McKinney] Fix up pyarrow usage of BitUtil
3ab4e7a [Wes McKinney] Add Slice, Copy methods to Buffer
cb9519d [Wes McKinney] Use more conforming names in bit-util.h
1 parent 732a205 commit 676c32c

18 files changed

+173
-90
lines changed

cpp/src/arrow/array.cc

+2-1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
#include <cstdint>
2121

22+
#include "arrow/util/bit-util.h"
2223
#include "arrow/util/buffer.h"
2324
#include "arrow/util/status.h"
2425

@@ -43,7 +44,7 @@ bool Array::EqualsExact(const Array& other) const {
4344
return false;
4445
}
4546
if (null_count_ > 0) {
46-
return null_bitmap_->Equals(*other.null_bitmap_, util::bytes_for_bits(length_));
47+
return null_bitmap_->Equals(*other.null_bitmap_, BitUtil::BytesForBits(length_));
4748
}
4849
return true;
4950
}

cpp/src/arrow/array.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ class ARROW_EXPORT Array {
4545

4646
// Determine if a slot is null. For inner loops. Does *not* boundscheck
4747
bool IsNull(int i) const {
48-
return null_count_ > 0 && util::bit_not_set(null_bitmap_data_, i);
48+
return null_count_ > 0 && BitUtil::BitNotSet(null_bitmap_data_, i);
4949
}
5050

5151
int32_t length() const { return length_; }

cpp/src/arrow/builder.cc

+6-6
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ Status ArrayBuilder::AppendToBitmap(bool is_valid) {
3131
// TODO(emkornfield) doubling isn't great default allocation practice
3232
// see https://github.com/facebook/folly/blob/master/folly/docs/FBVector.md
3333
// for discussion
34-
RETURN_NOT_OK(Resize(util::next_power2(capacity_ + 1)));
34+
RETURN_NOT_OK(Resize(BitUtil::NextPower2(capacity_ + 1)));
3535
}
3636
UnsafeAppendToBitmap(is_valid);
3737
return Status::OK();
@@ -45,7 +45,7 @@ Status ArrayBuilder::AppendToBitmap(const uint8_t* valid_bytes, int32_t length)
4545
}
4646

4747
Status ArrayBuilder::Init(int32_t capacity) {
48-
int32_t to_alloc = util::ceil_byte(capacity) / 8;
48+
int32_t to_alloc = BitUtil::CeilByte(capacity) / 8;
4949
null_bitmap_ = std::make_shared<PoolBuffer>(pool_);
5050
RETURN_NOT_OK(null_bitmap_->Resize(to_alloc));
5151
// Buffers might allocate more than necessary to satisfy padding requirements
@@ -58,7 +58,7 @@ Status ArrayBuilder::Init(int32_t capacity) {
5858

5959
Status ArrayBuilder::Resize(int32_t new_bits) {
6060
if (!null_bitmap_) { return Init(new_bits); }
61-
int32_t new_bytes = util::ceil_byte(new_bits) / 8;
61+
int32_t new_bytes = BitUtil::CeilByte(new_bits) / 8;
6262
int32_t old_bytes = null_bitmap_->size();
6363
RETURN_NOT_OK(null_bitmap_->Resize(new_bytes));
6464
null_bitmap_data_ = null_bitmap_->mutable_data();
@@ -82,7 +82,7 @@ Status ArrayBuilder::Advance(int32_t elements) {
8282
Status ArrayBuilder::Reserve(int32_t elements) {
8383
if (length_ + elements > capacity_) {
8484
// TODO(emkornfield) power of 2 growth is potentially suboptimal
85-
int32_t new_capacity = util::next_power2(length_ + elements);
85+
int32_t new_capacity = BitUtil::NextPower2(length_ + elements);
8686
return Resize(new_capacity);
8787
}
8888
return Status::OK();
@@ -96,7 +96,7 @@ Status ArrayBuilder::SetNotNull(int32_t length) {
9696

9797
void ArrayBuilder::UnsafeAppendToBitmap(bool is_valid) {
9898
if (is_valid) {
99-
util::set_bit(null_bitmap_data_, length_);
99+
BitUtil::SetBit(null_bitmap_data_, length_);
100100
} else {
101101
++null_count_;
102102
}
@@ -118,7 +118,7 @@ void ArrayBuilder::UnsafeSetNotNull(int32_t length) {
118118
const int32_t new_length = length + length_;
119119
// TODO(emkornfield) Optimize for large values of length?
120120
for (int32_t i = length_; i < new_length; ++i) {
121-
util::set_bit(null_bitmap_data_, i);
121+
BitUtil::SetBit(null_bitmap_data_, i);
122122
}
123123
length_ = new_length;
124124
}

cpp/src/arrow/column-benchmark.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ std::shared_ptr<Array> MakePrimitive(int32_t length, int32_t null_count = 0) {
2929
auto data = std::make_shared<PoolBuffer>(pool);
3030
auto null_bitmap = std::make_shared<PoolBuffer>(pool);
3131
data->Resize(length * sizeof(typename ArrayType::value_type));
32-
null_bitmap->Resize(util::bytes_for_bits(length));
32+
null_bitmap->Resize(BitUtil::BytesForBits(length));
3333
return std::make_shared<ArrayType>(length, data, 10, null_bitmap);
3434
}
3535
} // anonymous namespace

cpp/src/arrow/ipc/adapter.cc

+3-2
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
#include "arrow/types/primitive.h"
3838
#include "arrow/types/string.h"
3939
#include "arrow/types/struct.h"
40+
#include "arrow/util/bit-util.h"
4041
#include "arrow/util/buffer.h"
4142
#include "arrow/util/logging.h"
4243
#include "arrow/util/status.h"
@@ -49,7 +50,7 @@ namespace ipc {
4950

5051
namespace {
5152
Status CheckMultipleOf64(int64_t size) {
52-
if (util::is_multiple_of_64(size)) { return Status::OK(); }
53+
if (BitUtil::IsMultipleOf64(size)) { return Status::OK(); }
5354
return Status::Invalid(
5455
"Attempted to write a buffer that "
5556
"wasn't a multiple of 64 bytes");
@@ -155,7 +156,7 @@ class RecordBatchWriter {
155156
// The buffer might be null if we are handling zero row lengths.
156157
if (buffer) {
157158
size = buffer->size();
158-
padding = util::RoundUpToMultipleOf64(size) - size;
159+
padding = BitUtil::RoundUpToMultipleOf64(size) - size;
159160
}
160161

161162
// TODO(wesm): We currently have no notion of shared memory page id's,

cpp/src/arrow/ipc/test-common.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "arrow/types/primitive.h"
3232
#include "arrow/types/string.h"
3333
#include "arrow/types/struct.h"
34+
#include "arrow/util/bit-util.h"
3435
#include "arrow/util/buffer.h"
3536
#include "arrow/util/memory-pool.h"
3637

@@ -263,7 +264,7 @@ Status MakeStruct(std::shared_ptr<RecordBatch>* out) {
263264
std::vector<uint8_t> null_bytes(list_batch->num_rows(), 1);
264265
null_bytes[0] = 0;
265266
std::shared_ptr<Buffer> null_bitmask;
266-
RETURN_NOT_OK(util::bytes_to_bits(null_bytes, &null_bitmask));
267+
RETURN_NOT_OK(BitUtil::BytesToBits(null_bytes, &null_bitmask));
267268
ArrayPtr with_nulls(
268269
new StructArray(type, list_batch->num_rows(), columns, 1, null_bitmask));
269270

cpp/src/arrow/test-util.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ class TestBase : public ::testing::Test {
6969
auto data = std::make_shared<PoolBuffer>(pool_);
7070
auto null_bitmap = std::make_shared<PoolBuffer>(pool_);
7171
EXPECT_OK(data->Resize(length * sizeof(typename ArrayType::value_type)));
72-
EXPECT_OK(null_bitmap->Resize(util::bytes_for_bits(length)));
72+
EXPECT_OK(null_bitmap->Resize(BitUtil::BytesForBits(length)));
7373
return std::make_shared<ArrayType>(length, data, 10, null_bitmap);
7474
}
7575

@@ -152,7 +152,7 @@ static inline int bitmap_popcount(const uint8_t* data, int length) {
152152
// versions of popcount but the code complexity is likely not worth it)
153153
const int loop_tail_index = fast_counts * pop_len;
154154
for (int i = loop_tail_index; i < length; ++i) {
155-
if (util::get_bit(data, i)) { ++count; }
155+
if (BitUtil::GetBit(data, i)) { ++count; }
156156
}
157157

158158
return count;
@@ -170,7 +170,7 @@ std::shared_ptr<Buffer> bytes_to_null_buffer(const std::vector<uint8_t>& bytes)
170170
std::shared_ptr<Buffer> out;
171171

172172
// TODO(wesm): error checking
173-
util::bytes_to_bits(bytes, &out);
173+
BitUtil::BytesToBits(bytes, &out);
174174
return out;
175175
}
176176

cpp/src/arrow/types/list.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ bool ListArray::EqualsExact(const ListArray& other) const {
3030
bool equal_null_bitmap = true;
3131
if (null_count_ > 0) {
3232
equal_null_bitmap =
33-
null_bitmap_->Equals(*other.null_bitmap_, util::bytes_for_bits(length_));
33+
null_bitmap_->Equals(*other.null_bitmap_, BitUtil::BytesForBits(length_));
3434
}
3535

3636
if (!equal_null_bitmap) { return false; }

cpp/src/arrow/types/primitive-test.cc

+8-8
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ void TestPrimitiveBuilder<PBoolean>::Check(
236236

237237
for (int i = 0; i < result->length(); ++i) {
238238
if (nullable) { ASSERT_EQ(valid_bytes_[i] == 0, result->IsNull(i)) << i; }
239-
bool actual = util::get_bit(result->raw_data(), i);
239+
bool actual = BitUtil::GetBit(result->raw_data(), i);
240240
ASSERT_EQ(static_cast<bool>(draws_[i]), actual) << i;
241241
}
242242
ASSERT_TRUE(result->EqualsExact(*expected.get()));
@@ -258,8 +258,8 @@ TYPED_TEST(TestPrimitiveBuilder, TestInit) {
258258

259259
int n = 1000;
260260
ASSERT_OK(this->builder_->Reserve(n));
261-
ASSERT_EQ(util::next_power2(n), this->builder_->capacity());
262-
ASSERT_EQ(util::next_power2(TypeTraits<Type>::bytes_required(n)),
261+
ASSERT_EQ(BitUtil::NextPower2(n), this->builder_->capacity());
262+
ASSERT_EQ(BitUtil::NextPower2(TypeTraits<Type>::bytes_required(n)),
263263
this->builder_->data()->size());
264264

265265
// unsure if this should go in all builder classes
@@ -409,10 +409,10 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendScalar) {
409409
}
410410

411411
ASSERT_EQ(size, this->builder_->length());
412-
ASSERT_EQ(util::next_power2(size), this->builder_->capacity());
412+
ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity());
413413

414414
ASSERT_EQ(size, this->builder_nn_->length());
415-
ASSERT_EQ(util::next_power2(size), this->builder_nn_->capacity());
415+
ASSERT_EQ(BitUtil::NextPower2(size), this->builder_nn_->capacity());
416416

417417
this->Check(this->builder_, true);
418418
this->Check(this->builder_nn_, false);
@@ -444,7 +444,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendVector) {
444444
ASSERT_OK(this->builder_nn_->Append(draws.data() + K, size - K));
445445

446446
ASSERT_EQ(size, this->builder_->length());
447-
ASSERT_EQ(util::next_power2(size), this->builder_->capacity());
447+
ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity());
448448

449449
this->Check(this->builder_, true);
450450
this->Check(this->builder_nn_, false);
@@ -472,7 +472,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestResize) {
472472
ASSERT_EQ(cap, this->builder_->capacity());
473473

474474
ASSERT_EQ(TypeTraits<Type>::bytes_required(cap), this->builder_->data()->size());
475-
ASSERT_EQ(util::bytes_for_bits(cap), this->builder_->null_bitmap()->size());
475+
ASSERT_EQ(BitUtil::BytesForBits(cap), this->builder_->null_bitmap()->size());
476476
}
477477

478478
TYPED_TEST(TestPrimitiveBuilder, TestReserve) {
@@ -484,7 +484,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestReserve) {
484484
ASSERT_OK(this->builder_->Advance(100));
485485
ASSERT_OK(this->builder_->Reserve(kMinBuilderCapacity));
486486

487-
ASSERT_EQ(util::next_power2(kMinBuilderCapacity + 100), this->builder_->capacity());
487+
ASSERT_EQ(BitUtil::NextPower2(kMinBuilderCapacity + 100), this->builder_->capacity());
488488
}
489489

490490
} // namespace arrow

cpp/src/arrow/types/primitive.cc

+7-6
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
#include <memory>
2121

22+
#include "arrow/util/bit-util.h"
2223
#include "arrow/util/buffer.h"
2324
#include "arrow/util/logging.h"
2425

@@ -41,7 +42,7 @@ bool PrimitiveArray::EqualsExact(const PrimitiveArray& other) const {
4142

4243
if (null_count_ > 0) {
4344
bool equal_bitmap =
44-
null_bitmap_->Equals(*other.null_bitmap_, util::ceil_byte(length_) / 8);
45+
null_bitmap_->Equals(*other.null_bitmap_, BitUtil::CeilByte(length_) / 8);
4546
if (!equal_bitmap) { return false; }
4647

4748
const uint8_t* this_data = raw_data_;
@@ -156,9 +157,9 @@ Status PrimitiveBuilder<BooleanType>::Append(
156157
if ((valid_bytes != nullptr) && !valid_bytes[i]) continue;
157158

158159
if (values[i] > 0) {
159-
util::set_bit(raw_data_, length_ + i);
160+
BitUtil::SetBit(raw_data_, length_ + i);
160161
} else {
161-
util::clear_bit(raw_data_, length_ + i);
162+
BitUtil::ClearBit(raw_data_, length_ + i);
162163
}
163164
}
164165

@@ -196,20 +197,20 @@ bool BooleanArray::EqualsExact(const BooleanArray& other) const {
196197

197198
if (null_count_ > 0) {
198199
bool equal_bitmap =
199-
null_bitmap_->Equals(*other.null_bitmap_, util::bytes_for_bits(length_));
200+
null_bitmap_->Equals(*other.null_bitmap_, BitUtil::BytesForBits(length_));
200201
if (!equal_bitmap) { return false; }
201202

202203
const uint8_t* this_data = raw_data_;
203204
const uint8_t* other_data = other.raw_data_;
204205

205206
for (int i = 0; i < length_; ++i) {
206-
if (!IsNull(i) && util::get_bit(this_data, i) != util::get_bit(other_data, i)) {
207+
if (!IsNull(i) && BitUtil::GetBit(this_data, i) != BitUtil::GetBit(other_data, i)) {
207208
return false;
208209
}
209210
}
210211
return true;
211212
} else {
212-
return data_->Equals(*other.data_, util::bytes_for_bits(length_));
213+
return data_->Equals(*other.data_, BitUtil::BytesForBits(length_));
213214
}
214215
}
215216

cpp/src/arrow/types/primitive.h

+6-6
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ class ARROW_EXPORT NumericBuilder : public PrimitiveBuilder<T> {
173173

174174
// Does not capacity-check; make sure to call Reserve beforehand
175175
void UnsafeAppend(value_type val) {
176-
util::set_bit(null_bitmap_data_, length_);
176+
BitUtil::SetBit(null_bitmap_data_, length_);
177177
raw_data_[length_++] = val;
178178
}
179179

@@ -290,15 +290,15 @@ class ARROW_EXPORT BooleanArray : public PrimitiveArray {
290290

291291
const uint8_t* raw_data() const { return reinterpret_cast<const uint8_t*>(raw_data_); }
292292

293-
bool Value(int i) const { return util::get_bit(raw_data(), i); }
293+
bool Value(int i) const { return BitUtil::GetBit(raw_data(), i); }
294294
};
295295

296296
template <>
297297
struct TypeTraits<BooleanType> {
298298
typedef BooleanArray ArrayType;
299299

300300
static inline int bytes_required(int elements) {
301-
return util::bytes_for_bits(elements);
301+
return BitUtil::BytesForBits(elements);
302302
}
303303
};
304304

@@ -314,11 +314,11 @@ class ARROW_EXPORT BooleanBuilder : public PrimitiveBuilder<BooleanType> {
314314
// Scalar append
315315
Status Append(bool val) {
316316
Reserve(1);
317-
util::set_bit(null_bitmap_data_, length_);
317+
BitUtil::SetBit(null_bitmap_data_, length_);
318318
if (val) {
319-
util::set_bit(raw_data_, length_);
319+
BitUtil::SetBit(raw_data_, length_);
320320
} else {
321-
util::clear_bit(raw_data_, length_);
321+
BitUtil::ClearBit(raw_data_, length_);
322322
}
323323
++length_;
324324
return Status::OK();

cpp/src/arrow/util/bit-util-test.cc

+18-18
Original file line numberDiff line numberDiff line change
@@ -22,33 +22,33 @@
2222
namespace arrow {
2323

2424
TEST(UtilTests, TestIsMultipleOf64) {
25-
using util::is_multiple_of_64;
26-
EXPECT_TRUE(is_multiple_of_64(64));
27-
EXPECT_TRUE(is_multiple_of_64(0));
28-
EXPECT_TRUE(is_multiple_of_64(128));
29-
EXPECT_TRUE(is_multiple_of_64(192));
30-
EXPECT_FALSE(is_multiple_of_64(23));
31-
EXPECT_FALSE(is_multiple_of_64(32));
25+
using BitUtil::IsMultipleOf64;
26+
EXPECT_TRUE(IsMultipleOf64(64));
27+
EXPECT_TRUE(IsMultipleOf64(0));
28+
EXPECT_TRUE(IsMultipleOf64(128));
29+
EXPECT_TRUE(IsMultipleOf64(192));
30+
EXPECT_FALSE(IsMultipleOf64(23));
31+
EXPECT_FALSE(IsMultipleOf64(32));
3232
}
3333

3434
TEST(UtilTests, TestNextPower2) {
35-
using util::next_power2;
35+
using BitUtil::NextPower2;
3636

37-
ASSERT_EQ(8, next_power2(6));
38-
ASSERT_EQ(8, next_power2(8));
37+
ASSERT_EQ(8, NextPower2(6));
38+
ASSERT_EQ(8, NextPower2(8));
3939

40-
ASSERT_EQ(1, next_power2(1));
41-
ASSERT_EQ(256, next_power2(131));
40+
ASSERT_EQ(1, NextPower2(1));
41+
ASSERT_EQ(256, NextPower2(131));
4242

43-
ASSERT_EQ(1024, next_power2(1000));
43+
ASSERT_EQ(1024, NextPower2(1000));
4444

45-
ASSERT_EQ(4096, next_power2(4000));
45+
ASSERT_EQ(4096, NextPower2(4000));
4646

47-
ASSERT_EQ(65536, next_power2(64000));
47+
ASSERT_EQ(65536, NextPower2(64000));
4848

49-
ASSERT_EQ(1LL << 32, next_power2((1LL << 32) - 1));
50-
ASSERT_EQ(1LL << 31, next_power2((1LL << 31) - 1));
51-
ASSERT_EQ(1LL << 62, next_power2((1LL << 62) - 1));
49+
ASSERT_EQ(1LL << 32, NextPower2((1LL << 32) - 1));
50+
ASSERT_EQ(1LL << 31, NextPower2((1LL << 31) - 1));
51+
ASSERT_EQ(1LL << 62, NextPower2((1LL << 62) - 1));
5252
}
5353

5454
} // namespace arrow

0 commit comments

Comments
 (0)