Skip to content

Commit bc5db7d

Browse files
committed
ARROW-1712: [C++] Remove unneeded data member in BinaryBuilder and modify test case
1 parent 5a5b70e commit bc5db7d

File tree

3 files changed

+28
-51
lines changed

3 files changed

+28
-51
lines changed

cpp/src/arrow/array-test.cc

+18-37
Original file line numberDiff line numberDiff line change
@@ -1156,51 +1156,32 @@ TEST_F(TestBinaryBuilder, TestScalarAppend) {
11561156
}
11571157

11581158
TEST_F(TestBinaryBuilder, TestCapacityReserve) {
1159-
vector<string> strings = {"a", "bb", "cc", "ddddd", "eeeee"};
1160-
int64_t N = static_cast<int>(strings.size());
1159+
vector<string> strings = {"aaaaa", "bbbbbbbbbb", "ccccccccccccccc", "dddddddddddddddddddd", "eeeeeeeeee"};
1160+
int N = static_cast<int>(strings.size());
1161+
int reps = 10;
11611162
int64_t length = 0;
1162-
int64_t data_length = 0;
1163-
int64_t capacity = N;
1164-
1165-
ASSERT_OK(builder_->Reserve(capacity));
1163+
int64_t capacity = 1000;
1164+
1165+
11661166
ASSERT_OK(builder_->ReserveData(capacity));
11671167

1168-
ASSERT_EQ(builder_->length(), length);
1169-
ASSERT_EQ(builder_->capacity(), BitUtil::NextPower2(capacity));
1170-
ASSERT_EQ(builder_->value_data_length(), data_length);
1171-
ASSERT_EQ(builder_->value_data_capacity(), capacity);
1168+
ASSERT_EQ(length, builder_->value_data_length());
1169+
ASSERT_EQ(BitUtil::RoundUpToMultipleOf64(capacity), builder_->value_data_capacity());
11721170

1173-
for(const string& str : strings) {
1174-
ASSERT_OK(builder_->Append(str));
1175-
length++;
1176-
data_length += static_cast<int>(str.size());
1171+
for (int j = 0; j < reps; ++j) {
1172+
for (int i = 0; i < N; ++i) {
1173+
ASSERT_OK(builder_->Append(strings[i]));
1174+
length += static_cast<int>(strings[i].size());
11771175

1178-
ASSERT_EQ(builder_->length(), length);
1179-
ASSERT_EQ(builder_->capacity(), BitUtil::NextPower2(capacity));
1180-
ASSERT_EQ(builder_->value_data_length(), data_length);
1181-
if (data_length <= capacity) {
1182-
ASSERT_EQ(builder_->value_data_capacity(), capacity);
1183-
} else {
1184-
ASSERT_EQ(builder_->value_data_capacity(), data_length);
1176+
ASSERT_EQ(length, builder_->value_data_length());
1177+
ASSERT_EQ(BitUtil::RoundUpToMultipleOf64(capacity), builder_->value_data_capacity());
11851178
}
11861179
}
1187-
1188-
int extra_capacity = 20;
1189-
1190-
ASSERT_OK(builder_->Reserve(extra_capacity));
1191-
ASSERT_OK(builder_->ReserveData(extra_capacity));
1192-
1193-
ASSERT_EQ(builder_->length(), length);
1194-
ASSERT_EQ(builder_->capacity(), BitUtil::NextPower2(length + extra_capacity));
1195-
ASSERT_EQ(builder_->value_data_length(), data_length);
1196-
ASSERT_EQ(builder_->value_data_capacity(), data_length + extra_capacity);
1197-
11981180
Done();
1199-
1200-
ASSERT_EQ(result_->length(), N);
1201-
ASSERT_EQ(result_->null_count(), 0);
1202-
ASSERT_EQ(result_->value_data()->size(), data_length);
1203-
ASSERT_EQ(result_->value_data()->capacity(), BitUtil::RoundUpToMultipleOf64(data_length + extra_capacity));
1181+
ASSERT_EQ(reps * N, result_->length());
1182+
ASSERT_EQ(0, result_->null_count());
1183+
ASSERT_EQ(reps * 60, result_->value_data()->size());
1184+
ASSERT_EQ(BitUtil::RoundUpToMultipleOf64(capacity), result_->value_data()->capacity());
12041185
}
12051186

12061187
TEST_F(TestBinaryBuilder, TestZeroLength) {

cpp/src/arrow/builder.cc

+6-11
Original file line numberDiff line numberDiff line change
@@ -1208,7 +1208,7 @@ ArrayBuilder* ListBuilder::value_builder() const {
12081208
// String and binary
12091209

12101210
BinaryBuilder::BinaryBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
1211-
: ArrayBuilder(type, pool), offsets_builder_(pool), value_data_builder_(pool), data_capacity_(0) {}
1211+
: ArrayBuilder(type, pool), offsets_builder_(pool), value_data_builder_(pool) {}
12121212

12131213
BinaryBuilder::BinaryBuilder(MemoryPool* pool) : BinaryBuilder(binary(), pool) {}
12141214

@@ -1226,14 +1226,12 @@ Status BinaryBuilder::Resize(int64_t capacity) {
12261226
return ArrayBuilder::Resize(capacity);
12271227
}
12281228

1229-
Status BinaryBuilder::ReserveData(int64_t capacity) {
1230-
if (value_data_length() + capacity > data_capacity_) {
1231-
if (value_data_length() + capacity > std::numeric_limits<int32_t>::max()) {
1229+
Status BinaryBuilder::ReserveData(int64_t elements) {
1230+
if (value_data_length() + elements > value_data_capacity()) {
1231+
if (value_data_length() + elements > std::numeric_limits<int32_t>::max()) {
12321232
return Status::Invalid("Cannot reserve capacity larger than 2^31 - 1 in length for binary data");
1233-
}
1234-
1235-
RETURN_NOT_OK(value_data_builder_.Resize(value_data_length() + capacity));
1236-
data_capacity_ = value_data_length() + capacity;
1233+
}
1234+
RETURN_NOT_OK(value_data_builder_.Reserve(elements));
12371235
}
12381236
return Status::OK();
12391237
}
@@ -1253,9 +1251,6 @@ Status BinaryBuilder::Append(const uint8_t* value, int32_t length) {
12531251
RETURN_NOT_OK(Reserve(1));
12541252
RETURN_NOT_OK(AppendNextOffset());
12551253
RETURN_NOT_OK(value_data_builder_.Append(value, length));
1256-
if (data_capacity_ < value_data_length()) {
1257-
data_capacity_ = value_data_length();
1258-
}
12591254
UnsafeAppendToBitmap(true);
12601255
return Status::OK();
12611256
}

cpp/src/arrow/builder.h

+4-3
Original file line numberDiff line numberDiff line change
@@ -682,13 +682,15 @@ class ARROW_EXPORT BinaryBuilder : public ArrayBuilder {
682682

683683
Status Init(int64_t elements) override;
684684
Status Resize(int64_t capacity) override;
685-
Status ReserveData(int64_t capacity);
685+
/// Ensures there is enough space for adding the number of value elements
686+
/// by checking value buffer capacity and resizing if necessary.
687+
Status ReserveData(int64_t elements);
686688
Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
687689

688690
/// \return size of values buffer so far
689691
int64_t value_data_length() const { return value_data_builder_.length(); }
690692
/// \return capacity of values buffer
691-
int64_t value_data_capacity() const { return data_capacity_; }
693+
int64_t value_data_capacity() const { return value_data_builder_.capacity(); }
692694

693695
/// Temporary access to a value.
694696
///
@@ -699,7 +701,6 @@ class ARROW_EXPORT BinaryBuilder : public ArrayBuilder {
699701
TypedBufferBuilder<int32_t> offsets_builder_;
700702
TypedBufferBuilder<uint8_t> value_data_builder_;
701703

702-
int64_t data_capacity_;
703704
static constexpr int64_t kMaximumCapacity = std::numeric_limits<int32_t>::max() - 1;
704705

705706
Status AppendNextOffset();

0 commit comments

Comments
 (0)