Skip to content

Commit 77f8f3c

Browse files
authored
Merge pull request #5 from apache/master
Syncing from original
2 parents bc5db7d + ed27243 commit 77f8f3c

36 files changed

+463
-257
lines changed

c_glib/configure.ac

+1-1
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ AC_CONFIG_FILES([
143143
arrow-gpu-glib/arrow-gpu-glib.pc
144144
doc/Makefile
145145
doc/reference/Makefile
146-
doc/reference/xml/Makefile
146+
doc/reference/entities.xml
147147
example/Makefile
148148
example/lua/Makefile
149149
tool/Makefile

c_glib/doc/reference/Makefile.am

+1-3
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,6 @@
1515
# specific language governing permissions and limitations
1616
# under the License.
1717

18-
SUBDIRS = \
19-
xml
20-
2118
DOC_MODULE = arrow-glib
2219

2320
DOC_MAIN_SGML_FILE = $(DOC_MODULE)-docs.xml
@@ -72,4 +69,5 @@ CLEANFILES += \
7269
$(DOC_MODULE).types
7370

7471
EXTRA_DIST += \
72+
entities.xml.in \
7573
meson.build

c_glib/doc/reference/arrow-glib-docs.xml

+2-2
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@
2121
"http://www.oasis-open.org/docbook/xml/4.3/docbookx.dtd"
2222
[
2323
<!ENTITY % local.common.attrib "xmlns:xi CDATA #FIXED 'http://www.w3.org/2003/XInclude'">
24-
<!ENTITY % gtkdocentities SYSTEM "xml/gtkdocentities.ent">
24+
<!ENTITY % gtkdocentities SYSTEM "entities.xml">
2525
%gtkdocentities;
2626
]>
27-
<book id="index">
27+
<book id="index" xmlns:xi="http://www.w3.org/2003/XInclude">
2828
<bookinfo>
2929
<title>&package_name; Reference Manual</title>
3030
<releaseinfo>

c_glib/doc/reference/xml/gtkdocentities.ent.in c_glib/doc/reference/entities.xml.in

+6-6
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@
1616
specific language governing permissions and limitations
1717
under the License.
1818
-->
19-
<!ENTITY package "@package@">
20-
<!ENTITY package_bugreport "@package_bugreport@">
21-
<!ENTITY package_name "@package_name@">
22-
<!ENTITY package_string "@package_string@">
23-
<!ENTITY package_url "@package_url@">
24-
<!ENTITY package_version "@package_version@">
19+
<!ENTITY package "@PACKAGE@">
20+
<!ENTITY package_bugreport "@PACKAGE_BUGREPORT@">
21+
<!ENTITY package_name "@PACKAGE_NAME@">
22+
<!ENTITY package_string "@PACKAGE_STRING@">
23+
<!ENTITY package_url "@PACKAGE_URL@">
24+
<!ENTITY package_version "@PACKAGE_VERSION@">

c_glib/doc/reference/meson.build

+12-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,18 @@
1717
# specific language governing permissions and limitations
1818
# under the License.
1919

20-
subdir('xml')
20+
entities_conf = configuration_data()
21+
entities_conf.set('PACKAGE', meson.project_name())
22+
entities_conf.set('PACKAGE_BUGREPORT',
23+
'https://issues.apache.org/jira/browse/ARROW')
24+
entities_conf.set('PACKAGE_NAME', meson.project_name())
25+
entities_conf.set('PACKAGE_STRING',
26+
' '.join([meson.project_name(), version]))
27+
entities_conf.set('PACKAGE_URL', 'https://arrow.apache.org/')
28+
entities_conf.set('PACKAGE_VERSION', version)
29+
configure_file(input: 'entities.xml.in',
30+
output: 'entities.xml',
31+
configuration: entities_conf)
2132

2233
private_headers = [
2334
]

c_glib/doc/reference/xml/Makefile.am

-20
This file was deleted.

c_glib/doc/reference/xml/meson.build

-31
This file was deleted.

ci/travis_lint.sh

+3-3
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,10 @@ popd
3535
# Fail fast on style checks
3636
sudo pip install flake8
3737

38-
PYARROW_DIR=$TRAVIS_BUILD_DIR/python/pyarrow
38+
PYTHON_DIR=$TRAVIS_BUILD_DIR/python
3939

40-
flake8 --count $PYARROW_DIR
40+
flake8 --count $PYTHON_DIR/pyarrow
4141

4242
# Check Cython files with some checks turned off
4343
flake8 --count --config=$PYTHON_DIR/.flake8.cython \
44-
$PYARROW_DIR
44+
$PYTHON_DIR/pyarrow

cpp/src/arrow/python/numpy_to_arrow.cc

+3-3
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ static Status AppendObjectBinaries(PyArrayObject* arr, PyArrayObject* mask,
175175
continue;
176176
} else if (!PyBytes_Check(obj)) {
177177
std::stringstream ss;
178-
ss << "Error converting to Python objects to bytes: ";
178+
ss << "Error converting from Python objects to bytes: ";
179179
RETURN_NOT_OK(InvalidConversion(obj, "str, bytes", &ss));
180180
return Status::Invalid(ss.str());
181181
}
@@ -230,7 +230,7 @@ static Status AppendObjectStrings(PyArrayObject* arr, PyArrayObject* mask, int64
230230
*have_bytes = true;
231231
} else {
232232
std::stringstream ss;
233-
ss << "Error converting to Python objects to String/UTF8: ";
233+
ss << "Error converting from Python objects to String/UTF8: ";
234234
RETURN_NOT_OK(InvalidConversion(obj, "str, bytes", &ss));
235235
return Status::Invalid(ss.str());
236236
}
@@ -278,7 +278,7 @@ static Status AppendObjectFixedWidthBytes(PyArrayObject* arr, PyArrayObject* mas
278278
tmp_obj.reset(obj);
279279
} else if (!PyBytes_Check(obj)) {
280280
std::stringstream ss;
281-
ss << "Error converting to Python objects to FixedSizeBinary: ";
281+
ss << "Error converting from Python objects to FixedSizeBinary: ";
282282
RETURN_NOT_OK(InvalidConversion(obj, "str, bytes", &ss));
283283
return Status::Invalid(ss.str());
284284
}

cpp/src/arrow/table-test.cc

+31
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,21 @@ TEST_F(TestChunkedArray, EqualsDifferingLengths) {
108108
ASSERT_TRUE(one_->Equals(*another_.get()));
109109
}
110110

111+
// Slicing a multi-chunk ChunkedArray must give the same result whether the
// range is requested directly or reached through a chain of nested slices.
TEST_F(TestChunkedArray, SliceEquals) {
  // Three chunks (100 + 50 + 50 elements) so that the slices below start in
  // one chunk and end in another.
  arrays_one_.push_back(MakeRandomArray<Int32Array>(100));
  arrays_one_.push_back(MakeRandomArray<Int32Array>(50));
  arrays_one_.push_back(MakeRandomArray<Int32Array>(50));
  Construct();

  // Direct slice: elements [125, 175).
  std::shared_ptr<ChunkedArray> direct = one_->Slice(125, 50);
  ASSERT_EQ(direct->length(), 50);
  ASSERT_TRUE(direct->Equals(one_->Slice(125, 50)));

  // Chained slices: 75 + 25 + 25 == 125, so this addresses the same range.
  std::shared_ptr<ChunkedArray> from_75 = one_->Slice(75);
  std::shared_ptr<ChunkedArray> from_100 = from_75->Slice(25);
  std::shared_ptr<ChunkedArray> chained = from_100->Slice(25, 50);
  ASSERT_EQ(chained->length(), 50);
  ASSERT_TRUE(chained->Equals(direct));
}
125+
111126
class TestColumn : public TestChunkedArray {
112127
protected:
113128
void Construct() override {
@@ -158,6 +173,22 @@ TEST_F(TestColumn, ChunksInhomogeneous) {
158173
ASSERT_RAISES(Invalid, column_->ValidateData());
159174
}
160175

176+
// Slicing a multi-chunk Column must give the same result whether the range is
// requested directly or reached through a chain of nested slices.
TEST_F(TestColumn, SliceEquals) {
  // Three chunks (100 + 50 + 50 elements) so that the slices below start in
  // one chunk and end in another.
  arrays_one_.push_back(MakeRandomArray<Int32Array>(100));
  arrays_one_.push_back(MakeRandomArray<Int32Array>(50));
  arrays_one_.push_back(MakeRandomArray<Int32Array>(50));
  one_field_ = field("column", int32());
  Construct();

  // Direct slice: elements [125, 175).
  std::shared_ptr<Column> direct = one_col_->Slice(125, 50);
  ASSERT_EQ(direct->length(), 50);
  ASSERT_TRUE(direct->Equals(one_col_->Slice(125, 50)));

  // Chained slices: 75 + 25 + 25 == 125, so this addresses the same range.
  std::shared_ptr<Column> from_75 = one_col_->Slice(75);
  std::shared_ptr<Column> from_100 = from_75->Slice(25);
  std::shared_ptr<Column> chained = from_100->Slice(25, 50);
  ASSERT_EQ(chained->length(), 50);
  ASSERT_TRUE(chained->Equals(direct));
}
191+
161192
TEST_F(TestColumn, Equals) {
162193
std::vector<bool> null_bitmap(100, true);
163194
std::vector<int32_t> data(100, 1);

cpp/src/arrow/table.cc

+24
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,30 @@ bool ChunkedArray::Equals(const std::shared_ptr<ChunkedArray>& other) const {
102102
return Equals(*other.get());
103103
}
104104

105+
std::shared_ptr<ChunkedArray> ChunkedArray::Slice(int64_t offset, int64_t length) const {
106+
DCHECK_LE(offset, length_);
107+
108+
int curr_chunk = 0;
109+
while (offset >= chunk(curr_chunk)->length()) {
110+
offset -= chunk(curr_chunk)->length();
111+
curr_chunk++;
112+
}
113+
114+
ArrayVector new_chunks;
115+
while (length > 0 && curr_chunk < num_chunks()) {
116+
new_chunks.push_back(chunk(curr_chunk)->Slice(offset, length));
117+
length -= chunk(curr_chunk)->length() - offset;
118+
offset = 0;
119+
curr_chunk++;
120+
}
121+
122+
return std::make_shared<ChunkedArray>(new_chunks);
123+
}
124+
125+
std::shared_ptr<ChunkedArray> ChunkedArray::Slice(int64_t offset) const {
126+
return Slice(offset, length_);
127+
}
128+
105129
Column::Column(const std::shared_ptr<Field>& field, const ArrayVector& chunks)
106130
: field_(field) {
107131
data_ = std::make_shared<ChunkedArray>(chunks);

cpp/src/arrow/table.h

+35-1
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ class ARROW_EXPORT ChunkedArray {
4444
/// \return the total length of the chunked array; computed on construction
4545
int64_t length() const { return length_; }
4646

47+
/// \return the total number of nulls among all chunks
4748
int64_t null_count() const { return null_count_; }
4849

4950
int num_chunks() const { return static_cast<int>(chunks_.size()); }
@@ -53,6 +54,20 @@ class ARROW_EXPORT ChunkedArray {
5354

5455
const ArrayVector& chunks() const { return chunks_; }
5556

57+
/// \brief Construct a zero-copy slice of the chunked array with the
58+
/// indicated offset and length
59+
///
60+
/// \param[in] offset the position of the first element in the constructed
61+
/// slice
62+
/// \param[in] length the length of the slice. If there are not enough
63+
/// elements in the chunked array, the length will be adjusted accordingly
64+
///
65+
/// \return a new object wrapped in std::shared_ptr<ChunkedArray>
66+
std::shared_ptr<ChunkedArray> Slice(int64_t offset, int64_t length) const;
67+
68+
/// \brief Slice from offset until end of the chunked array
69+
std::shared_ptr<ChunkedArray> Slice(int64_t offset) const;
70+
5671
std::shared_ptr<DataType> type() const;
5772

5873
bool Equals(const ChunkedArray& other) const;
@@ -67,8 +82,9 @@ class ARROW_EXPORT ChunkedArray {
6782
ARROW_DISALLOW_COPY_AND_ASSIGN(ChunkedArray);
6883
};
6984

85+
/// \class Column
7086
/// \brief An immutable column data structure consisting of a field (type
71-
/// metadata) and a logical chunked data array
87+
/// metadata) and a chunked data array
7288
class ARROW_EXPORT Column {
7389
public:
7490
Column(const std::shared_ptr<Field>& field, const ArrayVector& chunks);
@@ -97,6 +113,24 @@ class ARROW_EXPORT Column {
97113
/// \return the column's data as a chunked logical array
98114
std::shared_ptr<ChunkedArray> data() const { return data_; }
99115

116+
/// \brief Construct a zero-copy slice of the column with the indicated
117+
/// offset and length
118+
///
119+
/// \param[in] offset the position of the first element in the constructed
120+
/// slice
121+
/// \param[in] length the length of the slice. If there are not enough
122+
/// elements in the column, the length will be adjusted accordingly
123+
///
124+
/// \return a new object wrapped in std::shared_ptr<Column>
125+
std::shared_ptr<Column> Slice(int64_t offset, int64_t length) const {
126+
return std::make_shared<Column>(field_, data_->Slice(offset, length));
127+
}
128+
129+
/// \brief Slice from offset until end of the column
130+
std::shared_ptr<Column> Slice(int64_t offset) const {
131+
return std::make_shared<Column>(field_, data_->Slice(offset));
132+
}
133+
100134
bool Equals(const Column& other) const;
101135
bool Equals(const std::shared_ptr<Column>& other) const;
102136

0 commit comments

Comments
 (0)