Skip to content
This repository was archived by the owner on Jan 16, 2024. It is now read-only.

Commit c4a190b

Browse files
committed
arrow 0.17.0
1 parent 28943ce commit c4a190b

File tree

216 files changed

+12271
-7147
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

216 files changed

+12271
-7147
lines changed

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Apache Arrow 0.16.0
1+
# Apache Arrow 0.17.0
22

33
Backports for the R legacy toolchain [lib-4.9.3](lib-4.9.3) built with [rtools-backports](https://github.com/r-windows/rtools-backports/blob/master/mingw-w64-arrow/PKGBUILD).
44

@@ -13,13 +13,13 @@ CXX_STD = CXX11
1313
PKG_LIBS = \
1414
-L$(ARROW_LIBS) \
1515
-lparquet -larrow_dataset -larrow \
16-
-lthrift -lsnappy -lboost_regex-mt-s -lboost_system-mt-s -lz -lzstd -llz4 -lcrypto -lcrypt32
16+
-lthrift -lsnappy -lz -lzstd -llz4 -lcrypto -lcrypt32
1717
```
1818

1919
To test this, make sure you install the arrow package from a release tag:
2020

2121
```r
22-
remotes::install_github("apache/arrow/r@apache-arrow-0.16.0")
22+
remotes::install_github("apache/arrow/r@apache-arrow-0.17.0")
2323
```
2424

2525
To install the R package from the arrow master branch, you would also need to rebuild the arrow C++ library from the master branch source.

include/arrow/api.h

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,7 @@
1717

1818
// Coarse public API while the library is in development
1919

20-
#ifndef ARROW_API_H
21-
#define ARROW_API_H
20+
#pragma once
2221

2322
#include "arrow/array.h" // IWYU pragma: export
2423
#include "arrow/array/concatenate.h" // IWYU pragma: export
@@ -41,5 +40,3 @@
4140

4241
/// \brief Top-level namespace for Apache Arrow C++ API
4342
namespace arrow {}
44-
45-
#endif // ARROW_API_H

include/arrow/array.h

Lines changed: 126 additions & 106 deletions
Large diffs are not rendered by default.

include/arrow/array/builder_adaptive.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ class ARROW_EXPORT AdaptiveIntBuilderBase : public ArrayBuilder {
9292

9393
class ARROW_EXPORT AdaptiveUIntBuilder : public internal::AdaptiveIntBuilderBase {
9494
public:
95-
explicit AdaptiveUIntBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT);
95+
explicit AdaptiveUIntBuilder(MemoryPool* pool = default_memory_pool());
9696

9797
using ArrayBuilder::Advance;
9898
using internal::AdaptiveIntBuilderBase::Reset;
@@ -126,7 +126,7 @@ class ARROW_EXPORT AdaptiveUIntBuilder : public internal::AdaptiveIntBuilderBase
126126

127127
class ARROW_EXPORT AdaptiveIntBuilder : public internal::AdaptiveIntBuilderBase {
128128
public:
129-
explicit AdaptiveIntBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT);
129+
explicit AdaptiveIntBuilder(MemoryPool* pool = default_memory_pool());
130130

131131
using ArrayBuilder::Advance;
132132
using internal::AdaptiveIntBuilderBase::Reset;

include/arrow/array/builder_base.h

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -190,13 +190,16 @@ class ARROW_EXPORT ArrayBuilder {
190190
return Status::OK();
191191
}
192192

193-
static Status CheckCapacity(int64_t new_capacity, int64_t old_capacity) {
194-
if (new_capacity < 0) {
195-
return Status::Invalid("Resize capacity must be positive");
193+
// Check the requested capacity for validity
194+
Status CheckCapacity(int64_t new_capacity) {
195+
if (ARROW_PREDICT_FALSE(new_capacity < 0)) {
196+
return Status::Invalid(
197+
"Resize capacity must be positive (requested: ", new_capacity, ")");
196198
}
197199

198-
if (new_capacity < old_capacity) {
199-
return Status::Invalid("Resize cannot downsize");
200+
if (ARROW_PREDICT_FALSE(new_capacity < length_)) {
201+
return Status::Invalid("Resize cannot downsize (requested: ", new_capacity,
202+
", current length: ", length_, ")");
200203
}
201204

202205
return Status::OK();

include/arrow/array/builder_binary.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ class BaseBinaryBuilder : public ArrayBuilder {
4646
using TypeClass = TYPE;
4747
using offset_type = typename TypeClass::offset_type;
4848

49-
explicit BaseBinaryBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT)
49+
explicit BaseBinaryBuilder(MemoryPool* pool = default_memory_pool())
5050
: ArrayBuilder(pool), offsets_builder_(pool), value_data_builder_(pool) {}
5151

5252
BaseBinaryBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
@@ -235,7 +235,7 @@ class BaseBinaryBuilder : public ArrayBuilder {
235235
return Status::CapacityError("BinaryBuilder cannot reserve space for more than ",
236236
memory_limit(), " child elements, got ", capacity);
237237
}
238-
ARROW_RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
238+
ARROW_RETURN_NOT_OK(CheckCapacity(capacity));
239239

240240
// One more than requested for offsets
241241
ARROW_RETURN_NOT_OK(offsets_builder_.Resize(capacity + 1));
@@ -400,7 +400,7 @@ class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder {
400400
using TypeClass = FixedSizeBinaryType;
401401

402402
explicit FixedSizeBinaryBuilder(const std::shared_ptr<DataType>& type,
403-
MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT);
403+
MemoryPool* pool = default_memory_pool());
404404

405405
Status Append(const uint8_t* value) {
406406
ARROW_RETURN_NOT_OK(Reserve(1));
@@ -515,10 +515,10 @@ namespace internal {
515515
class ARROW_EXPORT ChunkedBinaryBuilder {
516516
public:
517517
explicit ChunkedBinaryBuilder(int32_t max_chunk_value_length,
518-
MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT);
518+
MemoryPool* pool = default_memory_pool());
519519

520520
ChunkedBinaryBuilder(int32_t max_chunk_value_length, int32_t max_chunk_length,
521-
MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT);
521+
MemoryPool* pool = default_memory_pool());
522522

523523
virtual ~ChunkedBinaryBuilder() = default;
524524

include/arrow/array/builder_decimal.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ class ARROW_EXPORT Decimal128Builder : public FixedSizeBinaryBuilder {
3131
using TypeClass = Decimal128Type;
3232

3333
explicit Decimal128Builder(const std::shared_ptr<DataType>& type,
34-
MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT);
34+
MemoryPool* pool = default_memory_pool());
3535

3636
using FixedSizeBinaryBuilder::Append;
3737
using FixedSizeBinaryBuilder::AppendValues;

include/arrow/array/builder_dict.h

Lines changed: 52 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -55,25 +55,24 @@ struct DictionaryScalar<FixedSizeBinaryType> {
5555

5656
class ARROW_EXPORT DictionaryMemoTable {
5757
public:
58-
explicit DictionaryMemoTable(const std::shared_ptr<DataType>& type);
59-
explicit DictionaryMemoTable(const std::shared_ptr<Array>& dictionary);
58+
DictionaryMemoTable(MemoryPool* pool, const std::shared_ptr<DataType>& type);
59+
DictionaryMemoTable(MemoryPool* pool, const std::shared_ptr<Array>& dictionary);
6060
~DictionaryMemoTable();
6161

62-
int32_t GetOrInsert(const bool& value);
63-
int32_t GetOrInsert(const int8_t& value);
64-
int32_t GetOrInsert(const int16_t& value);
65-
int32_t GetOrInsert(const int32_t& value);
66-
int32_t GetOrInsert(const int64_t& value);
67-
int32_t GetOrInsert(const uint8_t& value);
68-
int32_t GetOrInsert(const uint16_t& value);
69-
int32_t GetOrInsert(const uint32_t& value);
70-
int32_t GetOrInsert(const uint64_t& value);
71-
int32_t GetOrInsert(const float& value);
72-
int32_t GetOrInsert(const double& value);
73-
int32_t GetOrInsert(const util::string_view& value);
74-
75-
Status GetArrayData(MemoryPool* pool, int64_t start_offset,
76-
std::shared_ptr<ArrayData>* out);
62+
Status GetOrInsert(bool value, int32_t* out);
63+
Status GetOrInsert(int8_t value, int32_t* out);
64+
Status GetOrInsert(int16_t value, int32_t* out);
65+
Status GetOrInsert(int32_t value, int32_t* out);
66+
Status GetOrInsert(int64_t value, int32_t* out);
67+
Status GetOrInsert(uint8_t value, int32_t* out);
68+
Status GetOrInsert(uint16_t value, int32_t* out);
69+
Status GetOrInsert(uint32_t value, int32_t* out);
70+
Status GetOrInsert(uint64_t value, int32_t* out);
71+
Status GetOrInsert(float value, int32_t* out);
72+
Status GetOrInsert(double value, int32_t* out);
73+
Status GetOrInsert(util::string_view value, int32_t* out);
74+
75+
Status GetArrayData(int64_t start_offset, std::shared_ptr<ArrayData>* out);
7776

7877
/// \brief Insert new memo values
7978
Status InsertValues(const Array& values);
@@ -103,7 +102,7 @@ class DictionaryBuilderBase : public ArrayBuilder {
103102
value_type,
104103
MemoryPool* pool = default_memory_pool())
105104
: ArrayBuilder(pool),
106-
memo_table_(new internal::DictionaryMemoTable(value_type)),
105+
memo_table_(new internal::DictionaryMemoTable(pool, value_type)),
107106
delta_offset_(0),
108107
byte_width_(-1),
109108
indices_builder_(pool),
@@ -114,7 +113,7 @@ class DictionaryBuilderBase : public ArrayBuilder {
114113
enable_if_fixed_size_binary<T1, const std::shared_ptr<DataType>&> value_type,
115114
MemoryPool* pool = default_memory_pool())
116115
: ArrayBuilder(pool),
117-
memo_table_(new internal::DictionaryMemoTable(value_type)),
116+
memo_table_(new internal::DictionaryMemoTable(pool, value_type)),
118117
delta_offset_(0),
119118
byte_width_(static_cast<const T1&>(*value_type).byte_width()),
120119
indices_builder_(pool),
@@ -125,10 +124,11 @@ class DictionaryBuilderBase : public ArrayBuilder {
125124
enable_if_parameter_free<T1, MemoryPool*> pool = default_memory_pool())
126125
: DictionaryBuilderBase<BuilderType, T1>(TypeTraits<T1>::type_singleton(), pool) {}
127126

127+
// This constructor doesn't check for errors. Use InsertMemoValues instead.
128128
DictionaryBuilderBase(const std::shared_ptr<Array>& dictionary,
129129
MemoryPool* pool = default_memory_pool())
130130
: ArrayBuilder(pool),
131-
memo_table_(new internal::DictionaryMemoTable(dictionary)),
131+
memo_table_(new internal::DictionaryMemoTable(pool, dictionary)),
132132
delta_offset_(0),
133133
byte_width_(-1),
134134
indices_builder_(pool),
@@ -143,7 +143,8 @@ class DictionaryBuilderBase : public ArrayBuilder {
143143
Status Append(const Scalar& value) {
144144
ARROW_RETURN_NOT_OK(Reserve(1));
145145

146-
auto memo_index = memo_table_->GetOrInsert(value);
146+
int32_t memo_index;
147+
ARROW_RETURN_NOT_OK(memo_table_->GetOrInsert(value, &memo_index));
147148
ARROW_RETURN_NOT_OK(indices_builder_.Append(memo_index));
148149
length_ += 1;
149150

@@ -162,6 +163,24 @@ class DictionaryBuilderBase : public ArrayBuilder {
162163
return Append(util::string_view(value, byte_width_));
163164
}
164165

166+
/// \brief Append a string (only for binary types)
167+
template <typename T1 = T>
168+
enable_if_binary_like<T1, Status> Append(const uint8_t* value, int32_t length) {
169+
return Append(reinterpret_cast<const char*>(value), length);
170+
}
171+
172+
/// \brief Append a string (only for binary types)
173+
template <typename T1 = T>
174+
enable_if_binary_like<T1, Status> Append(const char* value, int32_t length) {
175+
return Append(util::string_view(value, length));
176+
}
177+
178+
/// \brief Append a string (only for string types)
179+
template <typename T1 = T>
180+
enable_if_string_like<T1, Status> Append(const char* value, int32_t length) {
181+
return Append(util::string_view(value, length));
182+
}
183+
165184
/// \brief Append a scalar null value
166185
Status AppendNull() final {
167186
length_ += 1;
@@ -231,11 +250,11 @@ class DictionaryBuilderBase : public ArrayBuilder {
231250
/// \brief Reset and also clear accumulated dictionary values in memo table
232251
void ResetFull() {
233252
Reset();
234-
memo_table_.reset(new internal::DictionaryMemoTable(value_type_));
253+
memo_table_.reset(new internal::DictionaryMemoTable(pool_, value_type_));
235254
}
236255

237256
Status Resize(int64_t capacity) override {
238-
ARROW_RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
257+
ARROW_RETURN_NOT_OK(CheckCapacity(capacity));
239258
capacity = std::max(capacity, kMinBuilderCapacity);
240259
ARROW_RETURN_NOT_OK(indices_builder_.Resize(capacity));
241260
capacity_ = indices_builder_.capacity();
@@ -282,7 +301,7 @@ class DictionaryBuilderBase : public ArrayBuilder {
282301

283302
// Generate dictionary array from hash table contents
284303
std::shared_ptr<ArrayData> dictionary_data;
285-
ARROW_RETURN_NOT_OK(memo_table_->GetArrayData(pool_, dict_offset, &dictionary_data));
304+
ARROW_RETURN_NOT_OK(memo_table_->GetArrayData(dict_offset, &dictionary_data));
286305

287306
*out_dictionary = MakeArray(dictionary_data);
288307
delta_offset_ = memo_table_->size();
@@ -343,7 +362,7 @@ class DictionaryBuilderBase<BuilderType, NullType> : public ArrayBuilder {
343362
}
344363

345364
Status Resize(int64_t capacity) override {
346-
ARROW_RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
365+
ARROW_RETURN_NOT_OK(CheckCapacity(capacity));
347366
capacity = std::max(capacity, kMinBuilderCapacity);
348367

349368
ARROW_RETURN_NOT_OK(indices_builder_.Resize(capacity));
@@ -421,71 +440,13 @@ class Dictionary32Builder : public internal::DictionaryBuilderBase<Int32Builder,
421440
};
422441

423442
// ----------------------------------------------------------------------
424-
// Binary / Unicode builders with slightly expanded APIs
425-
426-
namespace internal {
427-
428-
template <typename T>
429-
class BinaryDictionaryBuilderImpl : public DictionaryBuilder<T> {
430-
public:
431-
using BASE = DictionaryBuilder<T>;
432-
using BASE::Append;
433-
using BASE::AppendIndices;
434-
using BASE::BASE;
435-
436-
BinaryDictionaryBuilderImpl() : BinaryDictionaryBuilderImpl(default_memory_pool()) {}
437-
438-
Status Append(const uint8_t* value, int32_t length) {
439-
return Append(reinterpret_cast<const char*>(value), length);
440-
}
441-
442-
Status Append(const char* value, int32_t length) {
443-
return Append(util::string_view(value, length));
444-
}
445-
};
446-
447-
template <typename T>
448-
class BinaryDictionary32BuilderImpl : public Dictionary32Builder<T> {
449-
public:
450-
using BASE = Dictionary32Builder<T>;
451-
using BASE::Append;
452-
using BASE::AppendIndices;
453-
using BASE::BASE;
454-
455-
BinaryDictionary32BuilderImpl()
456-
: BinaryDictionary32BuilderImpl(default_memory_pool()) {}
457-
458-
Status Append(const uint8_t* value, int32_t length) {
459-
return Append(reinterpret_cast<const char*>(value), length);
460-
}
461-
462-
Status Append(const char* value, int32_t length) {
463-
return Append(util::string_view(value, length));
464-
}
465-
};
466-
467-
} // namespace internal
468-
469-
class BinaryDictionaryBuilder : public internal::BinaryDictionaryBuilderImpl<BinaryType> {
470-
using BASE = internal::BinaryDictionaryBuilderImpl<BinaryType>;
471-
using BASE::BASE;
472-
};
473-
474-
class StringDictionaryBuilder : public internal::BinaryDictionaryBuilderImpl<StringType> {
475-
using BASE = BinaryDictionaryBuilderImpl<StringType>;
476-
using BASE::BASE;
477-
};
478-
479-
class BinaryDictionary32Builder
480-
: public internal::BinaryDictionary32BuilderImpl<BinaryType> {
481-
using BASE = internal::BinaryDictionary32BuilderImpl<BinaryType>;
482-
using BASE::BASE;
483-
};
484-
485-
class StringDictionary32Builder
486-
: public internal::BinaryDictionary32BuilderImpl<StringType> {
487-
using BASE = internal::BinaryDictionary32BuilderImpl<StringType>;
488-
using BASE::BASE;
489-
};
443+
// Binary / Unicode builders
444+
// (compatibility aliases; those used to be derived classes with additional
445+
// Append() overloads, but they have been folded into DictionaryBuilderBase)
446+
447+
using BinaryDictionaryBuilder = DictionaryBuilder<BinaryType>;
448+
using StringDictionaryBuilder = DictionaryBuilder<StringType>;
449+
using BinaryDictionary32Builder = Dictionary32Builder<BinaryType>;
450+
using StringDictionary32Builder = Dictionary32Builder<StringType>;
490451

491452
} // namespace arrow

include/arrow/array/builder_nested.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,9 @@ class BaseListBuilder : public ArrayBuilder {
5454
return Status::CapacityError("List array cannot reserve space for more than ",
5555
maximum_elements(), " got ", capacity);
5656
}
57-
ARROW_RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
57+
ARROW_RETURN_NOT_OK(CheckCapacity(capacity));
5858

59-
// one more then requested for offsets
59+
// One more than requested for offsets
6060
ARROW_RETURN_NOT_OK(offsets_builder_.Resize(capacity + 1));
6161
return ArrayBuilder::Resize(capacity);
6262
}
@@ -162,7 +162,7 @@ class BaseListBuilder : public ArrayBuilder {
162162
/// To use this class, you must append values to the child array builder and use
163163
/// the Append function to delimit each distinct list value (once the values
164164
/// have been appended to the child array) or use the bulk API to append
165-
/// a sequence of offests and null values.
165+
/// a sequence of offsets and null values.
166166
///
167167
/// A note on types. Per arrow/type.h all types in the c++ implementation are
168168
/// logical so even though this class always builds list array, this can
@@ -203,7 +203,7 @@ class ARROW_EXPORT LargeListBuilder : public BaseListBuilder<LargeListType> {
203203
///
204204
/// To use this class, you must append values to the key and item array builders
205205
/// and use the Append function to delimit each distinct map (once the keys and items
206-
/// have been appended) or use the bulk API to append a sequence of offests and null
206+
/// have been appended) or use the bulk API to append a sequence of offsets and null
207207
/// maps.
208208
///
209209
/// Key uniqueness and ordering are not validated.

0 commit comments

Comments
 (0)