Skip to content
This repository was archived by the owner on Jan 16, 2024. It is now read-only.

Commit 1858abc

Browse files
committed
arrow 0.15.1
1 parent 866a17e commit 1858abc

24 files changed

+112
-48
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Apache Arrow 0.15.0
1+
# Apache Arrow 0.15.1
22

33
Backports for the R legacy toolchain [lib-4.9.3](lib-4.9.3) built with [rtools-backports](https://github.com/r-windows/rtools-backports/blob/master/mingw-w64-arrow/PKGBUILD).
44

@@ -21,7 +21,7 @@ PKG_LIBS = \
2121
To test this make sure you install the arrow package from a release tag:
2222

2323
```r
24-
remotes::install_github("apache/arrow/r@apache-arrow-0.15.0")
24+
remotes::install_github("apache/arrow/r@apache-arrow-0.15.1")
2525
```
2626

2727
To install the R package from the arrow master branch, you would also need to rebuild the arrow C++ library from the master branch source.

include/arrow/array/builder_dict.h

Lines changed: 49 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -89,11 +89,7 @@ class ARROW_EXPORT DictionaryMemoTable {
8989
/// dense array
9090
///
9191
/// Unlike other builders, dictionary builder does not completely
92-
/// reset the state on Finish calls. The arrays built after the
93-
/// initial Finish call will reuse the previously created encoding and
94-
/// build a delta dictionary when new terms occur.
95-
///
96-
/// data
92+
/// reset the state on Finish calls.
9793
template <typename BuilderType, typename T>
9894
class DictionaryBuilderBase : public ArrayBuilder {
9995
public:
@@ -230,44 +226,34 @@ class DictionaryBuilderBase : public ArrayBuilder {
230226
}
231227

232228
void Reset() override {
229+
// Perform a partial reset. Call ResetFull to also reset the accumulated
230+
// dictionary values
233231
ArrayBuilder::Reset();
234232
indices_builder_.Reset();
233+
}
234+
235+
/// \brief Reset and also clear accumulated dictionary values in memo table
236+
void ResetFull() {
237+
Reset();
235238
memo_table_.reset(new internal::DictionaryMemoTable(value_type_));
236-
delta_offset_ = 0;
237239
}
238240

239241
Status Resize(int64_t capacity) override {
240242
ARROW_RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
241243
capacity = std::max(capacity, kMinBuilderCapacity);
242-
243-
if (capacity_ == 0) {
244-
// Initialize hash table
245-
// XXX should we let the user pass additional size heuristics?
246-
delta_offset_ = 0;
247-
}
248244
ARROW_RETURN_NOT_OK(indices_builder_.Resize(capacity));
249245
capacity_ = indices_builder_.capacity();
250246
return Status::OK();
251247
}
252248

253-
Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
254-
// Finalize indices array
255-
ARROW_RETURN_NOT_OK(indices_builder_.FinishInternal(out));
256-
257-
// Generate dictionary array from hash table contents
258-
std::shared_ptr<ArrayData> dictionary_data;
259-
260-
ARROW_RETURN_NOT_OK(
261-
memo_table_->GetArrayData(pool_, delta_offset_, &dictionary_data));
262-
263-
// Set type of array data to the right dictionary type
264-
(*out)->type = type();
265-
(*out)->dictionary = MakeArray(dictionary_data);
266-
267-
// Update internals for further uses of this DictionaryBuilder
268-
delta_offset_ = memo_table_->size();
269-
indices_builder_.Reset();
270-
249+
/// \brief Return dictionary indices and a delta dictionary since the last
250+
/// time that Finish or FinishDelta were called, and reset state of builder
251+
/// (except the memo table)
252+
Status FinishDelta(std::shared_ptr<Array>* out_indices,
253+
std::shared_ptr<Array>* out_delta) {
254+
std::shared_ptr<ArrayData> indices_data;
255+
ARROW_RETURN_NOT_OK(FinishWithDictOffset(delta_offset_, &indices_data, out_delta));
256+
*out_indices = MakeArray(indices_data);
271257
return Status::OK();
272258
}
273259

@@ -277,17 +263,45 @@ class DictionaryBuilderBase : public ArrayBuilder {
277263

278264
Status Finish(std::shared_ptr<DictionaryArray>* out) { return FinishTyped(out); }
279265

280-
/// is the dictionary builder in the delta building mode
281-
bool is_building_delta() { return delta_offset_ > 0; }
282-
283266
std::shared_ptr<DataType> type() const override {
284267
return ::arrow::dictionary(indices_builder_.type(), value_type_);
285268
}
286269

287270
protected:
271+
Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
272+
std::shared_ptr<Array> dictionary;
273+
ARROW_RETURN_NOT_OK(FinishWithDictOffset(/*offset=*/0, out, &dictionary));
274+
275+
// Set type of array data to the right dictionary type
276+
(*out)->type = type();
277+
(*out)->dictionary = dictionary;
278+
return Status::OK();
279+
}
280+
281+
Status FinishWithDictOffset(int64_t dict_offset,
282+
std::shared_ptr<ArrayData>* out_indices,
283+
std::shared_ptr<Array>* out_dictionary) {
284+
// Finalize indices array
285+
ARROW_RETURN_NOT_OK(indices_builder_.FinishInternal(out_indices));
286+
287+
// Generate dictionary array from hash table contents
288+
std::shared_ptr<ArrayData> dictionary_data;
289+
ARROW_RETURN_NOT_OK(memo_table_->GetArrayData(pool_, dict_offset, &dictionary_data));
290+
291+
*out_dictionary = MakeArray(dictionary_data);
292+
delta_offset_ = memo_table_->size();
293+
294+
// Update internals for further uses of this DictionaryBuilder
295+
ArrayBuilder::Reset();
296+
return Status::OK();
297+
}
298+
288299
std::unique_ptr<DictionaryMemoTable> memo_table_;
289300

301+
// The size of the dictionary memo at last invocation of Finish, to use in
302+
// FinishDelta for computing dictionary deltas
290303
int32_t delta_offset_;
304+
291305
// Only used for FixedSizeBinaryType
292306
int32_t byte_width_;
293307

@@ -380,6 +394,7 @@ class DictionaryBuilder : public internal::DictionaryBuilderBase<AdaptiveIntBuil
380394
const uint8_t* valid_bytes = NULLPTR) {
381395
int64_t null_count_before = this->indices_builder_.null_count();
382396
ARROW_RETURN_NOT_OK(this->indices_builder_.AppendValues(values, length, valid_bytes));
397+
this->capacity_ = this->indices_builder_.capacity();
383398
this->length_ += length;
384399
this->null_count_ += this->indices_builder_.null_count() - null_count_before;
385400
return Status::OK();
@@ -402,6 +417,7 @@ class Dictionary32Builder : public internal::DictionaryBuilderBase<Int32Builder,
402417
const uint8_t* valid_bytes = NULLPTR) {
403418
int64_t null_count_before = this->indices_builder_.null_count();
404419
ARROW_RETURN_NOT_OK(this->indices_builder_.AppendValues(values, length, valid_bytes));
420+
this->capacity_ = this->indices_builder_.capacity();
405421
this->length_ += length;
406422
this->null_count_ += this->indices_builder_.null_count() - null_count_before;
407423
return Status::OK();

include/arrow/io/compressed.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,9 @@ class ARROW_EXPORT CompressedOutputStream : public OutputStream {
6262
Status Tell(int64_t* position) const override;
6363

6464
Status Write(const void* data, int64_t nbytes) override;
65+
/// \cond FALSE
66+
using Writable::Write;
67+
/// \endcond
6568
Status Flush() override;
6669

6770
/// \brief Return the underlying raw output stream.

include/arrow/io/file.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,9 @@ class ARROW_EXPORT MemoryMappedFile : public ReadWriteFileInterface {
230230

231231
/// Write data at the current position in the file. Thread-safe
232232
Status Write(const void* data, int64_t nbytes) override;
233+
/// \cond FALSE
234+
using Writable::Write;
235+
/// \endcond
233236

234237
/// Set the size of the map to new_size.
235238
Status Resize(int64_t new_size);

include/arrow/io/memory.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,9 @@ class ARROW_EXPORT MockOutputStream : public OutputStream {
104104
bool closed() const override;
105105
Status Tell(int64_t* position) const override;
106106
Status Write(const void* data, int64_t nbytes) override;
107+
/// \cond FALSE
108+
using Writable::Write;
109+
/// \endcond
107110

108111
int64_t GetExtentBytesWritten() const { return extent_bytes_written_; }
109112

@@ -124,6 +127,10 @@ class ARROW_EXPORT FixedSizeBufferWriter : public WritableFile {
124127
Status Seek(int64_t position) override;
125128
Status Tell(int64_t* position) const override;
126129
Status Write(const void* data, int64_t nbytes) override;
130+
/// \cond FALSE
131+
using Writable::Write;
132+
/// \endcond
133+
127134
Status WriteAt(int64_t position, const void* data, int64_t nbytes) override;
128135

129136
void set_memcopy_threads(int num_threads);

include/arrow/json/chunked_builder.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ class ARROW_EXPORT ChunkedArrayBuilder {
7070
ARROW_EXPORT Status MakeChunkedArrayBuilder(
7171
const std::shared_ptr<internal::TaskGroup>& task_group, MemoryPool* pool,
7272
const PromotionGraph* promotion_graph, const std::shared_ptr<DataType>& type,
73-
std::unique_ptr<ChunkedArrayBuilder>* out);
73+
std::shared_ptr<ChunkedArrayBuilder>* out);
7474

7575
} // namespace json
7676
} // namespace arrow

include/arrow/json/chunker.h

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,23 +41,48 @@ class ARROW_EXPORT Chunker {
4141
virtual ~Chunker() = default;
4242

4343
/// \brief Carve up a chunk in a block of data to contain only whole objects
44+
///
45+
/// Post-conditions:
46+
/// - block == whole + partial
47+
/// - `whole` is a valid block of JSON data
48+
/// - `partial` doesn't contain an entire JSON object
49+
///
4450
/// \param[in] block json data to be chunked
4551
/// \param[out] whole subrange of block containing whole json objects
4652
/// \param[out] partial subrange of block containing a partial json object
47-
virtual Status Process(const std::shared_ptr<Buffer>& block,
48-
std::shared_ptr<Buffer>* whole,
53+
virtual Status Process(std::shared_ptr<Buffer> block, std::shared_ptr<Buffer>* whole,
4954
std::shared_ptr<Buffer>* partial) = 0;
5055

5156
/// \brief Carve the completion of a partial object out of a block
57+
///
58+
/// Post-conditions:
59+
/// - block == completion + rest
60+
/// - `partial + completion` is a valid block of JSON data
61+
/// - `completion` doesn't contain an entire JSON object
62+
///
5263
/// \param[in] partial incomplete json object
5364
/// \param[in] block json data
5465
/// \param[out] completion subrange of block containing the completion of partial
5566
/// \param[out] rest subrange of block containing what completion does not cover
56-
virtual Status ProcessWithPartial(const std::shared_ptr<Buffer>& partial,
57-
const std::shared_ptr<Buffer>& block,
67+
virtual Status ProcessWithPartial(std::shared_ptr<Buffer> partial,
68+
std::shared_ptr<Buffer> block,
5869
std::shared_ptr<Buffer>* completion,
5970
std::shared_ptr<Buffer>* rest) = 0;
6071

72+
/// \brief Like ProcessWithPartial, but for the last block of a file
73+
///
74+
/// This method allows for a final JSON object without a trailing newline
75+
/// (ProcessWithPartial would return an error in that case).
76+
///
77+
/// Post-conditions:
78+
/// - block == completion + rest
79+
/// - `partial + completion` is a valid block of JSON data
80+
/// - `completion` doesn't contain an entire JSON object
81+
virtual Status ProcessFinal(std::shared_ptr<Buffer> partial,
82+
std::shared_ptr<Buffer> block,
83+
std::shared_ptr<Buffer>* completion,
84+
std::shared_ptr<Buffer>* rest) = 0;
85+
6186
static std::unique_ptr<Chunker> Make(const ParseOptions& options);
6287

6388
protected:

include/arrow/json/options.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,7 @@ struct ARROW_EXPORT ParseOptions {
4646

4747
/// Whether objects may be printed across multiple lines (for example pretty-printed)
4848
///
49-
/// If true, parsing may be slower
50-
/// If false, input must end with an empty line
49+
/// If true, parsing may be slower.
5150
bool newlines_in_values = false;
5251

5352
/// How JSON fields outside of explicit_schema (if given) are treated

include/arrow/memory_pool.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,17 @@ ARROW_EXPORT MemoryPool* system_memory_pool();
160160
/// May return NotImplemented if jemalloc is not available.
161161
ARROW_EXPORT Status jemalloc_memory_pool(MemoryPool** out);
162162

163+
/// \brief Set jemalloc memory page purging behavior for future-created arenas
164+
/// to the indicated number of milliseconds. See dirty_decay_ms and
165+
/// muzzy_decay_ms options in jemalloc for a description of what these do. The
166+
/// default is configured to 1000 (1 second) which releases memory more
167+
/// aggressively to the operating system than the jemalloc default of 10
168+
/// seconds. If you set the value to 0, dirty / muzzy pages will be released
169+
/// immediately rather than with a time decay, but this may reduce application
170+
/// performance.
171+
ARROW_EXPORT
172+
Status jemalloc_set_decay_ms(int ms);
173+
163174
/// Return a process-wide memory pool based on mimalloc.
164175
///
165176
/// May return NotImplemented if mimalloc is not available.

include/arrow/util/config.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@
1717

1818
#define ARROW_VERSION_MAJOR 0
1919
#define ARROW_VERSION_MINOR 15
20-
#define ARROW_VERSION_PATCH 0
20+
#define ARROW_VERSION_PATCH 1
2121
#define ARROW_VERSION ((ARROW_VERSION_MAJOR * 1000) + ARROW_VERSION_MINOR) * 1000 + ARROW_VERSION_PATCH
2222

23-
/* #undef DOUBLE_CONVERSION_HAS_CASE_INSENSIBILITY */
23+
#define DOUBLE_CONVERSION_HAS_CASE_INSENSIBILITY
2424
/* #undef GRPCPP_PP_INCLUDE */

0 commit comments

Comments
 (0)