rwinlib
diff --git a/‎README.md‎
Lines changed: 32 additions & 25 deletions b/‎README.md‎
Lines changed: 32 additions & 25 deletions
diff --git a/‎include/arrow/api.h‎
Lines changed: 2 additions & 0 deletions b/‎include/arrow/api.h‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎include/arrow/array/array_base.h‎
Lines changed: 6 additions & 2 deletions b/‎include/arrow/array/array_base.h‎
Lines changed: 6 additions & 2 deletions
diff --git a/‎include/arrow/array/array_nested.h‎
Lines changed: 20 additions & 0 deletions b/‎include/arrow/array/array_nested.h‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎include/arrow/array/builder_adaptive.h‎
Lines changed: 10 additions & 6 deletions b/‎include/arrow/array/builder_adaptive.h‎
Lines changed: 10 additions & 6 deletions
diff --git a/‎include/arrow/array/builder_base.h‎
Lines changed: 23 additions & 0 deletions b/‎include/arrow/array/builder_base.h‎
Lines changed: 23 additions & 0 deletions
diff --git a/‎include/arrow/buffer.h‎
Lines changed: 7 additions & 0 deletions b/‎include/arrow/buffer.h‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎include/arrow/chunk_resolver.h‎
Lines changed: 104 additions & 0 deletions b/‎include/arrow/chunk_resolver.h‎
Lines changed: 104 additions & 0 deletions
diff --git a/‎include/arrow/chunked_array.h‎
Lines changed: 3 additions & 1 deletion b/‎include/arrow/chunked_array.h‎
Lines changed: 3 additions & 1 deletion
@@ -1,25 +1,32 @@
-# Apache Arrow 7.0.0
-
-Combined bundle with builds for rtools40 [mingw-w64-arrow](https://github.com/r-windows/rtools-packages/blob/master/mingw-w64-arrow/PKGBUILD) and [backports](https://github.com/r-windows/rtools-backports/blob/master/mingw-w64-arrow/PKGBUILD) for the R legacy toolchain in [lib-4.9.3](lib-4.9.3)
-
-Now supports parquet (thrift) and snappy. Example flags to compile and link the R bindings:
-
-```
-PKG_CPPFLAGS = -I$(ARROW_INCLUDE) \
-	-DARROW_R_WITH_ARROW -DARROW_DS_STATIC -DARROW_STATIC -DPARQUET_STATIC
-
-CXX_STD = CXX11
-
-PKG_LIBS = \
-	-L$(ARROW_LIBS) \
-	-lparquet -larrow_dataset -larrow \
-	-lthrift -lsnappy -lz -lzstd -llz4 -lcrypto -lcrypt32
-```
-
-To test this make sure you install the arrow package from a release tag:
-
-```r
-remotes::install_github("apache/arrow/r@apache-arrow-7.0.0")
-```
-
-To install R package from the arrow master branch you also would need to rebuild the master branch arrow C++ library from source.
+# arrow 8.0.0-1
+
+ - mingw-w64-i686-arrow-8.0.0-1-any.pkg.tar.xz
+ - mingw-w64-i686-aws-sdk-cpp-1.7.365-1-any.pkg.tar.xz
+ - mingw-w64-i686-brotli-1.0.9-4-any.pkg.tar.xz
+ - mingw-w64-i686-openssl-1.1.1.m-9800-any.pkg.tar.xz
+ - mingw-w64-i686-lz4-1.8.2-1-any.pkg.tar.xz
+ - mingw-w64-i686-re2-20200801-1-any.pkg.tar.xz
+ - mingw-w64-i686-snappy-1.1.7-2-any.pkg.tar.xz
+ - mingw-w64-i686-thrift-0.13.0-1-any.pkg.tar.xz
+ - mingw-w64-i686-zstd-1.4.4-1-any.pkg.tar.xz
+ - mingw-w64-i686-libutf8proc-2.4.0-2-any.pkg.tar.xz
+ - mingw-w64-x86_64-arrow-8.0.0-1-any.pkg.tar.xz
+ - mingw-w64-x86_64-aws-sdk-cpp-1.7.365-1-any.pkg.tar.xz
+ - mingw-w64-x86_64-brotli-1.0.9-4-any.pkg.tar.xz
+ - mingw-w64-x86_64-openssl-1.1.1.m-9800-any.pkg.tar.xz
+ - mingw-w64-x86_64-lz4-1.8.2-1-any.pkg.tar.xz
+ - mingw-w64-x86_64-re2-20200801-1-any.pkg.tar.xz
+ - mingw-w64-x86_64-snappy-1.1.7-2-any.pkg.tar.xz
+ - mingw-w64-x86_64-thrift-0.13.0-1-any.pkg.tar.xz
+ - mingw-w64-x86_64-zstd-1.4.4-1-any.pkg.tar.xz
+ - mingw-w64-x86_64-libutf8proc-2.4.0-2-any.pkg.tar.xz
+ - mingw-w64-ucrt-x86_64-arrow-8.0.0-1-any.pkg.tar.xz
+ - mingw-w64-ucrt-x86_64-aws-sdk-cpp-1.7.365-1-any.pkg.tar.xz
+ - mingw-w64-ucrt-x86_64-brotli-1.0.9-4-any.pkg.tar.xz
+ - mingw-w64-ucrt-x86_64-openssl-1.1.1.m-9800-any.pkg.tar.xz
+ - mingw-w64-ucrt-x86_64-lz4-1.8.2-1-any.pkg.tar.xz
+ - mingw-w64-ucrt-x86_64-re2-20200801-1-any.pkg.tar.xz
+ - mingw-w64-ucrt-x86_64-snappy-1.1.7-2-any.pkg.tar.xz
+ - mingw-w64-ucrt-x86_64-thrift-0.13.0-1-any.pkg.tar.xz
+ - mingw-w64-ucrt-x86_64-zstd-1.4.4-1-any.pkg.tar.xz
+ - mingw-w64-ucrt-x86_64-libutf8proc-2.4.0-2-any.pkg.tar.xz
@@ -38,6 +38,8 @@
 #include "arrow/tensor.h"                   // IYWU pragma: export
 #include "arrow/type.h"                     // IYWU pragma: export
 #include "arrow/util/key_value_metadata.h"  // IWYU pragma: export
+#include "arrow/visit_array_inline.h"       // IWYU pragma: export
+#include "arrow/visit_scalar_inline.h"      // IWYU pragma: export
 #include "arrow/visitor.h"                  // IYWU pragma: export
 
 /// \brief Top-level namespace for Apache Arrow C++ API
 
@@ -133,6 +133,7 @@ class ARROW_EXPORT Array {
                    int64_t end_idx, int64_t other_start_idx,
                    const EqualOptions& = EqualOptions::Defaults()) const;
 
+  /// \brief Apply the ArrayVisitor::Visit() method specialized to the array type
   Status Accept(ArrayVisitor* visitor) const;
 
   /// Construct a zero-copy view of this array with the given type.
@@ -187,10 +188,11 @@ class ARROW_EXPORT Array {
   Status ValidateFull() const;
 
  protected:
-  Array() : null_bitmap_data_(NULLPTR) {}
+  Array() = default;
+  ARROW_DEFAULT_MOVE_AND_ASSIGN(Array);
 
   std::shared_ptr<ArrayData> data_;
-  const uint8_t* null_bitmap_data_;
+  const uint8_t* null_bitmap_data_ = NULLPTR;
 
   /// Protected method for constructors
   void SetData(const std::shared_ptr<ArrayData>& data) {
@@ -204,6 +206,8 @@ class ARROW_EXPORT Array {
 
  private:
   ARROW_DISALLOW_COPY_AND_ASSIGN(Array);
+
+  ARROW_EXPORT friend void PrintTo(const Array& x, std::ostream* os);
 };
 
 static inline std::ostream& operator<<(std::ostream& os, const Array& x) {
 
@@ -129,6 +129,10 @@ class ARROW_EXPORT ListArray : public BaseListArray<ListType> {
       const Array& offsets, const Array& values,
       MemoryPool* pool = default_memory_pool());
 
+  static Result<std::shared_ptr<ListArray>> FromArrays(
+      std::shared_ptr<DataType> type, const Array& offsets, const Array& values,
+      MemoryPool* pool = default_memory_pool());
+
   /// \brief Return an Array that is a concatenation of the lists in this array.
   ///
   /// Note that it's different from `values()` in that it takes into
@@ -138,6 +142,10 @@ class ARROW_EXPORT ListArray : public BaseListArray<ListType> {
       MemoryPool* memory_pool = default_memory_pool()) const;
 
   /// \brief Return list offsets as an Int32Array
+  ///
+  /// The returned array will not have a validity bitmap, so you cannot expect
+  /// to pass it to ListArray::FromArrays() and get back the same list array
+  /// if the original one has nulls.
   std::shared_ptr<Array> offsets() const;
 
  protected:
@@ -174,6 +182,10 @@ class ARROW_EXPORT LargeListArray : public BaseListArray<LargeListType> {
       const Array& offsets, const Array& values,
       MemoryPool* pool = default_memory_pool());
 
+  static Result<std::shared_ptr<LargeListArray>> FromArrays(
+      std::shared_ptr<DataType> type, const Array& offsets, const Array& values,
+      MemoryPool* pool = default_memory_pool());
+
   /// \brief Return an Array that is a concatenation of the lists in this array.
   ///
   /// Note that it's different from `values()` in that it takes into
@@ -311,6 +323,14 @@ class ARROW_EXPORT FixedSizeListArray : public Array {
   static Result<std::shared_ptr<Array>> FromArrays(const std::shared_ptr<Array>& values,
                                                    int32_t list_size);
 
+  /// \brief Construct FixedSizeListArray from child value array and type
+  ///
+  /// \param[in] values Array containing list values
+  /// \param[in] type The fixed sized list type
+  /// \return Will have length equal to values.length() / type.list_size()
+  static Result<std::shared_ptr<Array>> FromArrays(const std::shared_ptr<Array>& values,
+                                                   std::shared_ptr<DataType> type);
+
  protected:
   void SetData(const std::shared_ptr<ArrayData>& data);
   int32_t list_size_;
 
@@ -48,9 +48,11 @@ class ARROW_EXPORT AdaptiveIntBuilderBase : public ArrayBuilder {
   /// \param[in] length the number of nulls to append
   Status AppendNulls(int64_t length) final {
     ARROW_RETURN_NOT_OK(CommitPendingData());
-    ARROW_RETURN_NOT_OK(Reserve(length));
-    memset(data_->mutable_data() + length_ * int_size_, 0, int_size_ * length);
-    UnsafeSetNull(length);
+    if (ARROW_PREDICT_TRUE(length > 0)) {  // length <= 0: nothing to reserve, zero or mark
+      ARROW_RETURN_NOT_OK(Reserve(length));
+      memset(data_->mutable_data() + length_ * int_size_, 0, int_size_ * length);  // zero `length` values of int_size_ bytes, starting at element length_
+      UnsafeSetNull(length);
+    }
     return Status::OK();
   }
 
@@ -70,9 +72,11 @@ class ARROW_EXPORT AdaptiveIntBuilderBase : public ArrayBuilder {
 
   Status AppendEmptyValues(int64_t length) final {
     ARROW_RETURN_NOT_OK(CommitPendingData());
-    ARROW_RETURN_NOT_OK(Reserve(length));
-    memset(data_->mutable_data() + length_ * int_size_, 0, int_size_ * length);
-    UnsafeSetNotNull(length);
+    if (ARROW_PREDICT_TRUE(length > 0)) {  // length <= 0: nothing to reserve, zero or mark
+      ARROW_RETURN_NOT_OK(Reserve(length));
+      memset(data_->mutable_data() + length_ * int_size_, 0, int_size_ * length);  // zero `length` values of int_size_ bytes, starting at element length_
+      UnsafeSetNotNull(length);
+    }
     return Status::OK();
   }
 
 
@@ -28,6 +28,7 @@
 #include "arrow/array/array_primitive.h"
 #include "arrow/buffer.h"
 #include "arrow/buffer_builder.h"
+#include "arrow/result.h"
 #include "arrow/status.h"
 #include "arrow/type_fwd.h"
 #include "arrow/util/macros.h"
@@ -306,13 +307,27 @@ ARROW_EXPORT
 Status MakeBuilder(MemoryPool* pool, const std::shared_ptr<DataType>& type,
                    std::unique_ptr<ArrayBuilder>* out);
 
+inline Result<std::unique_ptr<ArrayBuilder>> MakeBuilder(  // Result-returning overload
+    const std::shared_ptr<DataType>& type, MemoryPool* pool = default_memory_pool()) {
+  std::unique_ptr<ArrayBuilder> out;
+  ARROW_RETURN_NOT_OK(MakeBuilder(pool, type, &out));  // delegate to the out-parameter overload
+  return std::move(out);  // explicit move: `out` is converted into the Result return value
+}
+
 /// \brief Construct an empty ArrayBuilder corresponding to the data
 /// type, where any top-level or nested dictionary builders return the
 /// exact index type specified by the type.
 ARROW_EXPORT
 Status MakeBuilderExactIndex(MemoryPool* pool, const std::shared_ptr<DataType>& type,
                              std::unique_ptr<ArrayBuilder>* out);
 
+inline Result<std::unique_ptr<ArrayBuilder>> MakeBuilderExactIndex(  // Result-returning overload
+    const std::shared_ptr<DataType>& type, MemoryPool* pool = default_memory_pool()) {
+  std::unique_ptr<ArrayBuilder> out;
+  ARROW_RETURN_NOT_OK(MakeBuilderExactIndex(pool, type, &out));  // delegate to the out-parameter overload
+  return std::move(out);  // explicit move: `out` is converted into the Result return value
+}
+
 /// \brief Construct an empty DictionaryBuilder initialized optionally
 /// with a pre-existing dictionary
 /// \param[in] pool the MemoryPool to use for allocations
@@ -324,4 +339,12 @@ Status MakeDictionaryBuilder(MemoryPool* pool, const std::shared_ptr<DataType>&
                              const std::shared_ptr<Array>& dictionary,
                              std::unique_ptr<ArrayBuilder>* out);
 
+inline Result<std::unique_ptr<ArrayBuilder>> MakeDictionaryBuilder(  // Result-returning overload
+    const std::shared_ptr<DataType>& type, const std::shared_ptr<Array>& dictionary,
+    MemoryPool* pool = default_memory_pool()) {
+  std::unique_ptr<ArrayBuilder> out;
+  ARROW_RETURN_NOT_OK(MakeDictionaryBuilder(pool, type, dictionary, &out));  // delegate to the out-parameter overload
+  return std::move(out);  // explicit move: `out` is converted into the Result return value
+}
+
 }  // namespace arrow
@@ -255,6 +255,13 @@ class ARROW_EXPORT Buffer {
   static Result<std::shared_ptr<Buffer>> Copy(std::shared_ptr<Buffer> source,
                                               const std::shared_ptr<MemoryManager>& to);
 
+  /// \brief Copy a non-owned buffer
+  ///
+  /// This is useful for cases where the source memory area is externally managed
+  /// (its lifetime not tied to the source Buffer), otherwise please use Copy().
+  static Result<std::unique_ptr<Buffer>> CopyNonOwned(
+      const Buffer& source, const std::shared_ptr<MemoryManager>& to);
+
   /// \brief View buffer
   ///
   /// Return a Buffer that reflects this buffer, seen potentially from another
 
@@ -0,0 +1,104 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <atomic>
+#include <cstdint>
+#include <vector>
+
+#include "arrow/type_fwd.h"
+#include "arrow/util/macros.h"
+
+namespace arrow {
+namespace internal {
+
+struct ChunkLocation {  // value returned by ChunkResolver::Resolve()
+  int64_t chunk_index, index_in_chunk;  // resolved chunk, and the logical index minus that chunk's starting offset
+};
+
+// An object that resolves a logical index into a (chunk index, index within
+// that chunk) pair, remembering the most recently resolved chunk so that
+// nearby lookups can skip the binary search.
+struct ChunkResolver {
+  // The constructors are defined out of line. NOTE(review): presumably they
+  // fill offsets_ with the cumulative starting offset of each chunk/batch plus
+  // one trailing total-length entry -- confirm against the definitions.
+  explicit ChunkResolver(const ArrayVector& chunks);
+
+  explicit ChunkResolver(const std::vector<const Array*>& chunks);
+
+  explicit ChunkResolver(const RecordBatchVector& batches);
+
+  // Moves the offsets and copies the cached chunk index. noexcept: it only
+  // moves a std::vector and reads/writes an atomic integer.
+  ChunkResolver(ChunkResolver&& other) noexcept
+      : offsets_(std::move(other.offsets_)), cached_chunk_(other.cached_chunk_.load()) {}
+
+  ChunkResolver& operator=(ChunkResolver&& other) noexcept {
+    offsets_ = std::move(other.offsets_);
+    cached_chunk_.store(other.cached_chunk_.load());
+    return *this;
+  }
+
+  /// \brief Return a ChunkLocation containing the chunk index and in-chunk value index of
+  /// the chunked array at logical index
+  ///
+  /// No bounds checking is performed. When offsets_ has at most one entry the
+  /// result is {0, index}. For an index at or past the last offset, chunk_index
+  /// is the position of the last entry of offsets_ (offsets_.size() - 1).
+  inline ChunkLocation Resolve(const int64_t index) const {
+    // It is common for callers to make consecutive accesses at a relatively
+    // small distance from each other, hence often falling in the same chunk.
+    // This is trivial when merging (assuming each side of the merge uses
+    // its own resolver), but also in the inner recursive invocations of
+    // partitioning.
+    if (offsets_.size() <= 1) {
+      return {0, index};
+    }
+    const auto num_offsets = static_cast<int64_t>(offsets_.size());
+    const auto cached_chunk = cached_chunk_.load();
+    // The cache can hold the last position of offsets_ (stored after resolving
+    // an index at or past the final offset). That position has no successor, so
+    // it must be treated as unbounded above instead of reading
+    // offsets_[cached_chunk + 1] one past the end of the vector.
+    const bool cache_hit =
+        index >= offsets_[cached_chunk] &&
+        (cached_chunk + 1 == num_offsets || index < offsets_[cached_chunk + 1]);
+    if (ARROW_PREDICT_TRUE(cache_hit)) {
+      return {cached_chunk, index - offsets_[cached_chunk]};
+    }
+    auto chunk_index = Bisect(index);
+    cached_chunk_.store(chunk_index);
+    return {chunk_index, index - offsets_[chunk_index]};
+  }
+
+ protected:
+  // Find the chunk index corresponding to a value index using binary search.
+  // Returns the last position whose offset is <= index, or 0 when there is
+  // none (assumes offsets_ is sorted in ascending order).
+  inline int64_t Bisect(const int64_t index) const {
+    // Like std::upper_bound(), but hand-written as it can help the compiler.
+    // Search [lo, lo + n)
+    int64_t lo = 0;
+    auto n = static_cast<int64_t>(offsets_.size());
+    while (n > 1) {
+      const int64_t m = n >> 1;
+      const int64_t mid = lo + m;
+      if (index >= offsets_[mid]) {
+        lo = mid;
+        n -= m;
+      } else {
+        n = m;
+      }
+    }
+    return lo;
+  }
+
+ private:
+  // Collection of starting offsets used for binary search
+  std::vector<int64_t> offsets_;
+
+  // Tracks the most recently used chunk index to allow fast
+  // access for consecutive indices corresponding to the same chunk.
+  // Mutable and atomic because Resolve() updates it from a const method.
+  mutable std::atomic<int64_t> cached_chunk_;
+};
+
+}  // namespace internal
+}  // namespace arrow
@@ -23,6 +23,7 @@
 #include <utility>
 #include <vector>
 
+#include "arrow/chunk_resolver.h"
 #include "arrow/compare.h"
 #include "arrow/result.h"
 #include "arrow/status.h"
@@ -177,11 +178,12 @@ class ARROW_EXPORT ChunkedArray {
 
  protected:
   ArrayVector chunks_;
+  std::shared_ptr<DataType> type_;
   int64_t length_;
   int64_t null_count_;
-  std::shared_ptr<DataType> type_;
 
  private:
+  internal::ChunkResolver chunk_resolver_;
   ARROW_DISALLOW_COPY_AND_ASSIGN(ChunkedArray);
 };