@@ -55,25 +55,24 @@ struct DictionaryScalar<FixedSizeBinaryType> {
5555
5656class ARROW_EXPORT DictionaryMemoTable {
5757 public:
58- explicit DictionaryMemoTable (const std::shared_ptr<DataType>& type);
59- explicit DictionaryMemoTable (const std::shared_ptr<Array>& dictionary);
58+ DictionaryMemoTable (MemoryPool* pool, const std::shared_ptr<DataType>& type);
59+ DictionaryMemoTable (MemoryPool* pool, const std::shared_ptr<Array>& dictionary);
6060 ~DictionaryMemoTable ();
6161
62- int32_t GetOrInsert (const bool & value);
63- int32_t GetOrInsert (const int8_t & value);
64- int32_t GetOrInsert (const int16_t & value);
65- int32_t GetOrInsert (const int32_t & value);
66- int32_t GetOrInsert (const int64_t & value);
67- int32_t GetOrInsert (const uint8_t & value);
68- int32_t GetOrInsert (const uint16_t & value);
69- int32_t GetOrInsert (const uint32_t & value);
70- int32_t GetOrInsert (const uint64_t & value);
71- int32_t GetOrInsert (const float & value);
72- int32_t GetOrInsert (const double & value);
73- int32_t GetOrInsert (const util::string_view& value);
74-
75- Status GetArrayData (MemoryPool* pool, int64_t start_offset,
76- std::shared_ptr<ArrayData>* out);
62+ Status GetOrInsert (bool value, int32_t * out);
63+ Status GetOrInsert (int8_t value, int32_t * out);
64+ Status GetOrInsert (int16_t value, int32_t * out);
65+ Status GetOrInsert (int32_t value, int32_t * out);
66+ Status GetOrInsert (int64_t value, int32_t * out);
67+ Status GetOrInsert (uint8_t value, int32_t * out);
68+ Status GetOrInsert (uint16_t value, int32_t * out);
69+ Status GetOrInsert (uint32_t value, int32_t * out);
70+ Status GetOrInsert (uint64_t value, int32_t * out);
71+ Status GetOrInsert (float value, int32_t * out);
72+ Status GetOrInsert (double value, int32_t * out);
73+ Status GetOrInsert (util::string_view value, int32_t * out);
74+
75+ Status GetArrayData (int64_t start_offset, std::shared_ptr<ArrayData>* out);
7776
7877 /// \brief Insert new memo values
7978 Status InsertValues (const Array& values);
@@ -103,7 +102,7 @@ class DictionaryBuilderBase : public ArrayBuilder {
103102 value_type,
104103 MemoryPool* pool = default_memory_pool())
105104 : ArrayBuilder(pool),
106- memo_table_ (new internal::DictionaryMemoTable(value_type)),
105+ memo_table_ (new internal::DictionaryMemoTable(pool, value_type)),
107106 delta_offset_(0 ),
108107 byte_width_(-1 ),
109108 indices_builder_(pool),
@@ -114,7 +113,7 @@ class DictionaryBuilderBase : public ArrayBuilder {
114113 enable_if_fixed_size_binary<T1, const std::shared_ptr<DataType>&> value_type,
115114 MemoryPool* pool = default_memory_pool())
116115 : ArrayBuilder(pool),
117- memo_table_(new internal::DictionaryMemoTable(value_type)),
116+ memo_table_(new internal::DictionaryMemoTable(pool, value_type)),
118117 delta_offset_(0 ),
119118 byte_width_(static_cast <const T1&>(*value_type).byte_width()),
120119 indices_builder_(pool),
@@ -125,10 +124,11 @@ class DictionaryBuilderBase : public ArrayBuilder {
125124 enable_if_parameter_free<T1, MemoryPool*> pool = default_memory_pool())
126125 : DictionaryBuilderBase<BuilderType, T1>(TypeTraits<T1>::type_singleton(), pool) {}
127126
127+ // This constructor doesn't check for errors. Use InsertMemoValues instead.
128128 DictionaryBuilderBase (const std::shared_ptr<Array>& dictionary,
129129 MemoryPool* pool = default_memory_pool())
130130 : ArrayBuilder(pool),
131- memo_table_(new internal::DictionaryMemoTable(dictionary)),
131+ memo_table_(new internal::DictionaryMemoTable(pool, dictionary)),
132132 delta_offset_(0 ),
133133 byte_width_(-1 ),
134134 indices_builder_(pool),
@@ -143,7 +143,8 @@ class DictionaryBuilderBase : public ArrayBuilder {
143143 Status Append (const Scalar& value) {
144144 ARROW_RETURN_NOT_OK (Reserve (1 ));
145145
146- auto memo_index = memo_table_->GetOrInsert (value);
146+ int32_t memo_index;
147+ ARROW_RETURN_NOT_OK (memo_table_->GetOrInsert (value, &memo_index));
147148 ARROW_RETURN_NOT_OK (indices_builder_.Append (memo_index));
148149 length_ += 1 ;
149150
@@ -162,6 +163,24 @@ class DictionaryBuilderBase : public ArrayBuilder {
162163 return Append (util::string_view (value, byte_width_));
163164 }
164165
166+ /// \brief Append `length` bytes starting at `value` (only for binary types); the pointer is reinterpreted as `const char*` and forwarded to the (pointer, length) overload
167+ template <typename T1 = T>
168+ enable_if_binary_like<T1, Status> Append (const uint8_t * value, int32_t length) {
169+ return Append (reinterpret_cast <const char *>(value), length);
170+ }
171+
172+ /// \brief Append the `length` characters starting at `value`, viewed through a util::string_view (only for binary types)
173+ template <typename T1 = T>
174+ enable_if_binary_like<T1, Status> Append (const char * value, int32_t length) {
175+ return Append (util::string_view (value, length));
176+ }
177+
178+ /// \brief Append the `length` characters starting at `value`, viewed through a util::string_view (only for string types)
179+ template <typename T1 = T>
180+ enable_if_string_like<T1, Status> Append (const char * value, int32_t length) {
181+ return Append (util::string_view (value, length));
182+ }
183+
165184 /// \brief Append a scalar null value
166185 Status AppendNull () final {
167186 length_ += 1 ;
@@ -231,11 +250,11 @@ class DictionaryBuilderBase : public ArrayBuilder {
231250 /// \brief Reset the builder, then discard the accumulated dictionary values by replacing the memo table with a newly constructed one
232251 void ResetFull () {
233252 Reset ();
234- memo_table_.reset (new internal::DictionaryMemoTable (value_type_));
253+ memo_table_.reset (new internal::DictionaryMemoTable (pool_, value_type_));
235254 }
236255
237256 Status Resize (int64_t capacity) override {
238- ARROW_RETURN_NOT_OK (CheckCapacity (capacity, capacity_ ));
257+ ARROW_RETURN_NOT_OK (CheckCapacity (capacity));
239258 capacity = std::max (capacity, kMinBuilderCapacity );
240259 ARROW_RETURN_NOT_OK (indices_builder_.Resize (capacity));
241260 capacity_ = indices_builder_.capacity ();
@@ -282,7 +301,7 @@ class DictionaryBuilderBase : public ArrayBuilder {
282301
283302 // Generate dictionary array from hash table contents
284303 std::shared_ptr<ArrayData> dictionary_data;
285- ARROW_RETURN_NOT_OK (memo_table_->GetArrayData (pool_, dict_offset, &dictionary_data));
304+ ARROW_RETURN_NOT_OK (memo_table_->GetArrayData (dict_offset, &dictionary_data));
286305
287306 *out_dictionary = MakeArray (dictionary_data);
288307 delta_offset_ = memo_table_->size ();
@@ -343,7 +362,7 @@ class DictionaryBuilderBase<BuilderType, NullType> : public ArrayBuilder {
343362 }
344363
345364 Status Resize (int64_t capacity) override {
346- ARROW_RETURN_NOT_OK (CheckCapacity (capacity, capacity_ ));
365+ ARROW_RETURN_NOT_OK (CheckCapacity (capacity));
347366 capacity = std::max (capacity, kMinBuilderCapacity );
348367
349368 ARROW_RETURN_NOT_OK (indices_builder_.Resize (capacity));
@@ -421,71 +440,13 @@ class Dictionary32Builder : public internal::DictionaryBuilderBase<Int32Builder,
421440};
422441
423442// ----------------------------------------------------------------------
424- // Binary / Unicode builders with slightly expanded APIs
425-
426- namespace internal {
427-
428- template <typename T>
429- class BinaryDictionaryBuilderImpl : public DictionaryBuilder <T> {
430- public:
431- using BASE = DictionaryBuilder<T>;
432- using BASE::Append;
433- using BASE::AppendIndices;
434- using BASE::BASE;
435-
436- BinaryDictionaryBuilderImpl () : BinaryDictionaryBuilderImpl(default_memory_pool()) {}
437-
438- Status Append (const uint8_t * value, int32_t length) {
439- return Append (reinterpret_cast <const char *>(value), length);
440- }
441-
442- Status Append (const char * value, int32_t length) {
443- return Append (util::string_view (value, length));
444- }
445- };
446-
447- template <typename T>
448- class BinaryDictionary32BuilderImpl : public Dictionary32Builder <T> {
449- public:
450- using BASE = Dictionary32Builder<T>;
451- using BASE::Append;
452- using BASE::AppendIndices;
453- using BASE::BASE;
454-
455- BinaryDictionary32BuilderImpl ()
456- : BinaryDictionary32BuilderImpl(default_memory_pool()) {}
457-
458- Status Append (const uint8_t * value, int32_t length) {
459- return Append (reinterpret_cast <const char *>(value), length);
460- }
461-
462- Status Append (const char * value, int32_t length) {
463- return Append (util::string_view (value, length));
464- }
465- };
466-
467- } // namespace internal
468-
469- class BinaryDictionaryBuilder : public internal ::BinaryDictionaryBuilderImpl<BinaryType> {
470- using BASE = internal::BinaryDictionaryBuilderImpl<BinaryType>;
471- using BASE::BASE;
472- };
473-
474- class StringDictionaryBuilder : public internal ::BinaryDictionaryBuilderImpl<StringType> {
475- using BASE = BinaryDictionaryBuilderImpl<StringType>;
476- using BASE::BASE;
477- };
478-
479- class BinaryDictionary32Builder
480- : public internal::BinaryDictionary32BuilderImpl<BinaryType> {
481- using BASE = internal::BinaryDictionary32BuilderImpl<BinaryType>;
482- using BASE::BASE;
483- };
484-
485- class StringDictionary32Builder
486- : public internal::BinaryDictionary32BuilderImpl<StringType> {
487- using BASE = internal::BinaryDictionary32BuilderImpl<StringType>;
488- using BASE::BASE;
489- };
443+ // Binary / Unicode builders
444+ // (compatibility aliases; these used to be derived classes with additional
445+ // Append() overloads, which have since been folded into DictionaryBuilderBase)
446+
447+ using BinaryDictionaryBuilder = DictionaryBuilder<BinaryType>;
448+ using StringDictionaryBuilder = DictionaryBuilder<StringType>;
449+ using BinaryDictionary32Builder = Dictionary32Builder<BinaryType>;
450+ using StringDictionary32Builder = Dictionary32Builder<StringType>;
490451
491452} // namespace arrow
0 commit comments