@@ -48,7 +48,7 @@ using compute::threaded_exec_context;
4848
4949namespace acero {
5050
51- // / \addtogroup execnode-components
51+ // / \addtogroup acero-internals
5252// / @{
5353
5454class ARROW_ACERO_EXPORT ExecPlan : public std::enable_shared_from_this<ExecPlan> {
@@ -118,6 +118,10 @@ class ARROW_ACERO_EXPORT ExecPlan : public std::enable_shared_from_this<ExecPlan
118118 std::string ToString () const ;
119119};
120120
121+ // Acero can be extended by providing custom implementations of ExecNode. The methods
122+ // below are documented in detail and provide careful instruction on how to fulfill the
123+ // ExecNode contract. It's suggested you familiarize yourself with the Acero
124+ // documentation in the C++ user guide.
121125class ARROW_ACERO_EXPORT ExecNode {
122126 public:
123127 using NodeVector = std::vector<ExecNode*>;
@@ -173,9 +177,9 @@ class ARROW_ACERO_EXPORT ExecNode {
173177 // / non-deterministic. For example, a hash-join has no predictable output order.
174178 // /
175179 // / If the ordering is Ordering::Implicit then there is a meaningful order but that
176- // / odering is not represented by any column in the data. The most common case for this
177- // / is when reading data from an in-memory table. The data has an implicit "row order"
178- // / which is not neccesarily represented in the data set.
180+ // / ordering is not represented by any column in the data. The most common case for
181+ // / this is when reading data from an in-memory table. The data has an implicit "row
182+ // / order" which is not necessarily represented in the data set.
179183 // /
180184 // / A filter or project node will not modify the ordering. Nothing needs to be done
181185 // / other than ensure the index assigned to output batches is the same as the
@@ -321,7 +325,7 @@ class ARROW_ACERO_EXPORT ExecNode {
321325 // /
322326 // / This is not a pause. There will be no way to start the source again after this has
323327 // / been called.
324- Status StopProducing ();
328+ virtual Status StopProducing ();
325329
326330 std::string ToString (int indent = 0 ) const ;
327331
@@ -377,16 +381,36 @@ inline Result<ExecNode*> MakeExecNode(
377381 return factory (plan, std::move (inputs), options);
378382}
379383
380- // / \brief Helper class for declaring sets of ExecNodes efficiently
384+ // / @}
385+
386+ // / \addtogroup acero-api
387+ // / @{
388+
389+ // / \brief Helper class for declaring execution nodes
381390// /
382- // / A Declaration represents an unconstructed ExecNode (and potentially more since its
383- // / inputs may also be Declarations). The node can be constructed and added to a plan
384- // / with Declaration::AddToPlan, which will recursively construct any inputs as necessary.
391+ // / A Declaration represents an unconstructed ExecNode (and potentially an entire graph
392+ // / since its inputs may also be Declarations).
393+ // /
394+ // / A Declaration can be converted to a plan and executed using one of the
395+ // / DeclarationToXyz methods.
396+ // /
397+ // / For more direct control, a Declaration can be added to an existing execution
398+ // / plan with Declaration::AddToPlan, which will recursively construct any inputs as
399+ // / necessary.
385400struct ARROW_ACERO_EXPORT Declaration {
386401 using Input = std::variant<ExecNode*, Declaration>;
387402
388403 Declaration () {}
389404
405+ // / \brief construct a declaration
406+ // / \param factory_name the name of the exec node to construct. The node must have
407+ // / been added to the exec node registry with this name.
408+ // / \param inputs the inputs to the node, these should be other declarations
409+ // / \param options options that control the behavior of the node. You must use
410+ // / the appropriate subclass. For example, if `factory_name` is
411+ // / "project" then `options` should be ProjectNodeOptions.
412+ // / \param label a label to give the node. Can be used to distinguish it from other
413+ // / nodes of the same type in the plan.
390414 Declaration (std::string factory_name, std::vector<Input> inputs,
391415 std::shared_ptr<ExecNodeOptions> options, std::string label)
392416 : factory_name{std::move (factory_name)},
@@ -447,15 +471,28 @@ struct ARROW_ACERO_EXPORT Declaration {
447471 // / });
448472 static Declaration Sequence (std::vector<Declaration> decls);
449473
474+ // / \brief add the declaration to an already created execution plan
475+ // / \param plan the plan to add the node to
476+ // / \param registry the registry to use to lookup the node factory
477+ // /
478+ // / This method will recursively call AddToPlan on all of the declaration's inputs.
479+ // / This method is only for advanced use when the DeclarationToXyz methods are not
480+ // / sufficient.
481+ // /
482+ // / \return the instantiated execution node
450483 Result<ExecNode*> AddToPlan (ExecPlan* plan, ExecFactoryRegistry* registry =
451484 default_exec_factory_registry ()) const ;
452485
453486 // Validate a declaration
454487 bool IsValid (ExecFactoryRegistry* registry = default_exec_factory_registry()) const ;
455488
489+ // / \brief the name of the factory to use when creating a node
456490 std::string factory_name;
491+ // / \brief the declaration's inputs
457492 std::vector<Input> inputs;
493+ // / \brief options to control the behavior of the node
458494 std::shared_ptr<ExecNodeOptions> options;
495+ // / \brief a label to give the node in the plan
459496 std::string label;
460497};
461498
@@ -489,7 +526,7 @@ struct ARROW_ACERO_EXPORT QueryOptions {
489526 // / otherwise.
490527 // /
491528 // / If explicitly set to true then plan execution will fail if there is no
492- // / meaningful ordering. This can be useful to valdiate a query that should
529+ // / meaningful ordering. This can be useful to validate a query that should
493530 // / be emitting ordered results.
494531 // /
495532 // / If explicitly set to false then batches will be emitted immediately even if there
@@ -513,6 +550,13 @@ struct ARROW_ACERO_EXPORT QueryOptions {
513550 // / the `use_threads` option.
514551 ::arrow::internal::Executor* custom_cpu_executor = NULLPTR;
515552
553+ // / \brief custom executor to use for IO work
554+ // /
555+ // / Must be null or remain valid for the duration of the plan. If this is null then
556+ // / the global io thread pool will be chosen whose behavior will be controlled by
557+ // / the "ARROW_IO_THREADS" environment variable.
558+ ::arrow::internal::Executor* custom_io_executor = NULLPTR;
559+
516560 // / \brief a memory pool to use for allocations
517561 // /
518562 // / Must remain valid for the duration of the plan.
@@ -707,6 +751,10 @@ DeclarationToBatchesAsync(Declaration declaration, ExecContext exec_context);
707751// / fills up.
708752// /
709753// / If a custom exec context is provided then the value of `use_threads` will be ignored.
754+ // /
755+ // / The returned RecordBatchReader can be closed early to cancel the computation of record
756+ // / batches. In this case, only errors encountered by the computation may be reported. In
757+ // / particular, no cancellation error may be reported.
710758ARROW_ACERO_EXPORT Result<std::unique_ptr<RecordBatchReader>> DeclarationToReader (
711759 Declaration declaration, bool use_threads = true ,
712760 MemoryPool* memory_pool = default_memory_pool(),
@@ -746,6 +794,8 @@ ARROW_ACERO_EXPORT Future<> DeclarationToStatusAsync(
746794ARROW_ACERO_EXPORT Future<> DeclarationToStatusAsync (Declaration declaration,
747795 ExecContext exec_context);
748796
797+ // / @}
798+
749799// / \brief Wrap an ExecBatch generator in a RecordBatchReader.
750800// /
751801// / The RecordBatchReader does not impose any ordering on emitted batches.
@@ -765,7 +815,5 @@ Result<std::function<Future<std::optional<ExecBatch>>()>> MakeReaderGenerator(
765815 std::shared_ptr<RecordBatchReader> reader, arrow::internal::Executor* io_executor,
766816 int max_q = kDefaultBackgroundMaxQ , int q_restart = kDefaultBackgroundQRestart );
767817
768- // / @}
769-
770818} // namespace acero
771819} // namespace arrow
0 commit comments