Skip to content

Commit 38395af

Browse files
committed
fix null behavior issue in hashJoin
change null equality to UNEQUAL handle empty input to build disable right joins (because design should be different)
1 parent 1dfb808 commit 38395af

File tree

2 files changed

+23
-15
lines changed

2 files changed

+23
-15
lines changed

velox/experimental/cudf/exec/CudfHashJoin.cpp

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "velox/experimental/cudf/exec/ExpressionEvaluator.h"
1919
#include "velox/experimental/cudf/exec/ToCudf.h"
2020
#include "velox/experimental/cudf/exec/Utilities.h"
21+
#include "velox/experimental/cudf/exec/VeloxCudfInterop.h"
2122

2223
#include "velox/exec/Task.h"
2324

@@ -147,7 +148,13 @@ void CudfHashJoinBuild::noMoreInput() {
147148
};
148149

149150
auto stream = cudfGlobalStreamPool().get_stream();
150-
auto tbl = getConcatenatedTable(inputs_, stream);
151+
std::unique_ptr<cudf::table> tbl;
152+
if (inputs_.size() == 0) {
153+
auto emptyRowVector = RowVector::createEmpty(joinNode_->sources()[1]->outputType(), operatorCtx_->pool());
154+
tbl = facebook::velox::cudf_velox::with_arrow::toCudfTable(emptyRowVector, operatorCtx_->pool(), stream);
155+
} else {
156+
tbl = getConcatenatedTable(inputs_, stream);
157+
}
151158

152159
// Release input data after synchronizing
153160
stream.synchronize();
@@ -176,7 +183,7 @@ void CudfHashJoinBuild::noMoreInput() {
176183
!joinNode_->filter();
177184
auto hashObject = (buildHashJoin) ? std::make_shared<cudf::hash_join>(
178185
tbl->view().select(buildKeyIndices),
179-
cudf::null_equality::EQUAL,
186+
cudf::null_equality::UNEQUAL,
180187
stream)
181188
: nullptr;
182189
if (buildHashJoin) {
@@ -367,6 +374,9 @@ RowVectorPtr CudfHashJoinProbe::getOutput() {
367374
}
368375
VELOX_NVTX_OPERATOR_FUNC_RANGE();
369376

377+
if (finished_) {
378+
return nullptr;
379+
}
370380
if (!input_) {
371381
return nullptr;
372382
}
@@ -440,7 +450,7 @@ RowVectorPtr CudfHashJoinProbe::getOutput() {
440450
leftTableView,
441451
rightTableView,
442452
tree_.back(),
443-
cudf::null_equality::EQUAL,
453+
cudf::null_equality::UNEQUAL,
444454
std::nullopt,
445455
stream);
446456
} else {
@@ -456,7 +466,7 @@ RowVectorPtr CudfHashJoinProbe::getOutput() {
456466
leftTableView,
457467
rightTableView,
458468
tree_.back(),
459-
cudf::null_equality::EQUAL,
469+
cudf::null_equality::UNEQUAL,
460470
std::nullopt,
461471
stream);
462472
} else {
@@ -472,14 +482,14 @@ RowVectorPtr CudfHashJoinProbe::getOutput() {
472482
rightTableView,
473483
leftTableView,
474484
tree_.back(),
475-
cudf::null_equality::EQUAL,
485+
cudf::null_equality::UNEQUAL,
476486
std::nullopt,
477487
stream);
478488
} else {
479489
std::tie(rightJoinIndices, leftJoinIndices) = cudf::left_join(
480490
rightTableView.select(rightKeyIndices_),
481491
leftTableView.select(leftKeyIndices_),
482-
cudf::null_equality::EQUAL,
492+
cudf::null_equality::UNEQUAL,
483493
stream,
484494
cudf::get_current_device_resource_ref());
485495
}
@@ -491,14 +501,14 @@ RowVectorPtr CudfHashJoinProbe::getOutput() {
491501
leftTableView,
492502
rightTableView,
493503
tree_.back(),
494-
cudf::null_equality::EQUAL,
504+
cudf::null_equality::UNEQUAL,
495505
stream,
496506
cudf::get_current_device_resource_ref());
497507
} else {
498508
leftJoinIndices = cudf::left_anti_join(
499509
leftTableView.select(leftKeyIndices_),
500510
rightTableView.select(rightKeyIndices_),
501-
cudf::null_equality::EQUAL,
511+
cudf::null_equality::UNEQUAL,
502512
stream,
503513
cudf::get_current_device_resource_ref());
504514
}
@@ -510,14 +520,14 @@ RowVectorPtr CudfHashJoinProbe::getOutput() {
510520
leftTableView,
511521
rightTableView,
512522
tree_.back(),
513-
cudf::null_equality::EQUAL,
523+
cudf::null_equality::UNEQUAL,
514524
stream,
515525
cudf::get_current_device_resource_ref());
516526
} else {
517527
leftJoinIndices = cudf::left_semi_join(
518528
leftTableView.select(leftKeyIndices_),
519529
rightTableView.select(rightKeyIndices_),
520-
cudf::null_equality::EQUAL,
530+
cudf::null_equality::UNEQUAL,
521531
stream,
522532
cudf::get_current_device_resource_ref());
523533
}
@@ -529,7 +539,7 @@ RowVectorPtr CudfHashJoinProbe::getOutput() {
529539
rightTableView,
530540
leftTableView,
531541
tree_.back(),
532-
cudf::null_equality::EQUAL,
542+
cudf::null_equality::UNEQUAL,
533543
stream,
534544
cudf::get_current_device_resource_ref());
535545
} else {
@@ -557,7 +567,7 @@ RowVectorPtr CudfHashJoinProbe::getOutput() {
557567

558568
rightTableView.select(rightKeyIndices_),
559569
leftTableView.select(leftKeyIndices_),
560-
cudf::null_equality::EQUAL,
570+
cudf::null_equality::UNEQUAL,
561571
stream,
562572
cudf::get_current_device_resource_ref());
563573
}

velox/experimental/cudf/exec/CudfHashJoin.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,10 +89,8 @@ class CudfHashJoinProbe : public exec::Operator, public NvtxHelper {
8989
static bool isSupportedJoinType(core::JoinType joinType) {
9090
return joinType == core::JoinType::kInner ||
9191
joinType == core::JoinType::kLeft ||
92-
joinType == core::JoinType::kRight ||
9392
joinType == core::JoinType::kAnti ||
94-
joinType == core::JoinType::kLeftSemiFilter ||
95-
joinType == core::JoinType::kRightSemiFilter;
93+
joinType == core::JoinType::kLeftSemiFilter;
9694
}
9795

9896
bool isFinished() override;

0 commit comments

Comments
 (0)