Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 16 additions & 12 deletions src/CodeGen_LLVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2090,12 +2090,7 @@ void CodeGen_LLVM::visit(const Load *op) {

Value *flipped = codegen(flipped_load);

vector<int> indices(ramp->lanes);
for (int i = 0; i < ramp->lanes; i++) {
indices[i] = ramp->lanes - 1 - i;
}

value = shuffle_vectors(flipped, indices);
value = reverse_vector(flipped);
} else if (ramp) {
// Gather without generating the indices as a vector
Value *ptr = codegen_buffer_pointer(op->name, op->type.element_of(), ramp->base);
Expand Down Expand Up @@ -2476,14 +2471,10 @@ void CodeGen_LLVM::codegen_predicated_load(const Load *op) {
op->alignment, vpred, true, llvm_stride);
} else if (ramp && stride && stride->value == -1) {
debug(4) << "Predicated dense vector load with stride -1\n\t" << Expr(op) << "\n";
vector<int> indices(ramp->lanes);
for (int i = 0; i < ramp->lanes; i++) {
indices[i] = ramp->lanes - 1 - i;
}

// Flip the predicate
Value *vpred = codegen(op->predicate);
vpred = shuffle_vectors(vpred, indices);
vpred = reverse_vector(vpred);

// Load the vector and then flip it in-place
Expr flipped_base = ramp->base - ramp->lanes + 1;
Expand All @@ -2496,7 +2487,7 @@ void CodeGen_LLVM::codegen_predicated_load(const Load *op) {
op->param, const_true(op->type.lanes()), align);

Value *flipped = codegen_dense_vector_load(flipped_load.as<Load>(), vpred);
value = shuffle_vectors(flipped, indices);
value = reverse_vector(flipped);
} else { // It's not dense vector load, we need to scalarize it
Expr load_expr = Load::make(op->type, op->name, op->index, op->image,
op->param, const_true(op->type.lanes()), op->alignment);
Expand Down Expand Up @@ -4973,6 +4964,19 @@ Value *CodeGen_LLVM::concat_vectors(const vector<Value *> &v) {
return vecs[0];
}

Value *CodeGen_LLVM::reverse_vector(llvm::Value *vec) {
if (effective_vscale > 0) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe this will affect RISC-V as well and thus needs to be validated there. Is there test coverage for this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

correctness_predicated_store_load and
correctness_vector_math
cover this code path, at least on an aarch64 host.
Unfortunately, I don't have a setup for testing on a RISC-V host. It would be good if this were tested on CI.

return builder->CreateVectorReverse(vec);
} else {
const int lanes = get_vector_num_elements(vec->getType());
vector<int> indices(lanes);
for (int i = 0; i < lanes; i++) {
indices[i] = lanes - 1 - i;
}
return shuffle_vectors(vec, indices);
}
}

Value *CodeGen_LLVM::shuffle_vectors(Value *a, Value *b,
const std::vector<int> &indices) {
if (isa<llvm::ScalableVectorType>(a->getType())) {
Expand Down
3 changes: 3 additions & 0 deletions src/CodeGen_LLVM.h
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,9 @@ class CodeGen_LLVM : public IRVisitor {
/** Concatenate a bunch of llvm vectors. Must be of the same type. */
virtual llvm::Value *concat_vectors(const std::vector<llvm::Value *> &);

/** Reverse the order of the elements in a vector, i.e. the first
 * lane of the input becomes the last lane of the result. When
 * effective_vscale > 0 this is emitted via the IR builder's
 * CreateVectorReverse; otherwise it is emitted as a call to
 * shuffle_vectors with the lane indices in descending order
 * (lanes - 1 down to 0). */
llvm::Value *reverse_vector(llvm::Value *vec);

/** Create an LLVM shuffle vectors instruction. Takes a combination of
* fixed or scalable vectors as input, so long as the effective lengths match,
* but always returns a fixed vector. */
Expand Down
Loading