-
Notifications
You must be signed in to change notification settings - Fork 72
Open
Labels
A-x86_64x86_64 architecturex86_64 architectureBlocked-LLVMBugs blocked on bugfixes in LLVMBugs blocked on bugfixes in LLVMPerformanceSomething isn't fastSomething isn't fast
Description
Just me or is this bad codegen?
extern crate packed_simd;
use std::arch::x86_64::*;
use packed_simd::*;
/// Portable `packed_simd` form of the comparison whose codegen is being questioned.
///
/// Compares `x` against `y` with `le` and reduces the resulting mask with `all()`.
/// Presumably this yields `true` only when every lane of `x` is `<=` the matching
/// lane of `y` — confirm against the `packed_simd` documentation.
///
/// In the listing quoted with this snippet, this compiles to `vpcmpgtb` followed by
/// an XOR against an all-ones register before `vptest`, i.e. the "greater than"
/// mask is explicitly inverted instead of being tested for all-zeros directly.
pub fn le_i8x16(x: i8x16, y: i8x16) -> bool {
// Kept in exactly this form: the `le` + `all` combination is what triggers the
// extra instructions discussed in the report.
x.le(y).all()
}
.section __TEXT,__text,regular,pure_instructions
.globl __ZN9temp_test8le_i8x1617h13e72ae756b29df4E
.p2align 4, 0x90
__ZN9temp_test8le_i8x1617h13e72ae756b29df4E:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
vmovdqa (%rdi), %xmm0
vpcmpgtb (%rsi), %xmm0, %xmm0
vpcmpeqd %xmm1, %xmm1, %xmm1
vpxor %xmm1, %xmm0, %xmm0
vptest %xmm1, %xmm0
setb %al
popq %rbp
retq
.cfi_endproc
vs
/// Hand-written `std::arch` counterpart used as the codegen baseline.
///
/// Builds a mask with `_mm_cmpgt_epi8(x, y)` and returns `true` when
/// `_mm_test_all_zeros(mask, mask)` reports `1`, i.e. when the mask has no bits set.
///
/// NOTE(review): the name says `u8x16`, but `_mm_cmpgt_epi8` is documented by Intel
/// as a *signed* 8-bit compare — the name looks like a misnomer; confirm intent.
///
/// # Safety
///
/// Presumably the caller must guarantee the running CPU supports the instructions
/// behind these intrinsics (SSE2 for the compare, SSE4.1 for the test) — confirm
/// against the `std::arch::x86_64` documentation.
pub unsafe fn int_u8x16(x: __m128i, y: __m128i) -> bool {
// Mask of the "x > y" comparison; no inversion is needed afterwards.
let mask = _mm_cmpgt_epi8(x, y);
// Testing the mask against itself asks "is the whole mask zero?".
1 == _mm_test_all_zeros(mask, mask)
}
.globl __ZN9temp_test9int_u8x1617h00120f998e226c4eE
.p2align 4, 0x90
__ZN9temp_test9int_u8x1617h00120f998e226c4eE:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
vmovdqa (%rdi), %xmm0
vpcmpgtb (%rsi), %xmm0, %xmm0
vptest %xmm0, %xmm0
sete %al
popq %rbp
retq
.cfi_endproc
Metadata
Metadata
Assignees
Labels
A-x86_64x86_64 architecturex86_64 architectureBlocked-LLVMBugs blocked on bugfixes in LLVMBugs blocked on bugfixes in LLVMPerformanceSomething isn't fastSomething isn't fast
Type
Projects
Milestone
Relationships
Development
Select code repository
Activity
TheIronBorn commentedon Aug 6, 2018
with
target-cpu=sandybridge
TheIronBorn commentedon Aug 6, 2018
x.gt(y).none()
appears to match the intrinsics
gnzlbg commentedon Aug 7, 2018
I think we'd need to file this as an LLVM bug. LLVM has many of these.
gnzlbg commentedon Aug 10, 2018
This is now: https://bugs.llvm.org/show_bug.cgi?id=38522
gnzlbg commentedon Sep 9, 2018
@TheIronBorn we should re-check this once #156 is merged. Also I've opened #157 for exploring other things that we could do here codegen wise.