Skip to content

Bad compare codegen #67

@TheIronBorn

Description

@TheIronBorn
Contributor

Just me or is this bad codegen?

extern crate packed_simd;
use std::arch::x86_64::*;
use packed_simd::*;

/// Reports whether every lane of `x` is less than or equal to the matching lane of `y`.
///
/// `x.le(y)` builds a lane-wise comparison mask and `.all()` collapses that mask
/// into a single `bool`.
///
/// This exact expression is the reproducer for the listing that follows: the
/// compiler emits a greater-than compare and then inverts the result before
/// testing it, instead of testing the greater-than mask for emptiness directly.
pub fn le_i8x16(x: i8x16, y: i8x16) -> bool {
    x.le(y).all()
}
	.section	__TEXT,__text,regular,pure_instructions
	.globl	__ZN9temp_test8le_i8x1617h13e72ae756b29df4E
	.p2align	4, 0x90
## Compiler output for `le_i8x16`. `x` is loaded from the address in %rdi and
## `y` is read from the address in %rsi; the boolean result is returned in %al.
__ZN9temp_test8le_i8x1617h13e72ae756b29df4E:
	.cfi_startproc
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
	.cfi_def_cfa_register %rbp
	vmovdqa	(%rdi), %xmm0
	## xmm0 = per-byte signed (x > y) mask.
	vpcmpgtb	(%rsi), %xmm0, %xmm0
	## xmm1 = all ones (a register compared for equality with itself).
	vpcmpeqd	%xmm1, %xmm1, %xmm1
	## xmm0 = NOT (x > y), i.e. the (x <= y) mask. This instruction and the
	## all-ones setup above are the extra work the report is about.
	vpxor	%xmm1, %xmm0, %xmm0
	## CF is set when (xmm1 AND NOT xmm0) is all zero, i.e. when xmm0 is all ones.
	vptest	%xmm1, %xmm0
	## %al = CF: true when every byte satisfied x <= y.
	setb	%al
	popq	%rbp
	retq
	.cfi_endproc

vs

/// Reports whether no 8-bit lane of `x` is greater than the matching lane of `y`.
///
/// Hand-written intrinsic baseline used for comparison against `le_i8x16`.
///
/// NOTE(review): despite the `u8x16` in the name, `_mm_cmpgt_epi8` is a signed
/// comparison, so this matches the `i8x16` version rather than an unsigned one.
///
/// # Safety
///
/// `_mm_test_all_zeros` is an SSE4.1 intrinsic and this function carries no
/// `#[target_feature]` attribute, so the caller must make sure the code only
/// runs on a CPU with SSE4.1 (the report was built with `target-cpu=sandybridge`).
pub unsafe fn int_u8x16(x: __m128i, y: __m128i) -> bool {
    // Lanes where x > y become all ones; every other lane becomes zero.
    let mask = _mm_cmpgt_epi8(x, y);
    // `_mm_test_all_zeros(a, b)` yields 1 exactly when `a & b` has no bits set,
    // so testing the mask against itself asks "is the mask empty?".
    1 == _mm_test_all_zeros(mask, mask)
}
	.globl	__ZN9temp_test9int_u8x1617h00120f998e226c4eE
	.p2align	4, 0x90
## Compiler output for `int_u8x16`. `x` is loaded from the address in %rdi and
## `y` is read from the address in %rsi; the boolean result is returned in %al.
__ZN9temp_test9int_u8x1617h00120f998e226c4eE:
	.cfi_startproc
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset %rbp, -16
	movq	%rsp, %rbp
	.cfi_def_cfa_register %rbp
	vmovdqa	(%rdi), %xmm0
	## xmm0 = per-byte signed (x > y) mask.
	vpcmpgtb	(%rsi), %xmm0, %xmm0
	## ZF is set when (xmm0 AND xmm0) is all zero, i.e. when no byte had x > y.
	vptest	%xmm0, %xmm0
	## %al = ZF: no inversion step is needed on this path.
	sete	%al
	popq	%rbp
	retq
	.cfi_endproc

Activity

TheIronBorn

TheIronBorn commented on Aug 6, 2018

@TheIronBorn
ContributorAuthor

with target-cpu=sandybridge

TheIronBorn

TheIronBorn commented on Aug 6, 2018

@TheIronBorn
ContributorAuthor

x.gt(y).none() appears to match the intrinsics

gnzlbg

gnzlbg commented on Aug 7, 2018

@gnzlbg
Contributor

this bad codegen

I think we'd need to file this as an LLVM bug. LLVM has many of these.

gnzlbg

gnzlbg commented on Aug 10, 2018

@gnzlbg
Contributor
gnzlbg

gnzlbg commented on Sep 9, 2018

@gnzlbg
Contributor

@TheIronBorn we should re-check this once #156 is merged. Also I've opened #157 for exploring other things that we could do here codegen wise.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Metadata

Metadata

Assignees

No one assigned

    Labels

    A-x86_64 (x86_64 architecture) · Blocked-LLVM (Bugs blocked on bugfixes in LLVM) · Performance (Something isn't fast)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

      Development

      No branches or pull requests

        Participants

        @gnzlbg, @TheIronBorn

        Issue actions

          Bad compare codegen · Issue #67 · rust-lang/packed_simd