Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 9 additions & 11 deletions src/simd/lines_bwd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,17 +192,16 @@ unsafe fn lines_bwd_lasx(
) -> (*const u8, CoordType) {
unsafe {
use std::arch::loongarch64::*;
use std::mem::transmute as T;

// Reduces a 256-bit LASX vector to a single `u32`.
//
// The visible call site passes a compare mask and-ed with a splat of 1, so
// each byte lane is presumably 0 or 1 and the result is a match count —
// TODO confirm against the full function.
//
// NOTE(review): this text comes from a rendered diff without +/- markers, so
// removed and added lines sit side by side. The `v32i8` signature and the two
// `T(...)` lines appear to be the pre-change forms of the lines that directly
// follow them — confirm before treating this as compilable code.
#[inline(always)]
unsafe fn horizontal_sum(sum: v32i8) -> u32 {
unsafe fn horizontal_sum(sum: m256i) -> u32 {
unsafe {
// Four `xvhaddw` steps (b->h, h->w, w->d, d->q). Going by the intrinsic
// names these are widening horizontal adds that double the element width
// each time — verify against the LASX intrinsic reference.
let sum = lasx_xvhaddw_h_b(sum, sum);
let sum = lasx_xvhaddw_w_h(sum, sum);
let sum = lasx_xvhaddw_d_w(sum, sum);
let sum = lasx_xvhaddw_q_d(sum, sum);
// Combine the vector with a permuted copy of itself; presumably this folds
// the upper 128-bit half onto the lower one — verify the `<1>` selector.
let tmp = lasx_xvpermi_q::<1>(T(sum), T(sum));
let sum = lasx_xvadd_w(T(sum), T(tmp));
let tmp = lasx_xvpermi_q::<1>(sum, sum);
let sum = lasx_xvadd_w(sum, tmp);
// Element 0 is returned as the scalar result.
lasx_xvpickve2gr_wu::<0>(sum)
}
}
Expand Down Expand Up @@ -243,8 +242,8 @@ unsafe fn lines_bwd_lasx(
let v = lasx_xvld::<0>(chunk_start as *const _);
let c = lasx_xvseq_b(v, lf);

let ones = lasx_xvand_v(T(c), T(lasx_xvrepli_b(1)));
let sum = horizontal_sum(T(ones));
let ones = lasx_xvand_v(c, lasx_xvrepli_b(1));
let sum = horizontal_sum(ones);

let line_next = line - sum as CoordType;
if line_next <= line_stop {
Expand All @@ -269,16 +268,15 @@ unsafe fn lines_bwd_lsx(
) -> (*const u8, CoordType) {
unsafe {
use std::arch::loongarch64::*;
use std::mem::transmute as T;

// Reduces a 128-bit LSX vector to a single `u32`.
//
// The visible call site passes a compare mask and-ed with a splat of 1, so
// each byte lane is presumably 0 or 1 and the result is a match count —
// TODO confirm against the full function.
//
// NOTE(review): this text comes from a rendered diff without +/- markers, so
// removed and added lines sit side by side. The `v16i8` signature and the
// `T(sum)` extraction appear to be the pre-change forms of the lines that
// directly follow them — confirm before treating this as compilable code.
#[inline(always)]
unsafe fn horizontal_sum(sum: v16i8) -> u32 {
unsafe fn horizontal_sum(sum: m128i) -> u32 {
unsafe {
// Four `vhaddw` steps (b->h, h->w, w->d, d->q). Going by the intrinsic
// names these are widening horizontal adds that double the element width
// each time — verify against the LSX intrinsic reference.
let sum = lsx_vhaddw_h_b(sum, sum);
let sum = lsx_vhaddw_w_h(sum, sum);
let sum = lsx_vhaddw_d_w(sum, sum);
let sum = lsx_vhaddw_q_d(sum, sum);
// Element 0 is returned as the scalar result.
lsx_vpickve2gr_wu::<0>(T(sum))
lsx_vpickve2gr_wu::<0>(sum)
}
}

Expand Down Expand Up @@ -318,8 +316,8 @@ unsafe fn lines_bwd_lsx(
let v = lsx_vld::<0>(chunk_start as *const _);
let c = lsx_vseq_b(v, lf);

let ones = lsx_vand_v(T(c), T(lsx_vrepli_b(1)));
let sum = horizontal_sum(T(ones));
let ones = lsx_vand_v(c, lsx_vrepli_b(1));
let sum = horizontal_sum(ones);

let line_next = line - sum as CoordType;
if line_next <= line_stop {
Expand Down
20 changes: 9 additions & 11 deletions src/simd/lines_fwd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -199,17 +199,16 @@ unsafe fn lines_fwd_lasx(
) -> (*const u8, CoordType) {
unsafe {
use std::arch::loongarch64::*;
use std::mem::transmute as T;

// Reduces a 256-bit LASX vector to a single `u32` (forward line-scan variant;
// the statements match the helper shown for `lines_bwd.rs`).
//
// The visible call site passes a compare mask and-ed with a splat of 1, so
// each byte lane is presumably 0 or 1 and the result is a match count —
// TODO confirm against the full function.
//
// NOTE(review): this text comes from a rendered diff without +/- markers, so
// removed and added lines sit side by side. The `v32i8` signature and the two
// `T(...)` lines appear to be the pre-change forms of the lines that directly
// follow them — confirm before treating this as compilable code.
#[inline(always)]
unsafe fn horizontal_sum(sum: v32i8) -> u32 {
unsafe fn horizontal_sum(sum: m256i) -> u32 {
unsafe {
// Four `xvhaddw` steps (b->h, h->w, w->d, d->q). Going by the intrinsic
// names these are widening horizontal adds that double the element width
// each time — verify against the LASX intrinsic reference.
let sum = lasx_xvhaddw_h_b(sum, sum);
let sum = lasx_xvhaddw_w_h(sum, sum);
let sum = lasx_xvhaddw_d_w(sum, sum);
let sum = lasx_xvhaddw_q_d(sum, sum);
// Combine the vector with a permuted copy of itself; presumably this folds
// the upper 128-bit half onto the lower one — verify the `<1>` selector.
let tmp = lasx_xvpermi_q::<1>(T(sum), T(sum));
let sum = lasx_xvadd_w(T(sum), T(tmp));
let tmp = lasx_xvpermi_q::<1>(sum, sum);
let sum = lasx_xvadd_w(sum, tmp);
// Element 0 is returned as the scalar result.
lasx_xvpickve2gr_wu::<0>(sum)
}
}
Expand Down Expand Up @@ -247,8 +246,8 @@ unsafe fn lines_fwd_lasx(
let v = lasx_xvld::<0>(beg as *const _);
let c = lasx_xvseq_b(v, lf);

let ones = lasx_xvand_v(T(c), T(lasx_xvrepli_b(1)));
let sum = horizontal_sum(T(ones));
let ones = lasx_xvand_v(c, lasx_xvrepli_b(1));
let sum = horizontal_sum(ones);

let line_next = line + sum as CoordType;
if line_next >= line_stop {
Expand All @@ -274,16 +273,15 @@ unsafe fn lines_fwd_lsx(
) -> (*const u8, CoordType) {
unsafe {
use std::arch::loongarch64::*;
use std::mem::transmute as T;

// Reduces a 128-bit LSX vector to a single `u32` (forward line-scan variant;
// the statements match the helper shown for `lines_bwd.rs`).
//
// The visible call site passes a compare mask and-ed with a splat of 1, so
// each byte lane is presumably 0 or 1 and the result is a match count —
// TODO confirm against the full function.
//
// NOTE(review): this text comes from a rendered diff without +/- markers, so
// removed and added lines sit side by side. The `v16i8` signature and the
// `T(sum)` extraction appear to be the pre-change forms of the lines that
// directly follow them — confirm before treating this as compilable code.
#[inline(always)]
unsafe fn horizontal_sum(sum: v16i8) -> u32 {
unsafe fn horizontal_sum(sum: m128i) -> u32 {
unsafe {
// Four `vhaddw` steps (b->h, h->w, w->d, d->q). Going by the intrinsic
// names these are widening horizontal adds that double the element width
// each time — verify against the LSX intrinsic reference.
let sum = lsx_vhaddw_h_b(sum, sum);
let sum = lsx_vhaddw_w_h(sum, sum);
let sum = lsx_vhaddw_d_w(sum, sum);
let sum = lsx_vhaddw_q_d(sum, sum);
// Element 0 is returned as the scalar result.
lsx_vpickve2gr_wu::<0>(T(sum))
lsx_vpickve2gr_wu::<0>(sum)
}
}

Expand Down Expand Up @@ -320,8 +318,8 @@ unsafe fn lines_fwd_lsx(
let v = lsx_vld::<0>(beg as *const _);
let c = lsx_vseq_b(v, lf);

let ones = lsx_vand_v(T(c), T(lsx_vrepli_b(1)));
let sum = horizontal_sum(T(ones));
let ones = lsx_vand_v(c, lsx_vrepli_b(1));
let sum = horizontal_sum(ones);

let line_next = line + sum as CoordType;
if line_next >= line_stop {
Expand Down
16 changes: 7 additions & 9 deletions src/simd/memchr2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,6 @@ unsafe fn memchr2_dispatch(needle1: u8, needle2: u8, beg: *const u8, end: *const
unsafe fn memchr2_lasx(needle1: u8, needle2: u8, mut beg: *const u8, end: *const u8) -> *const u8 {
unsafe {
use std::arch::loongarch64::*;
use std::mem::transmute as T;

let n1 = lasx_xvreplgr2vr_b(needle1 as i32);
let n2 = lasx_xvreplgr2vr_b(needle2 as i32);
Expand All @@ -136,10 +135,10 @@ unsafe fn memchr2_lasx(needle1: u8, needle2: u8, mut beg: *const u8, end: *const
let v = lasx_xvld::<0>(beg as *const _);
let a = lasx_xvseq_b(v, n1);
let b = lasx_xvseq_b(v, n2);
let c = lasx_xvor_v(T(a), T(b));
let m = lasx_xvmskltz_b(T(c));
let l = lasx_xvpickve2gr_wu::<0>(T(m));
let h = lasx_xvpickve2gr_wu::<4>(T(m));
let c = lasx_xvor_v(a, b);
let m = lasx_xvmskltz_b(c);
let l = lasx_xvpickve2gr_wu::<0>(m);
let h = lasx_xvpickve2gr_wu::<4>(m);
let m = (h << 16) | l;

if m != 0 {
Expand All @@ -158,7 +157,6 @@ unsafe fn memchr2_lasx(needle1: u8, needle2: u8, mut beg: *const u8, end: *const
unsafe fn memchr2_lsx(needle1: u8, needle2: u8, mut beg: *const u8, end: *const u8) -> *const u8 {
unsafe {
use std::arch::loongarch64::*;
use std::mem::transmute as T;

let n1 = lsx_vreplgr2vr_b(needle1 as i32);
let n2 = lsx_vreplgr2vr_b(needle2 as i32);
Expand All @@ -172,9 +170,9 @@ unsafe fn memchr2_lsx(needle1: u8, needle2: u8, mut beg: *const u8, end: *const
let v = lsx_vld::<0>(beg as *const _);
let a = lsx_vseq_b(v, n1);
let b = lsx_vseq_b(v, n2);
let c = lsx_vor_v(T(a), T(b));
let m = lsx_vmskltz_b(T(c));
let m = lsx_vpickve2gr_wu::<0>(T(m));
let c = lsx_vor_v(a, b);
let m = lsx_vmskltz_b(c);
let m = lsx_vpickve2gr_wu::<0>(m);

if m != 0 {
return beg.add(m.trailing_zeros() as usize);
Expand Down
8 changes: 3 additions & 5 deletions src/simd/memset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -255,9 +255,8 @@ fn memset_dispatch(beg: *mut u8, end: *mut u8, val: u64) {
fn memset_lasx(mut beg: *mut u8, end: *mut u8, val: u64) {
unsafe {
use std::arch::loongarch64::*;
use std::mem::transmute as T;

let fill: v32i8 = T(lasx_xvreplgr2vr_d(val as i64));
let fill = lasx_xvreplgr2vr_d(val as i64);

if end.offset_from_unsigned(beg) >= 32 {
lasx_xvst::<0>(fill, beg as *mut _);
Expand All @@ -280,7 +279,7 @@ fn memset_lasx(mut beg: *mut u8, end: *mut u8, val: u64) {
}

if end.offset_from_unsigned(beg) >= 16 {
let fill: v16i8 = T(lsx_vreplgr2vr_d(val as i64));
let fill = lsx_vreplgr2vr_d(val as i64);

loop {
lsx_vst::<0>(fill, beg as *mut _);
Expand Down Expand Up @@ -316,10 +315,9 @@ fn memset_lasx(mut beg: *mut u8, end: *mut u8, val: u64) {
unsafe fn memset_lsx(mut beg: *mut u8, end: *mut u8, val: u64) {
unsafe {
use std::arch::loongarch64::*;
use std::mem::transmute as T;

if end.offset_from_unsigned(beg) >= 16 {
let fill: v16i8 = T(lsx_vreplgr2vr_d(val as i64));
let fill = lsx_vreplgr2vr_d(val as i64);

lsx_vst::<0>(fill, beg as *mut _);
let off = beg.align_offset(16);
Expand Down
Loading