Skip to content

Commit 2bcfb5a

Browse files
refactor: Kupyna - replace byte-level operations with word-level operations (#693)
1 parent f76b696 commit 2bcfb5a

File tree

6 files changed

+163
-235
lines changed

6 files changed

+163
-235
lines changed

.github/workflows/kupyna.yml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,31 @@ jobs:
6868
uses: RustCrypto/actions/.github/workflows/minimal-versions.yml@master
6969
with:
7070
working-directory: ${{ github.workflow }}
71+
72+
# Cross-compiled tests
73+
cross:
74+
needs: set-msrv
75+
strategy:
76+
matrix:
77+
rust:
78+
- ${{needs.set-msrv.outputs.msrv}}
79+
- stable
80+
target:
81+
- aarch64-unknown-linux-gnu
82+
- powerpc-unknown-linux-gnu
83+
features:
84+
- default
85+
86+
runs-on: ubuntu-latest
87+
defaults:
88+
run:
89+
# Cross mounts only the current package, i.e. by default it ignores the workspace's Cargo.toml
90+
working-directory: .
91+
steps:
92+
- uses: actions/checkout@v4
93+
- uses: ./.github/actions/cross-tests
94+
with:
95+
rust: ${{ matrix.rust }}
96+
package: ${{ github.workflow }}
97+
target: ${{ matrix.target }}
98+
features: ${{ matrix.features }}

kupyna/src/block_api.rs

Lines changed: 9 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use crate::{
22
long, short,
3-
utils::{read_u64_le, write_u64_le, xor_bytes},
3+
utils::{read_u64_le, write_u64_be, write_u64_le, xor},
44
};
55
use core::fmt;
66
use digest::{
@@ -72,25 +72,12 @@ impl VariableOutputCore for KupynaShortVarCore {
7272
short::compress(&mut self.state, block.as_ref());
7373
});
7474

75-
let mut state_u8 = [0u8; 64];
76-
for (src, dst) in self.state.iter().zip(state_u8.chunks_exact_mut(8)) {
77-
dst.copy_from_slice(&src.to_be_bytes());
78-
}
79-
80-
// Call t_xor_l with u8 array
81-
let t_xor_ult_processed_block = short::t_xor_l(state_u8);
75+
// Process final state with t_xor_l
76+
let t_xor_ult_processed_block = short::t_xor_l(self.state);
8277

83-
let result_u8 = xor_bytes(state_u8, t_xor_ult_processed_block);
78+
let result_state = xor(self.state, t_xor_ult_processed_block);
8479

85-
// Convert result back to u64s
86-
let mut res = [0u64; 8];
87-
for (dst, src) in res.iter_mut().zip(result_u8.chunks_exact(8)) {
88-
*dst = u64::from_be_bytes(src.try_into().unwrap());
89-
}
90-
let n = short::COLS / 2;
91-
for (chunk, v) in out.chunks_exact_mut(8).zip(res[n..].iter()) {
92-
chunk.copy_from_slice(&v.to_be_bytes());
93-
}
80+
write_u64_be(&result_state[short::COLS / 2..], out);
9481
}
9582
}
9683

@@ -204,25 +191,12 @@ impl VariableOutputCore for KupynaLongVarCore {
204191
long::compress(&mut self.state, block.as_ref());
205192
});
206193

207-
let mut state_u8 = [0u8; 128];
208-
for (src, dst) in self.state.iter().zip(state_u8.chunks_exact_mut(8)) {
209-
dst.copy_from_slice(&src.to_be_bytes());
210-
}
211-
212-
// Call t_xor_l with u8 array
213-
let t_xor_ult_processed_block = long::t_xor_l(state_u8);
194+
// Process final state with t_xor_l
195+
let t_xor_ult_processed_block = long::t_xor_l(self.state);
214196

215-
let result_u8 = xor_bytes(state_u8, t_xor_ult_processed_block);
197+
let result_state = xor(self.state, t_xor_ult_processed_block);
216198

217-
// Convert result back to u64s
218-
let mut res = [0u64; 16];
219-
for (dst, src) in res.iter_mut().zip(result_u8.chunks_exact(8)) {
220-
*dst = u64::from_be_bytes(src.try_into().unwrap());
221-
}
222-
let n = long::COLS / 2;
223-
for (chunk, v) in out.chunks_exact_mut(8).zip(res[n..].iter()) {
224-
chunk.copy_from_slice(&v.to_be_bytes());
225-
}
199+
write_u64_be(&result_state[long::COLS / 2..], out);
226200
}
227201
}
228202

kupyna/src/consts.rs

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
pub const MDS_MATRIX: [[u8; 8]; 8] = [
2-
[0x01, 0x01, 0x05, 0x01, 0x08, 0x06, 0x07, 0x04],
3-
[0x04, 0x01, 0x01, 0x05, 0x01, 0x08, 0x06, 0x07],
4-
[0x07, 0x04, 0x01, 0x01, 0x05, 0x01, 0x08, 0x06],
5-
[0x06, 0x07, 0x04, 0x01, 0x01, 0x05, 0x01, 0x08],
6-
[0x08, 0x06, 0x07, 0x04, 0x01, 0x01, 0x05, 0x01],
7-
[0x01, 0x08, 0x06, 0x07, 0x04, 0x01, 0x01, 0x05],
8-
[0x05, 0x01, 0x08, 0x06, 0x07, 0x04, 0x01, 0x01],
9-
[0x01, 0x05, 0x01, 0x08, 0x06, 0x07, 0x04, 0x01],
1+
pub const MDS_MATRIX: [u64; 8] = [
2+
0x0101050108060704,
3+
0x0401010501080607,
4+
0x0704010105010806,
5+
0x0607040101050108,
6+
0x0806070401010501,
7+
0x0108060704010105,
8+
0x0501080607040101,
9+
0x0105010806070401,
1010
];
1111

1212
pub const SBOXES: [[u8; 256]; 4] = [

kupyna/src/long.rs

Lines changed: 34 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -1,95 +1,53 @@
1-
use crate::utils::{add_constant_plus, add_constant_xor, apply_s_box, mix_columns, xor_bytes};
1+
use crate::utils::{
2+
add_constant_plus, add_constant_xor, apply_s_box, mix_columns, read_u64s_be, xor,
3+
};
4+
use core::array;
25

36
pub(crate) const COLS: usize = 16;
47
const ROUNDS: u64 = 14;
58

6-
type Matrix = [[u8; 8]; 16];
7-
89
pub(crate) fn compress(prev_vector: &mut [u64; COLS], message_block: &[u8; 128]) {
9-
let mut prev_vector_u8 = [0u8; 128];
10-
for (src, dst) in prev_vector.iter().zip(prev_vector_u8.chunks_exact_mut(8)) {
11-
dst.copy_from_slice(&src.to_be_bytes());
12-
}
13-
14-
let m_xor_p = xor_bytes(*message_block, prev_vector_u8);
15-
10+
// Convert message block from u8 to u64 (column-major order as per paper)
11+
let message_u64 = read_u64s_be::<128, COLS>(message_block);
12+
let m_xor_p = xor(*prev_vector, message_u64);
1613
let t_xor_mp = t_xor_l(m_xor_p);
17-
18-
let t_plus_m = t_plus_l(*message_block);
19-
20-
prev_vector_u8 = xor_bytes(xor_bytes(t_xor_mp, t_plus_m), prev_vector_u8);
21-
22-
for (dst, src) in prev_vector.iter_mut().zip(prev_vector_u8.chunks_exact(8)) {
23-
*dst = u64::from_be_bytes(src.try_into().unwrap());
24-
}
14+
let t_plus_m = t_plus_l(message_u64);
15+
*prev_vector = xor(xor(t_xor_mp, t_plus_m), *prev_vector);
2516
}
2617

27-
pub(crate) fn t_plus_l(block: [u8; 128]) -> [u8; 128] {
28-
let mut state = block_to_matrix(block);
18+
fn t_plus_l(state: [u64; COLS]) -> [u64; COLS] {
19+
let mut state = state;
2920
for nu in 0..ROUNDS {
30-
state = add_constant_plus(state, nu as usize);
31-
state = apply_s_box(state);
21+
add_constant_plus(&mut state, nu as usize);
22+
apply_s_box(&mut state);
3223
state = rotate_rows(state);
33-
state = mix_columns(state);
24+
mix_columns(&mut state);
3425
}
35-
matrix_to_block(state)
36-
}
37-
38-
fn block_to_matrix(block: [u8; 128]) -> Matrix {
39-
const ROWS: usize = 16;
40-
const COLS: usize = 8;
41-
42-
let mut matrix = [[0u8; COLS]; ROWS];
43-
for i in 0..ROWS {
44-
for j in 0..COLS {
45-
matrix[i][j] = block[i * COLS + j];
46-
}
47-
}
48-
matrix
49-
}
50-
51-
fn matrix_to_block(matrix: Matrix) -> [u8; 128] {
52-
const ROWS: usize = 16;
53-
const COLS: usize = 8;
54-
55-
let mut block = [0u8; ROWS * COLS];
56-
for i in 0..ROWS {
57-
for j in 0..COLS {
58-
block[i * COLS + j] = matrix[i][j];
59-
}
60-
}
61-
block
26+
state
6227
}
6328

64-
fn rotate_rows(mut state: Matrix) -> Matrix {
65-
const ROWS: usize = 16;
66-
let cols = 8;
67-
68-
let mut temp = [0u8; ROWS];
69-
let mut shift: i32 = -1;
70-
for i in 0..cols {
71-
if i == cols - 1 {
72-
shift = 11;
73-
} else {
74-
shift += 1;
75-
}
76-
for col in 0..ROWS {
77-
temp[(col + shift as usize) % ROWS] = state[col][i];
78-
}
79-
for col in 0..ROWS {
80-
state[col][i] = temp[col];
81-
}
82-
}
83-
state
29+
fn rotate_rows(state: [u64; COLS]) -> [u64; COLS] {
30+
//shift amounts for each row (0-6: row index, 7: special case = 11)
31+
const SHIFTS: [usize; 8] = [0, 1, 2, 3, 4, 5, 6, 11];
32+
33+
array::from_fn(|col| {
34+
let rotated_bytes = array::from_fn(|row| {
35+
let shift = SHIFTS[row];
36+
let src_col = (col + COLS - shift) % COLS;
37+
let src_bytes = state[src_col].to_be_bytes();
38+
src_bytes[row]
39+
});
40+
u64::from_be_bytes(rotated_bytes)
41+
})
8442
}
8543

86-
pub(crate) fn t_xor_l(block: [u8; 128]) -> [u8; 128] {
87-
let mut state = block_to_matrix(block);
44+
pub(crate) fn t_xor_l(state: [u64; COLS]) -> [u64; COLS] {
45+
let mut state = state;
8846
for nu in 0..ROUNDS {
89-
state = add_constant_xor(state, nu as usize);
90-
state = apply_s_box(state);
47+
add_constant_xor(&mut state, nu as usize);
48+
apply_s_box(&mut state);
9149
state = rotate_rows(state);
92-
state = mix_columns(state);
50+
mix_columns(&mut state);
9351
}
94-
matrix_to_block(state)
52+
state
9553
}

kupyna/src/short.rs

Lines changed: 34 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -1,95 +1,53 @@
1-
use crate::utils::{add_constant_plus, add_constant_xor, apply_s_box, mix_columns, xor_bytes};
1+
use crate::utils::{
2+
add_constant_plus, add_constant_xor, apply_s_box, mix_columns, read_u64s_be, xor,
3+
};
4+
use core::array;
25

36
pub(crate) const COLS: usize = 8;
47
const ROUNDS: u64 = 10;
58

6-
type Matrix = [[u8; 8]; 8];
7-
89
pub(crate) fn compress(prev_vector: &mut [u64; COLS], message_block: &[u8; 64]) {
9-
let mut prev_vector_u8 = [0u8; 64];
10-
for (src, dst) in prev_vector.iter().zip(prev_vector_u8.chunks_exact_mut(8)) {
11-
dst.copy_from_slice(&src.to_be_bytes());
12-
}
13-
14-
let m_xor_p = xor_bytes(*message_block, prev_vector_u8);
15-
10+
// Convert message block from u8 to u64 (column-major order as per paper)
11+
let message_u64 = read_u64s_be::<64, COLS>(message_block);
12+
let m_xor_p = xor(*prev_vector, message_u64);
1613
let t_xor_mp = t_xor_l(m_xor_p);
17-
18-
let t_plus_m = t_plus_l(*message_block);
19-
20-
prev_vector_u8 = xor_bytes(xor_bytes(t_xor_mp, t_plus_m), prev_vector_u8);
21-
22-
for (dst, src) in prev_vector.iter_mut().zip(prev_vector_u8.chunks_exact(8)) {
23-
*dst = u64::from_be_bytes(src.try_into().unwrap());
24-
}
14+
let t_plus_m = t_plus_l(message_u64);
15+
*prev_vector = xor(xor(t_xor_mp, t_plus_m), *prev_vector);
2516
}
2617

27-
fn t_plus_l(block: [u8; 64]) -> [u8; 64] {
28-
let mut state = block_to_matrix(block);
18+
fn t_plus_l(state: [u64; COLS]) -> [u64; COLS] {
19+
let mut state = state;
2920
for nu in 0..ROUNDS {
30-
state = add_constant_plus(state, nu as usize);
31-
state = apply_s_box(state);
21+
add_constant_plus(&mut state, nu as usize);
22+
apply_s_box(&mut state);
3223
state = rotate_rows(state);
33-
state = mix_columns(state);
24+
mix_columns(&mut state);
3425
}
35-
matrix_to_block(state)
36-
}
37-
38-
fn block_to_matrix(block: [u8; 64]) -> Matrix {
39-
const ROWS: usize = 8;
40-
const COLS: usize = 8;
41-
42-
let mut matrix = [[0u8; COLS]; ROWS];
43-
for i in 0..ROWS {
44-
for j in 0..COLS {
45-
matrix[i][j] = block[i * COLS + j];
46-
}
47-
}
48-
matrix
49-
}
50-
51-
fn matrix_to_block(matrix: Matrix) -> [u8; 64] {
52-
const ROWS: usize = 8;
53-
const COLS: usize = 8;
54-
55-
let mut block = [0u8; ROWS * COLS];
56-
for i in 0..ROWS {
57-
for j in 0..COLS {
58-
block[i * COLS + j] = matrix[i][j];
59-
}
60-
}
61-
block
26+
state
6227
}
6328

64-
fn rotate_rows(mut state: Matrix) -> Matrix {
65-
const ROWS: usize = 8;
66-
let cols = 8;
67-
68-
let mut temp = [0u8; ROWS];
69-
let mut shift: i32 = -1;
70-
for i in 0..cols {
71-
if i == cols - 1 {
72-
shift = 7;
73-
} else {
74-
shift += 1;
75-
}
76-
for col in 0..ROWS {
77-
temp[(col + shift as usize) % ROWS] = state[col][i];
78-
}
79-
for col in 0..ROWS {
80-
state[col][i] = temp[col];
81-
}
82-
}
83-
state
29+
fn rotate_rows(state: [u64; COLS]) -> [u64; COLS] {
30+
// shift amounts for each row (every row, including row 7, is shifted by its own index)
31+
const SHIFTS: [usize; 8] = [0, 1, 2, 3, 4, 5, 6, 7];
32+
33+
array::from_fn(|col| {
34+
let rotated_bytes = array::from_fn(|row| {
35+
let shift = SHIFTS[row];
36+
let src_col = (col + COLS - shift) % COLS;
37+
let src_bytes = state[src_col].to_be_bytes();
38+
src_bytes[row]
39+
});
40+
u64::from_be_bytes(rotated_bytes)
41+
})
8442
}
8543

86-
pub(crate) fn t_xor_l(block: [u8; 64]) -> [u8; 64] {
87-
let mut state = block_to_matrix(block);
44+
pub(crate) fn t_xor_l(state: [u64; COLS]) -> [u64; COLS] {
45+
let mut state = state;
8846
for nu in 0..ROUNDS {
89-
state = add_constant_xor(state, nu as usize);
90-
state = apply_s_box(state);
47+
add_constant_xor(&mut state, nu as usize);
48+
apply_s_box(&mut state);
9149
state = rotate_rows(state);
92-
state = mix_columns(state);
50+
mix_columns(&mut state);
9351
}
94-
matrix_to_block(state)
52+
state
9553
}

0 commit comments

Comments
 (0)