Skip to content

Commit e3021f1

Browse files
authored
perf: CRP-2900 Use optimized G2 generator point multiplication (#219)
This saves 123M cycles during execution in a canister, as reported by the canbench results. The implementation is just a port of the existing algorithm in https://github.com/dfinity/ic/blob/master/rs/crypto/internal/crypto_lib/bls12_381/type/src/lib.rs used for multiplication by fixed points in the replica.
1 parent 483d1f5 commit e3021f1

File tree

3 files changed

+183
-8
lines changed

3 files changed

+183
-8
lines changed

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

backend/rs/benchmarks/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,5 @@ candid = "0.10.0"
1313
getrandom = { version = "0.2", features = ["custom"] }
1414
hex = { workspace = true }
1515
ic-cdk = "0.18.5"
16-
ic-vetkeys = "0.3.0"
16+
ic-vetkeys = { path = "../ic_vetkeys" }
17+

backend/rs/ic_vetkeys/src/utils/mod.rs

Lines changed: 180 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,180 @@ lazy_static::lazy_static! {
3232
const G1AFFINE_BYTES: usize = 48; // Size of compressed form
3333
const G2AFFINE_BYTES: usize = 96; // Size of compressed form
3434

35+
/// Precomputed multiples of a single G2 point, stored in affine form.
///
/// Built once by `new` and then consumed by `mul` / `mul_vartime`, which
/// multiply that point by a scalar using only table lookups and additions.
struct G2PrecomputedTable {
36+
/// Flat table of `TABLE_SIZE` points: `WINDOWS` consecutive groups of
/// `WINDOW_ELEMENTS` entries, one group per scalar window.
tbl: Vec<G2Affine>,
37+
}
38+
39+
impl G2PrecomputedTable {
40+
/// The size of the windows, in bits
41+
///
42+
/// This algorithm uses just `ceil(SUBGROUP_BITS/WINDOW_BITS)` additions in
43+
/// the online phase, at the cost of storing a table of size
44+
/// `(SUBGROUP_BITS + WINDOW_BITS - 1)/WINDOW_BITS * ((1 << WINDOW_BITS) - 1)`
45+
///
46+
/// This constant is configurable and can take values between 1 and 7
47+
/// (inclusive); window values and `WINDOW_MASK` are held in a `u8`.
48+
///
49+
/// | WINDOW_BITS | TABLE_SIZE | online additions |
50+
/// | ----------- | ---------- | ---------------- |
51+
/// | 1 | 255 | 255 |
52+
/// | 2 | 384 | 128 |
53+
/// | 3 | 595 | 85 |
54+
/// | 4 | 960 | 64 |
55+
/// | 5 | 1581 | 51 |
56+
/// | 6 | 2709 | 43 |
57+
/// | 7 | 4699 | 37 |
58+
///
59+
const WINDOW_BITS: usize = 4;
60+
61+
/// The bit length of the BLS12-381 subgroup, i.e. the number of scalar
/// bits that the windows have to cover
62+
const SUBGROUP_BITS: usize = 255;
63+
64+
// A bitmask with the low WINDOW_BITS bits set, used to cut one window
// out of a byte
65+
const WINDOW_MASK: u8 = (1 << Self::WINDOW_BITS) - 1;
66+
67+
// The total number of windows in a scalar, rounded up so that a final
// partial window is still covered
68+
const WINDOWS: usize = Self::SUBGROUP_BITS.div_ceil(Self::WINDOW_BITS);
69+
70+
// We must select from 2^WINDOW_BITS elements in each table
71+
// group. However one element of the table group is always the
72+
// identity, and so can be omitted, which is the reason for the
73+
// subtraction by 1 here. As a consequence window value `b` (for
// `b > 0`) lives at index `b - 1` of its group.
74+
const WINDOW_ELEMENTS: usize = (1 << Self::WINDOW_BITS) - 1;
75+
76+
// The total size of the table we will use: one group of
// WINDOW_ELEMENTS points for each of the WINDOWS windows
77+
const TABLE_SIZE: usize = Self::WINDOW_ELEMENTS * Self::WINDOWS;
78+
79+
/// Precompute a table for fast multiplication
///
/// `pt` is the fixed point that `mul` / `mul_vartime` will later multiply.
///
/// After construction, entry `j` of group `i` holds
/// `(j + 1) * 2^(WINDOW_BITS * i) * pt`. The table is first filled with
/// projective points and then converted to affine form in a single batch.
80+
fn new(pt: &G2Affine) -> Self {
81+
let mut ptbl = vec![ic_bls12_381::G2Projective::identity(); Self::TABLE_SIZE];
82+
83+
// `accum` is the base point of the current group: 2^(WINDOW_BITS * i) * pt
let mut accum = ic_bls12_381::G2Projective::from(pt);
84+
85+
for i in 0..Self::WINDOWS {
86+
let tbl_i = &mut ptbl[Self::WINDOW_ELEMENTS * i..Self::WINDOW_ELEMENTS * (i + 1)];
87+
88+
tbl_i[0] = accum;
89+
for j in 1..Self::WINDOW_ELEMENTS {
90+
// Our table indexes are off by one due to the omitted
91+
// identity element. So here we are checking if we are
92+
// about to compute a point that is a doubling of a point
93+
// we have previously computed. If so we can compute it
94+
// using a (faster) doubling rather than using addition.
95+
//
// Concretely tbl_i[j] holds (j + 1) * accum, so for odd j it
// is twice tbl_i[j / 2], which holds ((j + 1) / 2) * accum.
96+
tbl_i[j] = if j % 2 == 1 {
97+
tbl_i[j / 2].double()
98+
} else {
99+
tbl_i[j - 1] + tbl_i[0]
100+
};
101+
}
102+
103+
// move on to the next power: tbl_i[WINDOW_ELEMENTS / 2] holds
// 2^(WINDOW_BITS - 1) * accum, so doubling it gives 2^WINDOW_BITS * accum
104+
accum = tbl_i[Self::WINDOW_ELEMENTS / 2].double();
105+
}
106+
107+
// batch convert the table to affine form, so we can use mixed addition
108+
// in the online phase.
109+
let mut tbl = vec![ic_bls12_381::G2Affine::identity(); Self::TABLE_SIZE];
110+
ic_bls12_381::G2Projective::batch_normalize(&ptbl, &mut tbl);
111+
112+
Self { tbl }
113+
}
114+
115+
/// Perform variable-time scalar multiplication using the precomputed table plus extra addition
///
/// Returns `scalar * P + extra_add`, where `P` is the point the table was
/// built from. When `extra_add` is `None` nothing extra is added.
///
/// Zero windows are skipped and the table is indexed directly by the window
/// value, so the work performed depends on `scalar`.
/// NOTE(review): confirm that a scalar is not secret before passing it here
/// rather than to `mul`.
116+
fn mul_vartime(&self, scalar: &Scalar, extra_add: Option<&G2Affine>) -> ic_bls12_381::G2Affine {
117+
// Big-endian copy of the scalar bytes, which is the order `get_window`
// reads (it counts bit offsets from the end of the slice).
let s = {
118+
let mut s = scalar.to_bytes();
119+
s.reverse(); // zkcrypto/bls12_381 uses little-endian
120+
s
121+
};
122+
123+
// Seeding the accumulator with `extra_add` folds the extra addition into
// the same chain of additions as the table entries.
let mut accum = if let Some(add) = extra_add {
124+
ic_bls12_381::G2Projective::from(add)
125+
} else {
126+
ic_bls12_381::G2Projective::identity()
127+
};
128+
129+
for i in 0..Self::WINDOWS {
130+
let tbl_for_i = &self.tbl[Self::WINDOW_ELEMENTS * i..Self::WINDOW_ELEMENTS * (i + 1)];
131+
132+
let b = Self::get_window(&s, Self::WINDOW_BITS * i);
133+
// A zero window contributes the identity, which is not stored in the
// table; non-zero value `b` is stored at index `b - 1`.
if b > 0 {
134+
accum += tbl_for_i[b as usize - 1];
135+
}
136+
}
137+
138+
G2Affine::from(accum)
139+
}
140+
141+
/// Perform scalar multiplication using the precomputed table
///
/// Returns `scalar * P`, where `P` is the point the table was built from.
///
/// Unlike `mul_vartime`, every window performs exactly one lookup through
/// `ct_select` and one addition, with no branch on the window value.
142+
fn mul(&self, scalar: &Scalar) -> ic_bls12_381::G2Affine {
143+
// Big-endian copy of the scalar bytes, which is the order `get_window`
// reads (it counts bit offsets from the end of the slice).
let s = {
144+
let mut s = scalar.to_bytes();
145+
s.reverse(); // zkcrypto/bls12_381 uses little-endian
146+
s
147+
};
148+
149+
let mut accum = ic_bls12_381::G2Projective::identity();
150+
151+
for i in 0..Self::WINDOWS {
152+
let tbl_for_i = &self.tbl[Self::WINDOW_ELEMENTS * i..Self::WINDOW_ELEMENTS * (i + 1)];
153+
154+
let b = Self::get_window(&s, Self::WINDOW_BITS * i);
155+
// `ct_select` yields the identity for b == 0, so the addition is
// performed unconditionally.
accum += Self::ct_select(tbl_for_i, b as usize);
156+
}
157+
158+
G2Affine::from(accum)
159+
}
160+
161+
// Extract a WINDOW_BITS sized window out of s, depending on offset.
//
// `s` is read as a big-endian integer (callers pass the reversed output of
// `Scalar::to_bytes`) and `offset` is a bit position counted from the least
// significant bit, so offset 0 selects the low bits of the last byte.
// `offset / 8` must be smaller than `s.len()`.
//
// Returns the window value in the low WINDOW_BITS bits of the result.
162+
#[inline(always)]
163+
fn get_window(s: &[u8], offset: usize) -> u8 {
164+
const BITS_IN_BYTE: usize = 8;
165+
166+
let shift = offset % BITS_IN_BYTE;
167+
let byte_offset = s.len() - 1 - (offset / BITS_IN_BYTE);
168+
169+
let w0 = s[byte_offset];
170+
171+
// The window fits in `w0` when it does not cross the byte boundary. At
// the most significant byte (index 0) there is no higher byte to borrow
// from, so the missing high bits are simply read as zero.
let single_byte_window = shift <= (BITS_IN_BYTE - Self::WINDOW_BITS) || byte_offset == 0;
172+
173+
let bits = if single_byte_window {
174+
// If we can get the window out of single byte, do so
175+
w0 >> shift
176+
} else {
177+
// Otherwise we must join two bytes and extract the result:
// the low part comes from `w0`, the high part from the next more
// significant byte.
178+
let w1 = s[byte_offset - 1];
179+
(w0 >> shift) | (w1 << (BITS_IN_BYTE - shift))
180+
};
181+
182+
bits & Self::WINDOW_MASK
183+
}
184+
185+
// Constant time table lookup
186+
//
187+
// This version is specifically adapted to this algorithm. If
188+
// index is zero, then it returns the identity element. Otherwise
189+
// it returns from[index-1].
//
// Every entry of `from` is visited whatever `index` is, and the result is
// picked with `conditional_assign` rather than by indexing. An `index`
// larger than `from.len()` matches no entry and also yields the identity.
190+
#[inline(always)]
191+
fn ct_select(from: &[ic_bls12_381::G2Affine], index: usize) -> ic_bls12_381::G2Affine {
192+
use subtle::{ConditionallySelectable, ConstantTimeEq};
193+
194+
let mut val = ic_bls12_381::G2Affine::identity();
195+
196+
// Shift to the table's 0-based position. For index == 0 this wraps to
// usize::MAX, which never equals a loop position, so `val` stays the
// identity without needing a branch.
let index = index.wrapping_sub(1);
197+
for (idx, v) in from.iter().enumerate() {
198+
val.conditional_assign(v, usize::ct_eq(&idx, &index));
199+
}
200+
201+
val
202+
}
203+
}
204+
205+
// Precomputed table for the G2 generator, used for multiplications of the
// generator by a scalar. `lazy_static` defers the call to
// `G2PrecomputedTable::new` until the table is first accessed.
lazy_static::lazy_static! {
206+
static ref G2_MUL_TABLE: G2PrecomputedTable = G2PrecomputedTable::new(&G2Affine::generator());
207+
}
208+
35209
/// Derive a symmetric key using HKDF-SHA256
36210
fn hkdf(okm: &mut [u8], input: &[u8], domain_sep: &str) {
37211
let hk = hkdf::Hkdf::<sha2::Sha256>::new(None, input);
@@ -206,7 +380,7 @@ impl MasterPublicKey {
206380

207381
let offset = hash_to_scalar_two_inputs(&self.serialize(), canister_id, dst);
208382

209-
let derived_key = G2Affine::from(self.point + G2Affine::generator() * offset);
383+
let derived_key = G2_MUL_TABLE.mul_vartime(&offset, Some(&self.point));
210384
DerivedPublicKey { point: derived_key }
211385
}
212386

@@ -290,7 +464,7 @@ impl DerivedPublicKey {
290464

291465
let offset = hash_to_scalar_two_inputs(&self.serialize(), context, dst);
292466

293-
let derived_key = G2Affine::from(self.point + G2Affine::generator() * offset);
467+
let derived_key = G2_MUL_TABLE.mul_vartime(&offset, Some(&self.point));
294468
Self { point: derived_key }
295469
}
296470

@@ -733,7 +907,7 @@ impl IbeCiphertext {
733907

734908
let tsig = ic_bls12_381::pairing(&pt, &dpk.point) * t;
735909

736-
let c1 = G2Affine::from(G2Affine::generator() * t);
910+
let c1 = G2_MUL_TABLE.mul(&t);
737911
let c2 = Self::mask_seed(seed.value(), &tsig);
738912
let c3 = Self::mask_msg(msg, seed.value());
739913

@@ -753,15 +927,15 @@ impl IbeCiphertext {
753927
///
754928
/// Returns the plaintext, or Err if decryption failed
755929
pub fn decrypt(&self, vetkey: &VetKey) -> Result<Vec<u8>, String> {
756-
let t = ic_bls12_381::pairing(vetkey.point(), &self.c1);
930+
let tsig = ic_bls12_381::pairing(vetkey.point(), &self.c1);
757931

758-
let seed = Self::mask_seed(&self.c2, &t);
932+
let seed = Self::mask_seed(&self.c2, &tsig);
759933

760934
let msg = Self::mask_msg(&self.c3, &seed);
761935

762936
let t = Self::hash_to_mask(&self.header, &seed, &msg);
763937

764-
let g_t = G2Affine::from(G2Affine::generator() * t);
938+
let g_t = G2_MUL_TABLE.mul(&t);
765939

766940
if self.c1 == g_t {
767941
Ok(msg)

0 commit comments

Comments
 (0)