Skip to content

Commit 4840c90

Browse files
committed
Added N-R method for testing
1 parent 4c356b8 commit 4840c90

File tree

1 file changed

+54
-9
lines changed

1 file changed

+54
-9
lines changed

s_mp_faster_to_radix.c

Lines changed: 54 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,11 @@ mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *w
9797
mp_err err;
9898
int32_t n = 0, k, t = 0, steps = 0;
9999
int ilog2a;
100+
#ifdef MP_TO_RADIX_USE_NEWTON_RAPHSON
101+
int s, g = 3;
102+
mp_int M2, M4;
103+
bool use_newton_raphson = true;
104+
#endif
100105

101106
/* Use given buffer directly, no temporary buffers for the individual chunks */
102107
char **sptr = &str;
@@ -109,6 +114,13 @@ mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *w
109114
/* List of moduli */
110115
mp_int *P = NULL;
111116

117+
#ifdef MP_TO_RADIX_USE_NEWTON_RAPHSON
118+
/* Be nice and utter a warning. For now. */
119+
if (radix != 10) {
120+
fprintf(stderr,"The Newton-Raphson method is for base 10 only!\n");
121+
}
122+
#endif
123+
112124
/* Denominator for the reciprocal: b^y */
113125
n = s_pow((int32_t)radix, (int32_t)s_mp_radix_exponent_y[radix]);
114126

@@ -163,6 +175,10 @@ mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *w
163175
if ((err = mp_div(&R[0], &P[0], &R[0], NULL)) != MP_OKAY) goto LTM_ERR;
164176
if ((err = mp_incr(&R[0])) != MP_OKAY) goto LTM_ERR;
165177

178+
#ifdef MP_TO_RADIX_USE_NEWTON_RAPHSON
179+
if ((err = mp_init_multi(&M2, &M4, NULL)) != MP_OKAY) goto LTM_ERR;
180+
#endif
181+
166182
/* Compute the rest of the reciprocals if as needed */
167183
for (t = 1; t < steps; t++) {
168184
/* P_t = (b^y)^(2^t) = n^(2^t) */
@@ -208,15 +224,41 @@ mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *w
208224
/* Compute numerator */
209225
if ((err = mp_init(&R[t])) != MP_OKAY) goto LTM_ERR;
210226

211-
/* R[t] = R[t] << (2^t * k) The factor cannot overflow, we checked that above */
212-
/* TODO: these are more castings than in the ER in Mayrhofen at New Year's Eve! */
213-
if ((err = mp_2expt(&(R[t]), (int)((int32_t)((uint32_t)1 << (t+1)) * k))) != MP_OKAY) goto LTM_ERR;
214-
215-
/* Compute reciprocal */
216-
/* R[t] = floor(2^(2^t * k) / P[t] */
217-
if ((err = mp_div(&R[t], &P[t], &R[t], NULL)) != MP_OKAY) goto LTM_ERR;
218-
/* Ceiling if P[t] is not a power of two but it is not a problem if P[t] is a power of two. */
219-
if ((err = mp_incr(&R[t])) != MP_OKAY) goto LTM_ERR;
227+
#ifdef MP_TO_RADIX_USE_NEWTON_RAPHSON
228+
if (use_newton_raphson && (radix == 10)) {
229+
/* Use a round of Newton-Raphson to compute the next reciprocal */
230+
/* s = 2^t*k */
231+
s = (int)((int32_t)((uint32_t)1 << (t)) * k);
232+
/* M = R[t-1] * 2^g */
233+
if ((err = mp_mul_2d(&R[t-1], (mp_digit)3, &M2)) != MP_OKAY) goto LTM_ERR;
234+
if ((err = mp_sub_d(&M2, (mp_digit)2, &M2)) != MP_OKAY) goto LTM_ERR;
235+
/* Do the M-R round: M = floor( ( 2*M^2 - floor((n^2*M^4) / (2^(2*(s+g)))) ) / 2^(2*g)) + 1 */
236+
/* M2 = (M * 2^g)^2*/
237+
if ((err = mp_sqr(&M2, &M2)) != MP_OKAY) goto LTM_ERR;
238+
/* M4 = M2^2 */
239+
if ((err = mp_sqr(&M2, &M4)) != MP_OKAY) goto LTM_ERR;
240+
/* Compute numerator: n^2*M^4 = (P[t]) * M4*/
241+
if ((err = mp_mul(&P[t], &M4, &M4)) != MP_OKAY) goto LTM_ERR;
242+
/* Compute fraction by shifting right: M4>>2*(s+g) where 2*(s+g) < MAX_INT */
243+
if ((err = mp_div_2d(&M4, 2*(s+g), &M4, NULL)) != MP_OKAY) goto LTM_ERR;
244+
if ((err = mp_mul_2(&M2,&M2)) != MP_OKAY) goto LTM_ERR;
245+
if ((err = mp_sub(&M2,&M4,&M4)) != MP_OKAY) goto LTM_ERR;
246+
/* R[t] = M / 2^(2*g) remove extra bits before storage */
247+
if ((err = mp_div_2d(&M4, 2*g, &(R[t]), &M4)) != MP_OKAY) goto LTM_ERR;
248+
if ((err = mp_incr(&R[t])) != MP_OKAY) goto LTM_ERR;
249+
} else {
250+
#endif
251+
/* R[t] = R[t] << (2^t * k) The factor cannot overflow, we checked that above */
252+
/* TODO: these are more castings than in the ER in Mayrhofen at New Year's Eve! */
253+
if ((err = mp_2expt(&(R[t]), (int)((int32_t)((uint32_t)1 << (t+1)) * k))) != MP_OKAY) goto LTM_ERR;
254+
/* Compute reciprocal */
255+
/* R[t] = floor(2^(2^t * k) / P[t] */
256+
if ((err = mp_div(&R[t], &P[t], &R[t], NULL)) != MP_OKAY) goto LTM_ERR;
257+
/* Ceiling if P[t] is not a power of two but it is not a problem if P[t] is a power of two. */
258+
if ((err = mp_incr(&R[t])) != MP_OKAY) goto LTM_ERR;
259+
#ifdef MP_TO_RADIX_USE_NEWTON_RAPHSON
260+
}
261+
#endif
220262
}
221263

222264
/* And finally: start the recursion. */
@@ -233,6 +275,9 @@ mp_err s_mp_faster_to_radix(const mp_int *a, char *str, size_t maxlen, size_t *w
233275
} while (t--);
234276
MP_FREE_BUF(P, (size_t) steps * sizeof(mp_int));
235277
MP_FREE_BUF(R, (size_t) steps * sizeof(mp_int));
278+
#ifdef MP_TO_RADIX_USE_NEWTON_RAPHSON
279+
mp_clear_multi(&M2, &M4, NULL);
280+
#endif
236281
return err;
237282
}
238283

0 commit comments

Comments
 (0)