Skip to content

Commit 1de2a01

Browse files
committed
Native jacobi symbol algorithm
This introduces variants of the divsteps-based GCD algorithm used for modular inverses to compute Jacobi symbols. Changes compared to the normal vartime divsteps: * Only positive matrices are used, guaranteeing that f and g remain positive. * An additional jac variable is updated to track sign changes during matrix computation. * There is (so far) no proof that this algorithm terminates within reasonable amount of time for every input, but experimentally it appears to almost always need less than 900 iterations. To account for that, only a bounded number of iterations is performed (1500), after which failure is returned. In VERIFY mode a lower iteration count is used to make sure that callers exercise their fallback. * The algorithm converges to f=g=gcd(f0,g0) rather than g=0. To keep this test simple, the end condition is f=1, which won't be reached if started with non-coprime or g=0 inputs. Because of that we only support coprime non-zero inputs.
1 parent 04c6c1b commit 1de2a01

File tree

5 files changed

+381
-19
lines changed

5 files changed

+381
-19
lines changed

src/modinv32.h

+5
Original file line numberDiff line numberDiff line change
@@ -35,4 +35,9 @@ static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256
3535
/* Same as secp256k1_modinv32_var, but constant time in x (not in the modulus). */
3636
static void secp256k1_modinv32(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo);
3737

38+
/* Compute the Jacobi symbol for (x | modinfo->modulus). x must be coprime with modulus (and thus
39+
* cannot be 0, as modulus >= 3). All limbs of x must be non-negative. Returns 0 if the result
40+
* cannot be computed. */
41+
static int secp256k1_jacobi32_maybe_var(const secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo);
42+
3843
#endif /* SECP256K1_MODINV32_H */

src/modinv32_impl.h

+166-16
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,21 @@ static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_
232232
return zeta;
233233
}
234234

235+
/* inv256[i] = -(2*i+1)^-1 (mod 256) */
236+
static const uint8_t secp256k1_modinv32_inv256[128] = {
237+
0xFF, 0x55, 0x33, 0x49, 0xC7, 0x5D, 0x3B, 0x11, 0x0F, 0xE5, 0xC3, 0x59,
238+
0xD7, 0xED, 0xCB, 0x21, 0x1F, 0x75, 0x53, 0x69, 0xE7, 0x7D, 0x5B, 0x31,
239+
0x2F, 0x05, 0xE3, 0x79, 0xF7, 0x0D, 0xEB, 0x41, 0x3F, 0x95, 0x73, 0x89,
240+
0x07, 0x9D, 0x7B, 0x51, 0x4F, 0x25, 0x03, 0x99, 0x17, 0x2D, 0x0B, 0x61,
241+
0x5F, 0xB5, 0x93, 0xA9, 0x27, 0xBD, 0x9B, 0x71, 0x6F, 0x45, 0x23, 0xB9,
242+
0x37, 0x4D, 0x2B, 0x81, 0x7F, 0xD5, 0xB3, 0xC9, 0x47, 0xDD, 0xBB, 0x91,
243+
0x8F, 0x65, 0x43, 0xD9, 0x57, 0x6D, 0x4B, 0xA1, 0x9F, 0xF5, 0xD3, 0xE9,
244+
0x67, 0xFD, 0xDB, 0xB1, 0xAF, 0x85, 0x63, 0xF9, 0x77, 0x8D, 0x6B, 0xC1,
245+
0xBF, 0x15, 0xF3, 0x09, 0x87, 0x1D, 0xFB, 0xD1, 0xCF, 0xA5, 0x83, 0x19,
246+
0x97, 0xAD, 0x8B, 0xE1, 0xDF, 0x35, 0x13, 0x29, 0xA7, 0x3D, 0x1B, 0xF1,
247+
0xEF, 0xC5, 0xA3, 0x39, 0xB7, 0xCD, 0xAB, 0x01
248+
};
249+
235250
/* Compute the transition matrix and eta for 30 divsteps (variable time).
236251
*
237252
* Input: eta: initial eta
@@ -243,21 +258,6 @@ static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_
243258
* Implements the divsteps_n_matrix_var function from the explanation.
244259
*/
245260
static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) {
246-
/* inv256[i] = -(2*i+1)^-1 (mod 256) */
247-
static const uint8_t inv256[128] = {
248-
0xFF, 0x55, 0x33, 0x49, 0xC7, 0x5D, 0x3B, 0x11, 0x0F, 0xE5, 0xC3, 0x59,
249-
0xD7, 0xED, 0xCB, 0x21, 0x1F, 0x75, 0x53, 0x69, 0xE7, 0x7D, 0x5B, 0x31,
250-
0x2F, 0x05, 0xE3, 0x79, 0xF7, 0x0D, 0xEB, 0x41, 0x3F, 0x95, 0x73, 0x89,
251-
0x07, 0x9D, 0x7B, 0x51, 0x4F, 0x25, 0x03, 0x99, 0x17, 0x2D, 0x0B, 0x61,
252-
0x5F, 0xB5, 0x93, 0xA9, 0x27, 0xBD, 0x9B, 0x71, 0x6F, 0x45, 0x23, 0xB9,
253-
0x37, 0x4D, 0x2B, 0x81, 0x7F, 0xD5, 0xB3, 0xC9, 0x47, 0xDD, 0xBB, 0x91,
254-
0x8F, 0x65, 0x43, 0xD9, 0x57, 0x6D, 0x4B, 0xA1, 0x9F, 0xF5, 0xD3, 0xE9,
255-
0x67, 0xFD, 0xDB, 0xB1, 0xAF, 0x85, 0x63, 0xF9, 0x77, 0x8D, 0x6B, 0xC1,
256-
0xBF, 0x15, 0xF3, 0x09, 0x87, 0x1D, 0xFB, 0xD1, 0xCF, 0xA5, 0x83, 0x19,
257-
0x97, 0xAD, 0x8B, 0xE1, 0xDF, 0x35, 0x13, 0x29, 0xA7, 0x3D, 0x1B, 0xF1,
258-
0xEF, 0xC5, 0xA3, 0x39, 0xB7, 0xCD, 0xAB, 0x01
259-
};
260-
261261
/* Transformation matrix; see comments in secp256k1_modinv32_divsteps_30. */
262262
uint32_t u = 1, v = 0, q = 0, r = 1;
263263
uint32_t f = f0, g = g0, m;
@@ -297,7 +297,7 @@ static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint
297297
VERIFY_CHECK(limit > 0 && limit <= 30);
298298
m = (UINT32_MAX >> (32 - limit)) & 255U;
299299
/* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */
300-
w = (g * inv256[(f >> 1) & 127]) & m;
300+
w = (g * secp256k1_modinv32_inv256[(f >> 1) & 127]) & m;
301301
/* Do so. */
302302
g += f * w;
303303
q += u * w;
@@ -317,6 +317,86 @@ static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint
317317
return eta;
318318
}
319319

320+
/* Compute the transition matrix and eta for 30 posdivsteps (variable time, eta=-delta), and keeps track
321+
* of the Jacobi symbol along the way. f0 and g0 must be f and g mod 2^32 rather than 2^30, because
322+
* Jacobi tracking requires knowing (f mod 8) rather than just (f mod 2).
323+
*
324+
* Input: eta: initial eta
325+
* f0: bottom limb of initial f
326+
* g0: bottom limb of initial g
327+
* Output: t: transition matrix
328+
* Input/Output: (*jacp & 1) is bitflipped if and only if the Jacobi symbol of (f | g) changes sign
329+
* by applying the returned transformation matrix to it. The other bits of *jacp may
330+
* change, but are meaningless.
331+
* Return: final eta
332+
*/
333+
static int32_t secp256k1_modinv32_posdivsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t, int *jacp) {
334+
/* Transformation matrix. */
335+
uint32_t u = 1, v = 0, q = 0, r = 1;
336+
uint32_t f = f0, g = g0, m;
337+
uint16_t w;
338+
int i = 30, limit, zeros;
339+
int jac = *jacp;
340+
341+
for (;;) {
342+
/* Use a sentinel bit to count zeros only up to i. */
343+
zeros = secp256k1_ctz32_var(g | (UINT32_MAX << i));
344+
/* Perform zeros divsteps at once; they all just divide g by two. */
345+
g >>= zeros;
346+
u <<= zeros;
347+
v <<= zeros;
348+
eta -= zeros;
349+
i -= zeros;
350+
/* Update the bottom bit of jac: when dividing g by an odd power of 2,
351+
* if (f mod 8) is 3 or 5, the Jacobi symbol changes sign. */
352+
jac ^= (zeros & ((f >> 1) ^ (f >> 2)));
353+
/* We're done once we've done 30 posdivsteps. */
354+
if (i == 0) break;
355+
VERIFY_CHECK((f & 1) == 1);
356+
VERIFY_CHECK((g & 1) == 1);
357+
VERIFY_CHECK((u * f0 + v * g0) == f << (30 - i));
358+
VERIFY_CHECK((q * f0 + r * g0) == g << (30 - i));
359+
/* If eta is negative, negate it and replace f,g with g,f. */
360+
if (eta < 0) {
361+
uint32_t tmp;
362+
eta = -eta;
363+
/* Update bottom bit of jac: when swapping f and g, the Jacobi symbol changes sign
364+
* if both f and g are 3 mod 4. */
365+
jac ^= ((f & g) >> 1);
366+
tmp = f; f = g; g = tmp;
367+
tmp = u; u = q; q = tmp;
368+
tmp = v; v = r; r = tmp;
369+
}
370+
/* eta is now >= 0. In what follows we're going to cancel out the bottom bits of g. No more
371+
* than i can be cancelled out (as we'd be done before that point), and no more than eta+1
372+
* can be done as its sign will flip once that happens. */
373+
limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
374+
/* m is a mask for the bottom min(limit, 8) bits (our table only supports 8 bits). */
375+
VERIFY_CHECK(limit > 0 && limit <= 30);
376+
m = (UINT32_MAX >> (32 - limit)) & 255U;
377+
/* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */
378+
w = (g * secp256k1_modinv32_inv256[(f >> 1) & 127]) & m;
379+
/* Do so. */
380+
g += f * w;
381+
q += u * w;
382+
r += v * w;
383+
VERIFY_CHECK((g & m) == 0);
384+
}
385+
/* Return data in t and return value. */
386+
t->u = (int32_t)u;
387+
t->v = (int32_t)v;
388+
t->q = (int32_t)q;
389+
t->r = (int32_t)r;
390+
/* The determinant of t must be a power of two. This guarantees that multiplication with t
391+
* does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
392+
* will be divided out again). As each divstep's individual matrix has determinant 2 or -2,
393+
* the aggregate of 30 of them will have determinant 2^30 or -2^30. */
394+
VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30 ||
395+
(int64_t)t->u * t->r - (int64_t)t->v * t->q == -(((int64_t)1) << 30));
396+
*jacp = jac;
397+
return eta;
398+
}
399+
320400
/* Compute (t/2^30) * [d, e] mod modulus, where t is a transition matrix for 30 divsteps.
321401
*
322402
* On input and output, d and e are in range (-2*modulus,modulus). All output limbs will be in range
@@ -584,4 +664,74 @@ static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256
584664
*x = d;
585665
}
586666

667+
/* Do up to 50 iterations of 30 posdivsteps (up to 1500 steps; more is extremely rare) each until f=1.
668+
* In VERIFY mode use a lower number of iterations (750, close to the median 756), so failure actually occurs. */
669+
#ifdef VERIFY
670+
#define JACOBI32_ITERATIONS 25
671+
#else
672+
#define JACOBI32_ITERATIONS 50
673+
#endif
674+
675+
/* Compute the Jacobi symbol of x modulo modinfo->modulus (variable time). gcd(x,modulus) must be 1. */
676+
static int secp256k1_jacobi32_maybe_var(const secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo) {
677+
/* Start with f=modulus, g=x, eta=-1. */
678+
secp256k1_modinv32_signed30 f = modinfo->modulus;
679+
secp256k1_modinv32_signed30 g = *x;
680+
int j, len = 9;
681+
int32_t eta = -1; /* eta = -delta; delta is initially 1 */
682+
int32_t cond, fn, gn;
683+
int jac = 0;
684+
int count;
685+
686+
/* The input limbs must all be non-negative. */
687+
VERIFY_CHECK(g.v[0] >= 0 && g.v[1] >= 0 && g.v[2] >= 0 && g.v[3] >= 0 && g.v[4] >= 0 && g.v[5] >= 0 && g.v[6] >= 0 && g.v[7] >= 0 && g.v[8] >= 0);
688+
689+
/* If x > 0, then if the loop below converges, it converges to f=g=gcd(x,modulus). Since we
690+
* require that gcd(x,modulus)=1 and modulus>=3, x cannot be 0. Thus, we must reach f=1 (or
691+
* time out). */
692+
VERIFY_CHECK((g.v[0] | g.v[1] | g.v[2] | g.v[3] | g.v[4] | g.v[5] | g.v[6] | g.v[7] | g.v[8]) != 0);
693+
694+
for (count = 0; count < JACOBI32_ITERATIONS; ++count) {
695+
/* Compute transition matrix and new eta after 30 posdivsteps. */
696+
secp256k1_modinv32_trans2x2 t;
697+
eta = secp256k1_modinv32_posdivsteps_30_var(eta, f.v[0] | ((uint32_t)f.v[1] << 30), g.v[0] | ((uint32_t)g.v[1] << 30), &t, &jac);
698+
/* Update f,g using that transition matrix. */
699+
#ifdef VERIFY
700+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */
701+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
702+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */
703+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
704+
#endif
705+
secp256k1_modinv32_update_fg_30_var(len, &f, &g, &t);
706+
/* If the bottom limb of f is 1, there is a chance that f=1. */
707+
if (f.v[0] == 1) {
708+
cond = 0;
709+
/* Check if the other limbs are also 0. */
710+
for (j = 1; j < len; ++j) {
711+
cond |= f.v[j];
712+
}
713+
/* If so, we're done. If f=1, the Jacobi symbol (g | f)=1. */
714+
if (cond == 0) return 1 - 2*(jac & 1);
715+
}
716+
717+
/* Determine if len>1 and limb (len-1) of both f and g is 0. */
718+
fn = f.v[len - 1];
719+
gn = g.v[len - 1];
720+
cond = ((int32_t)len - 2) >> 31;
721+
cond |= fn;
722+
cond |= gn;
723+
/* If so, reduce length. */
724+
if (cond == 0) --len;
725+
#ifdef VERIFY
726+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 0) > 0); /* f > 0 */
727+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
728+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 0) > 0); /* g > 0 */
729+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
730+
#endif
731+
}
732+
733+
/* The loop failed to converge to f=g after 1500 iterations. Return 0, indicating unknown result. */
734+
return 0;
735+
}
736+
587737
#endif /* SECP256K1_MODINV32_IMPL_H */

src/modinv64.h

+5
Original file line numberDiff line numberDiff line change
@@ -39,4 +39,9 @@ static void secp256k1_modinv64_var(secp256k1_modinv64_signed62 *x, const secp256
3939
/* Same as secp256k1_modinv64_var, but constant time in x (not in the modulus). */
4040
static void secp256k1_modinv64(secp256k1_modinv64_signed62 *x, const secp256k1_modinv64_modinfo *modinfo);
4141

42+
/* Compute the Jacobi symbol for (x | modinfo->modulus). x must be coprime with modulus (and thus
43+
* cannot be 0, as modulus >= 3). All limbs of x must be non-negative. Returns 0 if the result
44+
* cannot be computed. */
45+
static int secp256k1_jacobi64_maybe_var(const secp256k1_modinv64_signed62 *x, const secp256k1_modinv64_modinfo *modinfo);
46+
4247
#endif /* SECP256K1_MODINV64_H */

0 commit comments

Comments
 (0)