@@ -232,6 +232,21 @@ static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_
232
232
return zeta ;
233
233
}
234
234
235
+ /* inv256[i] = -(2*i+1)^-1 (mod 256) */
236
+ static const uint8_t secp256k1_modinv32_inv256 [128 ] = {
237
+ 0xFF , 0x55 , 0x33 , 0x49 , 0xC7 , 0x5D , 0x3B , 0x11 , 0x0F , 0xE5 , 0xC3 , 0x59 ,
238
+ 0xD7 , 0xED , 0xCB , 0x21 , 0x1F , 0x75 , 0x53 , 0x69 , 0xE7 , 0x7D , 0x5B , 0x31 ,
239
+ 0x2F , 0x05 , 0xE3 , 0x79 , 0xF7 , 0x0D , 0xEB , 0x41 , 0x3F , 0x95 , 0x73 , 0x89 ,
240
+ 0x07 , 0x9D , 0x7B , 0x51 , 0x4F , 0x25 , 0x03 , 0x99 , 0x17 , 0x2D , 0x0B , 0x61 ,
241
+ 0x5F , 0xB5 , 0x93 , 0xA9 , 0x27 , 0xBD , 0x9B , 0x71 , 0x6F , 0x45 , 0x23 , 0xB9 ,
242
+ 0x37 , 0x4D , 0x2B , 0x81 , 0x7F , 0xD5 , 0xB3 , 0xC9 , 0x47 , 0xDD , 0xBB , 0x91 ,
243
+ 0x8F , 0x65 , 0x43 , 0xD9 , 0x57 , 0x6D , 0x4B , 0xA1 , 0x9F , 0xF5 , 0xD3 , 0xE9 ,
244
+ 0x67 , 0xFD , 0xDB , 0xB1 , 0xAF , 0x85 , 0x63 , 0xF9 , 0x77 , 0x8D , 0x6B , 0xC1 ,
245
+ 0xBF , 0x15 , 0xF3 , 0x09 , 0x87 , 0x1D , 0xFB , 0xD1 , 0xCF , 0xA5 , 0x83 , 0x19 ,
246
+ 0x97 , 0xAD , 0x8B , 0xE1 , 0xDF , 0x35 , 0x13 , 0x29 , 0xA7 , 0x3D , 0x1B , 0xF1 ,
247
+ 0xEF , 0xC5 , 0xA3 , 0x39 , 0xB7 , 0xCD , 0xAB , 0x01
248
+ };
249
+
235
250
/* Compute the transition matrix and eta for 30 divsteps (variable time).
236
251
*
237
252
* Input: eta: initial eta
@@ -243,21 +258,6 @@ static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_
243
258
* Implements the divsteps_n_matrix_var function from the explanation.
244
259
*/
245
260
static int32_t secp256k1_modinv32_divsteps_30_var (int32_t eta , uint32_t f0 , uint32_t g0 , secp256k1_modinv32_trans2x2 * t ) {
246
- /* inv256[i] = -(2*i+1)^-1 (mod 256) */
247
- static const uint8_t inv256 [128 ] = {
248
- 0xFF , 0x55 , 0x33 , 0x49 , 0xC7 , 0x5D , 0x3B , 0x11 , 0x0F , 0xE5 , 0xC3 , 0x59 ,
249
- 0xD7 , 0xED , 0xCB , 0x21 , 0x1F , 0x75 , 0x53 , 0x69 , 0xE7 , 0x7D , 0x5B , 0x31 ,
250
- 0x2F , 0x05 , 0xE3 , 0x79 , 0xF7 , 0x0D , 0xEB , 0x41 , 0x3F , 0x95 , 0x73 , 0x89 ,
251
- 0x07 , 0x9D , 0x7B , 0x51 , 0x4F , 0x25 , 0x03 , 0x99 , 0x17 , 0x2D , 0x0B , 0x61 ,
252
- 0x5F , 0xB5 , 0x93 , 0xA9 , 0x27 , 0xBD , 0x9B , 0x71 , 0x6F , 0x45 , 0x23 , 0xB9 ,
253
- 0x37 , 0x4D , 0x2B , 0x81 , 0x7F , 0xD5 , 0xB3 , 0xC9 , 0x47 , 0xDD , 0xBB , 0x91 ,
254
- 0x8F , 0x65 , 0x43 , 0xD9 , 0x57 , 0x6D , 0x4B , 0xA1 , 0x9F , 0xF5 , 0xD3 , 0xE9 ,
255
- 0x67 , 0xFD , 0xDB , 0xB1 , 0xAF , 0x85 , 0x63 , 0xF9 , 0x77 , 0x8D , 0x6B , 0xC1 ,
256
- 0xBF , 0x15 , 0xF3 , 0x09 , 0x87 , 0x1D , 0xFB , 0xD1 , 0xCF , 0xA5 , 0x83 , 0x19 ,
257
- 0x97 , 0xAD , 0x8B , 0xE1 , 0xDF , 0x35 , 0x13 , 0x29 , 0xA7 , 0x3D , 0x1B , 0xF1 ,
258
- 0xEF , 0xC5 , 0xA3 , 0x39 , 0xB7 , 0xCD , 0xAB , 0x01
259
- };
260
-
261
261
/* Transformation matrix; see comments in secp256k1_modinv32_divsteps_30. */
262
262
uint32_t u = 1 , v = 0 , q = 0 , r = 1 ;
263
263
uint32_t f = f0 , g = g0 , m ;
@@ -297,7 +297,7 @@ static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint
297
297
VERIFY_CHECK (limit > 0 && limit <= 30 );
298
298
m = (UINT32_MAX >> (32 - limit )) & 255U ;
299
299
/* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */
300
- w = (g * inv256 [(f >> 1 ) & 127 ]) & m ;
300
+ w = (g * secp256k1_modinv32_inv256 [(f >> 1 ) & 127 ]) & m ;
301
301
/* Do so. */
302
302
g += f * w ;
303
303
q += u * w ;
@@ -317,6 +317,86 @@ static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint
317
317
return eta ;
318
318
}
319
319
320
+ /* Compute the transition matrix and eta for 30 posdivsteps (variable time, eta=-delta), and keeps track
321
+ * of the Jacobi symbol along the way. f0 and g0 must be f and g mod 2^32 rather than 2^30, because
322
+ * Jacobi tracking requires knowing (f mod 8) rather than just (f mod 2).
323
+ *
324
+ * Input: eta: initial eta
325
+ * f0: bottom limb of initial f
326
+ * g0: bottom limb of initial g
327
+ * Output: t: transition matrix
328
+ * Input/Output: (*jacp & 1) is bitflipped if and only if the Jacobi symbol of (f | g) changes sign
329
+ * by applying the returned transformation matrix to it. The other bits of *jacp may
330
+ * change, but are meaningless.
331
+ * Return: final eta
332
+ */
333
+ static int32_t secp256k1_modinv32_posdivsteps_30_var (int32_t eta , uint32_t f0 , uint32_t g0 , secp256k1_modinv32_trans2x2 * t , int * jacp ) {
334
+ /* Transformation matrix. */
335
+ uint32_t u = 1 , v = 0 , q = 0 , r = 1 ;
336
+ uint32_t f = f0 , g = g0 , m ;
337
+ uint16_t w ;
338
+ int i = 30 , limit , zeros ;
339
+ int jac = * jacp ;
340
+
341
+ for (;;) {
342
+ /* Use a sentinel bit to count zeros only up to i. */
343
+ zeros = secp256k1_ctz32_var (g | (UINT32_MAX << i ));
344
+ /* Perform zeros divsteps at once; they all just divide g by two. */
345
+ g >>= zeros ;
346
+ u <<= zeros ;
347
+ v <<= zeros ;
348
+ eta -= zeros ;
349
+ i -= zeros ;
350
+ /* Update the bottom bit of jac: when dividing g by an odd power of 2,
351
+ * if (f mod 8) is 3 or 5, the Jacobi symbol changes sign. */
352
+ jac ^= (zeros & ((f >> 1 ) ^ (f >> 2 )));
353
+ /* We're done once we've done 30 posdivsteps. */
354
+ if (i == 0 ) break ;
355
+ VERIFY_CHECK ((f & 1 ) == 1 );
356
+ VERIFY_CHECK ((g & 1 ) == 1 );
357
+ VERIFY_CHECK ((u * f0 + v * g0 ) == f << (30 - i ));
358
+ VERIFY_CHECK ((q * f0 + r * g0 ) == g << (30 - i ));
359
+ /* If eta is negative, negate it and replace f,g with g,f. */
360
+ if (eta < 0 ) {
361
+ uint32_t tmp ;
362
+ eta = - eta ;
363
+ /* Update bottom bit of jac: when swapping f and g, the Jacobi symbol changes sign
364
+ * if both f and g are 3 mod 4. */
365
+ jac ^= ((f & g ) >> 1 );
366
+ tmp = f ; f = g ; g = tmp ;
367
+ tmp = u ; u = q ; q = tmp ;
368
+ tmp = v ; v = r ; r = tmp ;
369
+ }
370
+ /* eta is now >= 0. In what follows we're going to cancel out the bottom bits of g. No more
371
+ * than i can be cancelled out (as we'd be done before that point), and no more than eta+1
372
+ * can be done as its sign will flip once that happens. */
373
+ limit = ((int )eta + 1 ) > i ? i : ((int )eta + 1 );
374
+ /* m is a mask for the bottom min(limit, 8) bits (our table only supports 8 bits). */
375
+ VERIFY_CHECK (limit > 0 && limit <= 30 );
376
+ m = (UINT32_MAX >> (32 - limit )) & 255U ;
377
+ /* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */
378
+ w = (g * secp256k1_modinv32_inv256 [(f >> 1 ) & 127 ]) & m ;
379
+ /* Do so. */
380
+ g += f * w ;
381
+ q += u * w ;
382
+ r += v * w ;
383
+ VERIFY_CHECK ((g & m ) == 0 );
384
+ }
385
+ /* Return data in t and return value. */
386
+ t -> u = (int32_t )u ;
387
+ t -> v = (int32_t )v ;
388
+ t -> q = (int32_t )q ;
389
+ t -> r = (int32_t )r ;
390
+ /* The determinant of t must be a power of two. This guarantees that multiplication with t
391
+ * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
392
+ * will be divided out again). As each divstep's individual matrix has determinant 2 or -2,
393
+ * the aggregate of 30 of them will have determinant 2^30 or -2^30. */
394
+ VERIFY_CHECK ((int64_t )t -> u * t -> r - (int64_t )t -> v * t -> q == ((int64_t )1 ) << 30 ||
395
+ (int64_t )t -> u * t -> r - (int64_t )t -> v * t -> q == - (((int64_t )1 ) << 30 ));
396
+ * jacp = jac ;
397
+ return eta ;
398
+ }
399
+
320
400
/* Compute (t/2^30) * [d, e] mod modulus, where t is a transition matrix for 30 divsteps.
321
401
*
322
402
* On input and output, d and e are in range (-2*modulus,modulus). All output limbs will be in range
@@ -584,4 +664,74 @@ static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256
584
664
* x = d ;
585
665
}
586
666
667
+ /* Do up to 50 iterations of 30 posdivsteps (up to 1500 steps; more is extremely rare) each until f=1.
668
+ * In VERIFY mode use a lower number of iterations (750, close to the median 756), so failure actually occurs. */
669
+ #ifdef VERIFY
670
+ #define JACOBI32_ITERATIONS 25
671
+ #else
672
+ #define JACOBI32_ITERATIONS 50
673
+ #endif
674
+
675
+ /* Compute the Jacobi symbol of x modulo modinfo->modulus (variable time). gcd(x,modulus) must be 1. */
676
+ static int secp256k1_jacobi32_maybe_var (const secp256k1_modinv32_signed30 * x , const secp256k1_modinv32_modinfo * modinfo ) {
677
+ /* Start with f=modulus, g=x, eta=-1. */
678
+ secp256k1_modinv32_signed30 f = modinfo -> modulus ;
679
+ secp256k1_modinv32_signed30 g = * x ;
680
+ int j , len = 9 ;
681
+ int32_t eta = -1 ; /* eta = -delta; delta is initially 1 */
682
+ int32_t cond , fn , gn ;
683
+ int jac = 0 ;
684
+ int count ;
685
+
686
+ /* The input limbs must all be non-negative. */
687
+ VERIFY_CHECK (g .v [0 ] >= 0 && g .v [1 ] >= 0 && g .v [2 ] >= 0 && g .v [3 ] >= 0 && g .v [4 ] >= 0 && g .v [5 ] >= 0 && g .v [6 ] >= 0 && g .v [7 ] >= 0 && g .v [8 ] >= 0 );
688
+
689
+ /* If x > 0, then if the loop below converges, it converges to f=g=gcd(x,modulus). Since we
690
+ * require that gcd(x,modulus)=1 and modulus>=3, x cannot be 0. Thus, we must reach f=1 (or
691
+ * time out). */
692
+ VERIFY_CHECK ((g .v [0 ] | g .v [1 ] | g .v [2 ] | g .v [3 ] | g .v [4 ] | g .v [5 ] | g .v [6 ] | g .v [7 ] | g .v [8 ]) != 0 );
693
+
694
+ for (count = 0 ; count < JACOBI32_ITERATIONS ; ++ count ) {
695
+ /* Compute transition matrix and new eta after 30 posdivsteps. */
696
+ secp256k1_modinv32_trans2x2 t ;
697
+ eta = secp256k1_modinv32_posdivsteps_30_var (eta , f .v [0 ] | ((uint32_t )f .v [1 ] << 30 ), g .v [0 ] | ((uint32_t )g .v [1 ] << 30 ), & t , & jac );
698
+ /* Update f,g using that transition matrix. */
699
+ #ifdef VERIFY
700
+ VERIFY_CHECK (secp256k1_modinv32_mul_cmp_30 (& f , len , & modinfo -> modulus , 0 ) > 0 ); /* f > 0 */
701
+ VERIFY_CHECK (secp256k1_modinv32_mul_cmp_30 (& f , len , & modinfo -> modulus , 1 ) <= 0 ); /* f <= modulus */
702
+ VERIFY_CHECK (secp256k1_modinv32_mul_cmp_30 (& g , len , & modinfo -> modulus , 0 ) > 0 ); /* g > 0 */
703
+ VERIFY_CHECK (secp256k1_modinv32_mul_cmp_30 (& g , len , & modinfo -> modulus , 1 ) < 0 ); /* g < modulus */
704
+ #endif
705
+ secp256k1_modinv32_update_fg_30_var (len , & f , & g , & t );
706
+ /* If the bottom limb of f is 1, there is a chance that f=1. */
707
+ if (f .v [0 ] == 1 ) {
708
+ cond = 0 ;
709
+ /* Check if the other limbs are also 0. */
710
+ for (j = 1 ; j < len ; ++ j ) {
711
+ cond |= f .v [j ];
712
+ }
713
+ /* If so, we're done. If f=1, the Jacobi symbol (g | f)=1. */
714
+ if (cond == 0 ) return 1 - 2 * (jac & 1 );
715
+ }
716
+
717
+ /* Determine if len>1 and limb (len-1) of both f and g is 0. */
718
+ fn = f .v [len - 1 ];
719
+ gn = g .v [len - 1 ];
720
+ cond = ((int32_t )len - 2 ) >> 31 ;
721
+ cond |= fn ;
722
+ cond |= gn ;
723
+ /* If so, reduce length. */
724
+ if (cond == 0 ) -- len ;
725
+ #ifdef VERIFY
726
+ VERIFY_CHECK (secp256k1_modinv32_mul_cmp_30 (& f , len , & modinfo -> modulus , 0 ) > 0 ); /* f > 0 */
727
+ VERIFY_CHECK (secp256k1_modinv32_mul_cmp_30 (& f , len , & modinfo -> modulus , 1 ) <= 0 ); /* f <= modulus */
728
+ VERIFY_CHECK (secp256k1_modinv32_mul_cmp_30 (& g , len , & modinfo -> modulus , 0 ) > 0 ); /* g > 0 */
729
+ VERIFY_CHECK (secp256k1_modinv32_mul_cmp_30 (& g , len , & modinfo -> modulus , 1 ) < 0 ); /* g < modulus */
730
+ #endif
731
+ }
732
+
733
+ /* The loop failed to converge to f=g after 1500 iterations. Return 0, indicating unknown result. */
734
+ return 0 ;
735
+ }
736
+
587
737
#endif /* SECP256K1_MODINV32_IMPL_H */
0 commit comments