Skip to content

Commit 925f78d

Browse files
committed
Add _fe_half and use in _gej_add_ge
- Trades 1 _half for 3 _mul_int and 2 _normalize_weak - Updated formula and comments in _gej_add_ge - Added internal benchmark for _fe_half
1 parent d8a2463 commit 925f78d

File tree

5 files changed

+172
-20
lines changed

5 files changed

+172
-20
lines changed

src/bench_internal.c

+10
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,15 @@ void bench_scalar_inverse_var(void* arg, int iters) {
140140
CHECK(j <= iters);
141141
}
142142

143+
void bench_field_half(void* arg, int iters) {
144+
int i;
145+
bench_inv *data = (bench_inv*)arg;
146+
147+
for (i = 0; i < iters; i++) {
148+
secp256k1_fe_half(&data->fe[0]);
149+
}
150+
}
151+
143152
void bench_field_normalize(void* arg, int iters) {
144153
int i;
145154
bench_inv *data = (bench_inv*)arg;
@@ -354,6 +363,7 @@ int main(int argc, char **argv) {
354363
if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "inverse")) run_benchmark("scalar_inverse", bench_scalar_inverse, bench_setup, NULL, &data, 10, iters);
355364
if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "inverse")) run_benchmark("scalar_inverse_var", bench_scalar_inverse_var, bench_setup, NULL, &data, 10, iters);
356365

366+
if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "half")) run_benchmark("field_half", bench_field_half, bench_setup, NULL, &data, 10, iters*100);
357367
if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "normalize")) run_benchmark("field_normalize", bench_field_normalize, bench_setup, NULL, &data, 10, iters*100);
358368
if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "normalize")) run_benchmark("field_normalize_weak", bench_field_normalize_weak, bench_setup, NULL, &data, 10, iters*100);
359369
if (d || have_flag(argc, argv, "field") || have_flag(argc, argv, "sqr")) run_benchmark("field_sqr", bench_field_sqr, bench_setup, NULL, &data, 10, iters*10);

src/field.h

+5
Original file line numberDiff line numberDiff line change
@@ -130,4 +130,9 @@ static void secp256k1_fe_storage_cmov(secp256k1_fe_storage *r, const secp256k1_f
130130
/** If flag is true, set *r equal to *a; otherwise leave it. Constant-time. Both *r and *a must be initialized.*/
131131
static void secp256k1_fe_cmov(secp256k1_fe *r, const secp256k1_fe *a, int flag);
132132

133+
/** Halves the value of a field element modulo the field prime. Constant-time.
134+
* For an input magnitude 'm', the output magnitude is set to 'floor(m/2) + 1'.
135+
* The output is not guaranteed to be normalized, regardless of the input. */
136+
static void secp256k1_fe_half(secp256k1_fe *r);
137+
133138
#endif /* SECP256K1_FIELD_H */

src/field_10x26_impl.h

+76
Original file line numberDiff line numberDiff line change
@@ -1133,6 +1133,82 @@ static SECP256K1_INLINE void secp256k1_fe_cmov(secp256k1_fe *r, const secp256k1_
11331133
#endif
11341134
}
11351135

1136+
static SECP256K1_INLINE void secp256k1_fe_half(secp256k1_fe *r) {
1137+
uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
1138+
t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
1139+
uint32_t one = (uint32_t)1;
1140+
uint32_t mask = -(t0 & one) >> 6;
1141+
1142+
#ifdef VERIFY
1143+
secp256k1_fe_verify(r);
1144+
VERIFY_CHECK(r->magnitude < 32);
1145+
#endif
1146+
1147+
/* Bounds analysis (over the rationals).
1148+
*
1149+
* Let m = r->magnitude
1150+
* C = 0x3FFFFFFUL * 2
1151+
* D = 0x03FFFFFUL * 2
1152+
*
1153+
* Initial bounds: t0..t8 <= C * m
1154+
* t9 <= D * m
1155+
*/
1156+
1157+
t0 += 0x3FFFC2FUL & mask;
1158+
t1 += 0x3FFFFBFUL & mask;
1159+
t2 += mask;
1160+
t3 += mask;
1161+
t4 += mask;
1162+
t5 += mask;
1163+
t6 += mask;
1164+
t7 += mask;
1165+
t8 += mask;
1166+
t9 += mask >> 4;
1167+
1168+
VERIFY_CHECK((t0 & one) == 0);
1169+
1170+
/* t0..t8: added <= C/2
1171+
* t9: added <= D/2
1172+
*
1173+
* Current bounds: t0..t8 <= C * (m + 1/2)
1174+
* t9 <= D * (m + 1/2)
1175+
*/
1176+
1177+
r->n[0] = (t0 >> 1) + ((t1 & one) << 25);
1178+
r->n[1] = (t1 >> 1) + ((t2 & one) << 25);
1179+
r->n[2] = (t2 >> 1) + ((t3 & one) << 25);
1180+
r->n[3] = (t3 >> 1) + ((t4 & one) << 25);
1181+
r->n[4] = (t4 >> 1) + ((t5 & one) << 25);
1182+
r->n[5] = (t5 >> 1) + ((t6 & one) << 25);
1183+
r->n[6] = (t6 >> 1) + ((t7 & one) << 25);
1184+
r->n[7] = (t7 >> 1) + ((t8 & one) << 25);
1185+
r->n[8] = (t8 >> 1) + ((t9 & one) << 25);
1186+
r->n[9] = (t9 >> 1);
1187+
1188+
/* t0..t8: shifted right and added <= C/4 + 1/2
1189+
* t9: shifted right
1190+
*
1191+
* Current bounds: t0..t8 <= C * (m/2 + 1/2)
1192+
* t9 <= D * (m/2 + 1/4)
1193+
*/
1194+
1195+
#ifdef VERIFY
1196+
/* Therefore the output magnitude (M) has to be set such that:
1197+
* t0..t8: C * M >= C * (m/2 + 1/2)
1198+
* t9: D * M >= D * (m/2 + 1/4)
1199+
*
1200+
* It suffices for all limbs that, for any input magnitude m:
1201+
* M >= m/2 + 1/2
1202+
*
1203+
* and since we want the smallest such integer value for M:
1204+
* M == floor(m/2) + 1
1205+
*/
1206+
r->magnitude = (r->magnitude >> 1) + 1;
1207+
r->normalized = 0;
1208+
secp256k1_fe_verify(r);
1209+
#endif
1210+
}
1211+
11361212
static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage *r, const secp256k1_fe_storage *a, int flag) {
11371213
uint32_t mask0, mask1;
11381214
VG_CHECK_VERIFY(r->n, sizeof(r->n));

src/field_5x52_impl.h

+65
Original file line numberDiff line numberDiff line change
@@ -477,6 +477,71 @@ static SECP256K1_INLINE void secp256k1_fe_cmov(secp256k1_fe *r, const secp256k1_
477477
#endif
478478
}
479479

480+
static SECP256K1_INLINE void secp256k1_fe_half(secp256k1_fe *r) {
481+
uint64_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4];
482+
uint64_t one = (uint64_t)1;
483+
uint64_t mask = -(t0 & one) >> 12;
484+
485+
#ifdef VERIFY
486+
secp256k1_fe_verify(r);
487+
VERIFY_CHECK(r->magnitude < 32);
488+
#endif
489+
490+
/* Bounds analysis (over the rationals).
491+
*
492+
* Let m = r->magnitude
493+
* C = 0xFFFFFFFFFFFFFULL * 2
494+
* D = 0x0FFFFFFFFFFFFULL * 2
495+
*
496+
* Initial bounds: t0..t3 <= C * m
497+
* t4 <= D * m
498+
*/
499+
500+
t0 += 0xFFFFEFFFFFC2FULL & mask;
501+
t1 += mask;
502+
t2 += mask;
503+
t3 += mask;
504+
t4 += mask >> 4;
505+
506+
VERIFY_CHECK((t0 & one) == 0);
507+
508+
/* t0..t3: added <= C/2
509+
* t4: added <= D/2
510+
*
511+
* Current bounds: t0..t3 <= C * (m + 1/2)
512+
* t4 <= D * (m + 1/2)
513+
*/
514+
515+
r->n[0] = (t0 >> 1) + ((t1 & one) << 51);
516+
r->n[1] = (t1 >> 1) + ((t2 & one) << 51);
517+
r->n[2] = (t2 >> 1) + ((t3 & one) << 51);
518+
r->n[3] = (t3 >> 1) + ((t4 & one) << 51);
519+
r->n[4] = (t4 >> 1);
520+
521+
/* t0..t3: shifted right and added <= C/4 + 1/2
522+
* t4: shifted right
523+
*
524+
* Current bounds: t0..t3 <= C * (m/2 + 1/2)
525+
* t4 <= D * (m/2 + 1/4)
526+
*/
527+
528+
#ifdef VERIFY
529+
/* Therefore the output magnitude (M) has to be set such that:
530+
* t0..t3: C * M >= C * (m/2 + 1/2)
531+
* t4: D * M >= D * (m/2 + 1/4)
532+
*
533+
* It suffices for all limbs that, for any input magnitude m:
534+
* M >= m/2 + 1/2
535+
*
536+
* and since we want the smallest such integer value for M:
537+
* M == floor(m/2) + 1
538+
*/
539+
r->magnitude = (r->magnitude >> 1) + 1;
540+
r->normalized = 0;
541+
secp256k1_fe_verify(r);
542+
#endif
543+
}
544+
480545
static SECP256K1_INLINE void secp256k1_fe_storage_cmov(secp256k1_fe_storage *r, const secp256k1_fe_storage *a, int flag) {
481546
uint64_t mask0, mask1;
482547
VG_CHECK_VERIFY(r->n, sizeof(r->n));

src/group_impl.h

+16-20
Original file line numberDiff line numberDiff line change
@@ -492,7 +492,7 @@ static void secp256k1_gej_add_zinv_var(secp256k1_gej *r, const secp256k1_gej *a,
492492

493493

494494
static void secp256k1_gej_add_ge(secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_ge *b) {
495-
/* Operations: 7 mul, 5 sqr, 4 normalize, 21 mul_int/add/negate/cmov */
495+
/* Operations: 7 mul, 5 sqr, 24 add/cmov/half/mul_int/negate/normalize_weak/normalizes_to_zero */
496496
secp256k1_fe zz, u1, u2, s1, s2, t, tt, m, n, q, rr;
497497
secp256k1_fe m_alt, rr_alt;
498498
int infinity, degenerate;
@@ -513,11 +513,11 @@ static void secp256k1_gej_add_ge(secp256k1_gej *r, const secp256k1_gej *a, const
513513
* Z = Z1*Z2
514514
* T = U1+U2
515515
* M = S1+S2
516-
* Q = T*M^2
516+
* Q = -T*M^2
517517
* R = T^2-U1*U2
518-
* X3 = 4*(R^2-Q)
519-
* Y3 = 4*(R*(3*Q-2*R^2)-M^4)
520-
* Z3 = 2*M*Z
518+
* X3 = R^2+Q
519+
* Y3 = -(R*(2*X3+Q)+M^4)/2
520+
* Z3 = M*Z
521521
* (Note that the paper uses xi = Xi / Zi and yi = Yi / Zi instead.)
522522
*
523523
* This formula has the benefit of being the same for both addition
@@ -581,29 +581,25 @@ static void secp256k1_gej_add_ge(secp256k1_gej *r, const secp256k1_gej *a, const
581581
* and denominator of lambda; R and M represent the explicit
582582
* expressions x1^2 + x2^2 + x1x2 and y1 + y2. */
583583
secp256k1_fe_sqr(&n, &m_alt); /* n = Malt^2 (1) */
584-
secp256k1_fe_mul(&q, &n, &t); /* q = Q = T*Malt^2 (1) */
584+
secp256k1_fe_negate(&q, &t, 2); /* q = -T (3) */
585+
secp256k1_fe_mul(&q, &q, &n); /* q = Q = -T*Malt^2 (1) */
585586
/* These two lines use the observation that either M == Malt or M == 0,
586587
* so M^3 * Malt is either Malt^4 (which is computed by squaring), or
587588
* zero (which is "computed" by cmov). So the cost is one squaring
588589
* versus two multiplications. */
589590
secp256k1_fe_sqr(&n, &n);
590591
secp256k1_fe_cmov(&n, &m, degenerate); /* n = M^3 * Malt (2) */
591592
secp256k1_fe_sqr(&t, &rr_alt); /* t = Ralt^2 (1) */
592-
secp256k1_fe_mul(&r->z, &a->z, &m_alt); /* r->z = Malt*Z (1) */
593+
secp256k1_fe_mul(&r->z, &a->z, &m_alt); /* r->z = Z3 = Malt*Z (1) */
593594
infinity = secp256k1_fe_normalizes_to_zero(&r->z) & ~a->infinity;
594-
secp256k1_fe_mul_int(&r->z, 2); /* r->z = Z3 = 2*Malt*Z (2) */
595-
secp256k1_fe_negate(&q, &q, 1); /* q = -Q (2) */
596-
secp256k1_fe_add(&t, &q); /* t = Ralt^2-Q (3) */
597-
secp256k1_fe_normalize_weak(&t);
598-
r->x = t; /* r->x = Ralt^2-Q (1) */
599-
secp256k1_fe_mul_int(&t, 2); /* t = 2*x3 (2) */
600-
secp256k1_fe_add(&t, &q); /* t = 2*x3 - Q: (4) */
601-
secp256k1_fe_mul(&t, &t, &rr_alt); /* t = Ralt*(2*x3 - Q) (1) */
602-
secp256k1_fe_add(&t, &n); /* t = Ralt*(2*x3 - Q) + M^3*Malt (3) */
603-
secp256k1_fe_negate(&r->y, &t, 3); /* r->y = Ralt*(Q - 2x3) - M^3*Malt (4) */
604-
secp256k1_fe_normalize_weak(&r->y);
605-
secp256k1_fe_mul_int(&r->x, 4); /* r->x = X3 = 4*(Ralt^2-Q) */
606-
secp256k1_fe_mul_int(&r->y, 4); /* r->y = Y3 = 4*Ralt*(Q - 2x3) - 4*M^3*Malt (4) */
595+
secp256k1_fe_add(&t, &q); /* t = Ralt^2 + Q (2) */
596+
r->x = t; /* r->x = X3 = Ralt^2 + Q (2) */
597+
secp256k1_fe_mul_int(&t, 2); /* t = 2*X3 (4) */
598+
secp256k1_fe_add(&t, &q); /* t = 2*X3 + Q (5) */
599+
secp256k1_fe_mul(&t, &t, &rr_alt); /* t = Ralt*(2*X3 + Q) (1) */
600+
secp256k1_fe_add(&t, &n); /* t = Ralt*(2*X3 + Q) + M^3*Malt (3) */
601+
secp256k1_fe_negate(&r->y, &t, 3); /* r->y = -(Ralt*(2*X3 + Q) + M^3*Malt) (4) */
602+
secp256k1_fe_half(&r->y); /* r->y = Y3 = -(Ralt*(2*X3 + Q) + M^3*Malt)/2 (3) */
607603

608604
/** In case a->infinity == 1, replace r with (b->x, b->y, 1). */
609605
secp256k1_fe_cmov(&r->x, &b->x, a->infinity);

0 commit comments

Comments
 (0)