Commit f21705d Shigeki Ohtsu
committed
1 parent f5a961a commit f21705d Copy full SHA for f21705d
File tree 9 files changed +294
-87
lines changed
9 files changed +294
-87
lines changed Original file line number Diff line number Diff line change @@ -31,6 +31,21 @@ bn_mul_mont:
31
31
32
32
movq %r11 ,8 (%rsp ,%r9 ,8 )
33
33
.Lmul_body:
34
+
35
+
36
+
37
+
38
+
39
+
40
+ subq %rsp ,%r11
41
+ andq $-4096 ,%r11
42
+ .Lmul_page_walk:
43
+ movq (%rsp ,%r11 ,1 ),%r10
44
+ subq $4096 ,%r11
45
+ .byte 0x66 ,0x2e
46
+
47
+ jnc .Lmul_page_walk
48
+
34
49
movq %rdx ,%r12
35
50
movq (%r8 ),%r8
36
51
movq (%r12 ),%rbx
@@ -228,6 +243,15 @@ bn_mul4x_mont:
228
243
229
244
movq %r11 ,8 (%rsp ,%r9 ,8 )
230
245
.Lmul4x_body:
246
+ subq %rsp ,%r11
247
+ andq $-4096 ,%r11
248
+ .Lmul4x_page_walk:
249
+ movq (%rsp ,%r11 ,1 ),%r10
250
+ subq $4096 ,%r11
251
+ .byte 0x2e
252
+
253
+ jnc .Lmul4x_page_walk
254
+
231
255
movq %rdi ,16 (%rsp ,%r9 ,8 )
232
256
movq %rdx ,%r12
233
257
movq (%r8 ),%r8
@@ -610,6 +634,7 @@ bn_mul4x_mont:
610
634
.align 16
611
635
bn_sqr4x_mont:
612
636
.Lsqr4x_enter:
637
+ movq %rsp ,%rax
613
638
pushq %rbx
614
639
pushq %rbp
615
640
pushq %r12
@@ -618,13 +643,25 @@ bn_sqr4x_mont:
618
643
pushq %r15
619
644
620
645
shll $3 ,%r9d
621
- xorq %r10 ,%r10
622
646
movq %rsp ,%r11
623
- subq %r9 , %r10
647
+ negq %r9
624
648
movq (%r8 ),%r8
625
- leaq -72 (%rsp ,%r10 ,2 ),%rsp
649
+ leaq -72 (%rsp ,%r9 ,2 ),%rsp
626
650
andq $-1024 ,%rsp
627
651
652
+ subq %rsp ,%r11
653
+ andq $-4096 ,%r11
654
+ .Lsqr4x_page_walk:
655
+ movq (%rsp ,%r11 ,1 ),%r10
656
+ subq $4096 ,%r11
657
+ .byte 0x2e
658
+
659
+ jnc .Lsqr4x_page_walk
660
+
661
+ movq %r9 ,%r10
662
+ negq %r9
663
+ leaq -48 (%rax ),%r11
664
+
628
665
629
666
630
667
Original file line number Diff line number Diff line change @@ -32,6 +32,21 @@ bn_mul_mont_gather5:
32
32
33
33
movq %rax ,8 (%rsp ,%r9 ,8 )
34
34
.Lmul_body:
35
+
36
+
37
+
38
+
39
+
40
+
41
+ subq %rsp ,%rax
42
+ andq $-4096 ,%rax
43
+ .Lmul_page_walk:
44
+ movq (%rsp ,%rax ,1 ),%r11
45
+ subq $4096 ,%rax
46
+ .byte 0x2e
47
+
48
+ jnc .Lmul_page_walk
49
+
35
50
leaq 128 (%rdx ),%r12
36
51
movdqa 0 (%r10 ),%xmm0
37
52
movdqa 16 (%r10 ),%xmm1
@@ -420,6 +435,15 @@ bn_mul4x_mont_gather5:
420
435
421
436
movq %rax ,8 (%rsp ,%r9 ,8 )
422
437
.Lmul4x_body:
438
+ subq %rsp ,%rax
439
+ andq $-4096 ,%rax
440
+ .Lmul4x_page_walk:
441
+ movq (%rsp ,%rax ,1 ),%r11
442
+ subq $4096 ,%rax
443
+ .byte 0x2e
444
+
445
+ jnc .Lmul4x_page_walk
446
+
423
447
movq %rdi ,16 (%rsp ,%r9 ,8 )
424
448
leaq 128 (%rdx ),%r12
425
449
movdqa 0 (%r10 ),%xmm0
Original file line number Diff line number Diff line change @@ -31,6 +31,21 @@ L$mul_enter:
31
31
32
32
movq %r11 ,8 (%rsp ,%r9 ,8 )
33
33
L$mul_body:
34
+
35
+
36
+
37
+
38
+
39
+
40
+ subq %rsp ,%r11
41
+ andq $-4096 ,%r11
42
+ L$mul_page_walk:
43
+ movq (%rsp ,%r11 ,1 ),%r10
44
+ subq $4096 ,%r11
45
+ .byte 0x66 ,0x2e
46
+
47
+ jnc L$mul_page_walk
48
+
34
49
movq %rdx ,%r12
35
50
movq (%r8 ),%r8
36
51
movq (%r12 ),%rbx
@@ -228,6 +243,15 @@ L$mul4x_enter:
228
243
229
244
movq %r11 ,8 (%rsp ,%r9 ,8 )
230
245
L$mul4x_body:
246
+ subq %rsp ,%r11
247
+ andq $-4096 ,%r11
248
+ L$mul4x_page_walk:
249
+ movq (%rsp ,%r11 ,1 ),%r10
250
+ subq $4096 ,%r11
251
+ .byte 0x2e
252
+
253
+ jnc L$mul4x_page_walk
254
+
231
255
movq %rdi ,16 (%rsp ,%r9 ,8 )
232
256
movq %rdx ,%r12
233
257
movq (%r8 ),%r8
@@ -610,6 +634,7 @@ L$mul4x_epilogue:
610
634
.p2align 4
611
635
bn_sqr4x_mont:
612
636
L$sqr4x_enter:
637
+ movq %rsp ,%rax
613
638
pushq %rbx
614
639
pushq %rbp
615
640
pushq %r12
@@ -618,13 +643,25 @@ L$sqr4x_enter:
618
643
pushq %r15
619
644
620
645
shll $3 ,%r9d
621
- xorq %r10 ,%r10
622
646
movq %rsp ,%r11
623
- subq %r9 , %r10
647
+ negq %r9
624
648
movq (%r8 ),%r8
625
- leaq -72 (%rsp ,%r10 ,2 ),%rsp
649
+ leaq -72 (%rsp ,%r9 ,2 ),%rsp
626
650
andq $-1024 ,%rsp
627
651
652
+ subq %rsp ,%r11
653
+ andq $-4096 ,%r11
654
+ L$sqr4x_page_walk:
655
+ movq (%rsp ,%r11 ,1 ),%r10
656
+ subq $4096 ,%r11
657
+ .byte 0x2e
658
+
659
+ jnc L$sqr4x_page_walk
660
+
661
+ movq %r9 ,%r10
662
+ negq %r9
663
+ leaq -48 (%rax ),%r11
664
+
628
665
629
666
630
667
Original file line number Diff line number Diff line change @@ -32,6 +32,21 @@ L$mul_alloca:
32
32
33
33
movq %rax ,8 (%rsp ,%r9 ,8 )
34
34
L$mul_body:
35
+
36
+
37
+
38
+
39
+
40
+
41
+ subq %rsp ,%rax
42
+ andq $-4096 ,%rax
43
+ L$mul_page_walk:
44
+ movq (%rsp ,%rax ,1 ),%r11
45
+ subq $4096 ,%rax
46
+ .byte 0x2e
47
+
48
+ jnc L$mul_page_walk
49
+
35
50
leaq 128 (%rdx ),%r12
36
51
movdqa 0 (%r10 ),%xmm0
37
52
movdqa 16 (%r10 ),%xmm1
@@ -420,6 +435,15 @@ L$mul4x_alloca:
420
435
421
436
movq %rax ,8 (%rsp ,%r9 ,8 )
422
437
L$mul4x_body:
438
+ subq %rsp ,%rax
439
+ andq $-4096 ,%rax
440
+ L$mul4x_page_walk:
441
+ movq (%rsp ,%rax ,1 ),%r11
442
+ subq $4096 ,%rax
443
+ .byte 0x2e
444
+
445
+ jnc L$mul4x_page_walk
446
+
423
447
movq %rdi ,16 (%rsp ,%r9 ,8 )
424
448
leaq 128 (%rdx ),%r12
425
449
movdqa 0 (%r10 ),%xmm0
Original file line number Diff line number Diff line change @@ -43,6 +43,21 @@ $L$mul_enter::
43
43
44
44
mov QWORD PTR [ 8 + r9 * 8 + rsp ], r11
45
45
$ L $ mul_body::
46
+
47
+
48
+
49
+
50
+
51
+
52
+ sub r11 , rsp
53
+ and r11 ,- 4096
54
+ $ L $ mul_page_walk::
55
+ mov r10 , QWORD PTR [ r11 * 1 + rsp ]
56
+ sub r11 , 4096
57
+ DB 066h , 02eh
58
+
59
+ jnc $ L $ mul_page_walk
60
+
46
61
mov r12 , rdx
47
62
mov r8 , QWORD PTR [ r8 ]
48
63
mov rbx , QWORD PTR [ r12 ]
@@ -255,6 +270,15 @@ $L$mul4x_enter::
255
270
256
271
mov QWORD PTR [ 8 + r9 * 8 + rsp ], r11
257
272
$ L $ mul4x_body::
273
+ sub r11 , rsp
274
+ and r11 ,- 4096
275
+ $ L $ mul4x_page_walk::
276
+ mov r10 , QWORD PTR [ r11 * 1 + rsp ]
277
+ sub r11 , 4096
278
+ DB 02eh
279
+
280
+ jnc $ L $ mul4x_page_walk
281
+
258
282
mov QWORD PTR [ 16 + r9 * 8 + rsp ], rdi
259
283
mov r12 , rdx
260
284
mov r8 , QWORD PTR [ r8 ]
@@ -652,6 +676,7 @@ $L$SEH_begin_bn_sqr4x_mont::
652
676
653
677
654
678
$ L $ sqr4x_enter::
679
+ mov rax , rsp
655
680
push rbx
656
681
push rbp
657
682
push r12
@@ -660,13 +685,25 @@ $L$sqr4x_enter::
660
685
push r15
661
686
662
687
shl r9d , 3
663
- xor r10 , r10
664
688
mov r11 , rsp
665
- sub r10 , r9
689
+ neg r9
666
690
mov r8 , QWORD PTR [ r8 ]
667
- lea rsp , QWORD PTR [ (( - 72 )) + r10 * 2 + rsp ]
691
+ lea rsp , QWORD PTR [ (( - 72 )) + r9 * 2 + rsp ]
668
692
and rsp ,- 1024
669
693
694
+ sub r11 , rsp
695
+ and r11 ,- 4096
696
+ $ L $ sqr4x_page_walk::
697
+ mov r10 , QWORD PTR [ r11 * 1 + rsp ]
698
+ sub r11 , 4096
699
+ DB 02eh
700
+
701
+ jnc $ L $ sqr4x_page_walk
702
+
703
+ mov r10 , r9
704
+ neg r9
705
+ lea r11 , QWORD PTR [ (( - 48 )) + rax ]
706
+
670
707
671
708
672
709
Original file line number Diff line number Diff line change @@ -44,6 +44,21 @@ $L$mul_alloca::
44
44
45
45
mov QWORD PTR [ 8 + r9 * 8 + rsp ], rax
46
46
$ L $ mul_body::
47
+
48
+
49
+
50
+
51
+
52
+
53
+ sub rax , rsp
54
+ and rax ,- 4096
55
+ $ L $ mul_page_walk::
56
+ mov r11 , QWORD PTR [ rax * 1 + rsp ]
57
+ sub rax , 4096
58
+ DB 02eh
59
+
60
+ jnc $ L $ mul_page_walk
61
+
47
62
lea r12 , QWORD PTR [ 128 + rdx ]
48
63
movdqa xmm0 , XMMWORD PTR [ r10 ]
49
64
movdqa xmm1 , XMMWORD PTR [ 16 + r10 ]
@@ -447,6 +462,15 @@ $L$mul4x_alloca::
447
462
448
463
mov QWORD PTR [ 8 + r9 * 8 + rsp ], rax
449
464
$ L $ mul4x_body::
465
+ sub rax , rsp
466
+ and rax ,- 4096
467
+ $ L $ mul4x_page_walk::
468
+ mov r11 , QWORD PTR [ rax * 1 + rsp ]
469
+ sub rax , 4096
470
+ DB 02eh
471
+
472
+ jnc $ L $ mul4x_page_walk
473
+
450
474
mov QWORD PTR [ 16 + r9 * 8 + rsp ], rdi
451
475
lea r12 , QWORD PTR [ 128 + rdx ]
452
476
movdqa xmm0 , XMMWORD PTR [ r10 ]
You can’t perform that action at this time.
0 commit comments