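; Viewer metadata (from the hosting page, not part of the program):
;   1859 lines, 24 KiB, executable file.
;   The viewer labelled this file "ArmAsm"; the content is x86-64 MASM
;   (Intel syntax, PROC/ENDP, QWORD PTR operands), not ARM assembly.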
; 1 "crypto/bn/modexp512-masm-x86_64.S.tmp"
; 1 "<built-in>" 1
; 1 "<built-in>" 3
; 340 "<built-in>" 3
; 1 "<command line>" 1
; 1 "<built-in>" 2
; 1 "crypto/bn/modexp512-masm-x86_64.S.tmp" 2

OPTION DOTNAME

; 1 "./crypto/x86_arch.h" 1
; 16 "./crypto/x86_arch.h"
; 40 "./crypto/x86_arch.h"
; 3 "crypto/bn/modexp512-masm-x86_64.S.tmp" 2

.text$ SEGMENT ALIGN(64) 'CODE'

ALIGN 16

;-----------------------------------------------------------------------
; MULADD_128x512 -- private helper: 128 x 512-bit multiply-accumulate.
;
; Computes  acc + (rbp + 2^64 * [8+rdi]) * {[56+rsi] .. [rsi]}
; as two schoolbook rows of 64 x 512 bits.
;
; ABI:   internal register convention (not callable from C).
; In:    rsi      -> 8-qword multiplicand, read at [rsi] .. [56+rsi]
;        rbp      =  multiplier qword for the first row
;        [8+rdi]  =  multiplier qword for the second row
;        r8..r15  =  8-qword accumulator, r8 least significant
;        rcx      -> where the two lowest result qwords are written
; Out:   [rcx], [8+rcx]        = result qwords 0 and 1
;        r10..r15, r8, r9      = result qwords 2..9 (r10 lowest, r9 highest)
; Clobb: rax, rbx, rdx, rbp, flags.  rsi, rdi, rcx are left unchanged.
; Stack: none used.
;-----------------------------------------------------------------------

; accreg += rbp * [disp+rsi] + rbx ; the new carry-out is left in rdx.
; rbx is the carry-in produced by the previous limb.
MULADD_LIMB MACRO accreg,disp
	mov	rax,QWORD PTR[disp+rsi]
	mul	rbp
	add	accreg,rax
	adc	rdx,0
	add	accreg,rbx
	adc	rdx,0
	ENDM

; One 64 x 512 row: adds rbp * [rsi..56+rsi] into acc0..acc7 (acc0 lowest).
; The finished low limb is stored to dstmem; acc0 is then recycled to hold
; the row's top limb, so the accumulator window slides up by one qword.
MULADD_ROW MACRO dstmem,acc0,acc1,acc2,acc3,acc4,acc5,acc6,acc7
	mov	rax,QWORD PTR[rsi]
	mul	rbp
	add	acc0,rax
	adc	rdx,0
	mov	dstmem,acc0
	mov	rbx,rdx
	MULADD_LIMB acc1,8
	mov	rbx,rdx
	MULADD_LIMB acc2,16
	mov	rbx,rdx
	MULADD_LIMB acc3,24
	mov	rbx,rdx
	MULADD_LIMB acc4,32
	mov	rbx,rdx
	MULADD_LIMB acc5,40
	mov	rbx,rdx
	MULADD_LIMB acc6,48
	mov	rbx,rdx
	MULADD_LIMB acc7,56
	mov	acc0,rdx
	ENDM

MULADD_128x512 PROC PRIVATE
	; Row 0: multiplier already in rbp; result qword 0 -> [rcx].
	MULADD_ROW <QWORD PTR[rcx]>,r8,r9,r10,r11,r12,r13,r14,r15

	; Row 1: multiplier from [8+rdi]; result qword 1 -> [8+rcx].
	mov	rbp,QWORD PTR[8+rdi]
	MULADD_ROW <QWORD PTR[8+rcx]>,r9,r10,r11,r12,r13,r14,r15,r8

	DB	0F3h,0C3h		;repret
MULADD_128x512 ENDP

ALIGN 16

;-----------------------------------------------------------------------
; mont_reduce -- private helper: reduce the 16-qword value in the frame's
; product buffer to 8 qwords and store it through the saved result pointer.
;
; All rsp-relative offsets below are as seen inside this routine, i.e.
; after a return address has been pushed: they are 8 larger than the
; offsets used by mod_exp_512 for the same slots.
;
; In (stack frame):
;        [520+rsp] .. [640+rsp]  16-qword input X (low qword first)
;        [32+rsp]                base pointer of the precomputed data block
;        [144+rsp]               address that receives the 8-qword result
; Data block offsets used (from the pointer at [32+rsp]):
;        +0    table indexed by carry count (8 qwords per entry, qword i of
;              an entry at +64*i, entries 8 bytes apart)
;        +512  8 qwords that are conditionally subtracted at the end
;        +576, +640  8-qword multiplicands for the folding steps
;        +704  2 qwords used to form the 128-bit quotient estimate
;        NOTE(review): presumably these are m, M1, M2, K1 and T of the
;        upstream generator script -- confirm against the C side.
; Scratch (stack frame):
;        [192+rsp] ..  X1 (13 qwords); its first 2 qwords are later reused
;                      for the quotient estimate
;        [296+rsp] ..  X2 (11 qwords)
;        [384+rsp]     running carry count
; Out:   r10..r15, r8, r9 = result qwords 0..7, also stored to the buffer
;        whose address is in [144+rsp].
; Clobb: rax, rbx, rcx, rdx, rsi, rdi, rbp, r8-r15, flags.
; Calls: MULADD_128x512 (twice).
;-----------------------------------------------------------------------

; accreg += rbp * [disp+rsi] + rbx ; carry-out left in rdx.
MR_LIMB MACRO accreg,disp
	mov	rax,QWORD PTR[disp+rsi]
	mul	rbp
	add	accreg,rax
	adc	rdx,0
	add	accreg,rbx
	adc	rdx,0
	ENDM

; accreg = [disp+rcx] + rbp * [disp+rsi] + rbx ; carry-out left in rdx.
; Same as MR_LIMB, but the accumulator is first loaded from the input X.
MR_LIMB_LD MACRO accreg,disp
	mov	rax,QWORD PTR[disp+rsi]
	mul	rbp
	mov	accreg,QWORD PTR[disp+rcx]
	add	accreg,rax
	adc	rdx,0
	add	accreg,rbx
	adc	rdx,0
	ENDM

; One 64 x 512 row into acc0..acc7 (acc0 lowest). The low limb is stored
; to dstmem and acc0 is recycled to hold the row's top limb.
MR_ROW MACRO dstmem,acc0,acc1,acc2,acc3,acc4,acc5,acc6,acc7
	mov	rax,QWORD PTR[rsi]
	mul	rbp
	add	acc0,rax
	adc	rdx,0
	mov	dstmem,acc0
	mov	rbx,rdx
	MR_LIMB acc1,8
	mov	rbx,rdx
	MR_LIMB acc2,16
	mov	rbx,rdx
	MR_LIMB acc3,24
	mov	rbx,rdx
	MR_LIMB acc4,32
	mov	rbx,rdx
	MR_LIMB acc5,40
	mov	rbx,rdx
	MR_LIMB acc6,48
	mov	rbx,rdx
	MR_LIMB acc7,56
	mov	acc0,rdx
	ENDM

mont_reduce PROC PRIVATE
	lea	rdi,QWORD PTR[192+rsp]		; rdi -> X1 scratch
	mov	rsi,QWORD PTR[32+rsp]		; rsi  = data base
	add	rsi,576				; rsi -> multiplicand at data+576
	lea	rcx,QWORD PTR[520+rsp]		; rcx -> 16-qword input X

	;---------------------------------------------------------------
	; Step 1: X1 = X[0..11] + X[12..15] * (8 qwords at data+576)
	;---------------------------------------------------------------
	; Row 0 (multiplier X[12]): accumulators are loaded from X[0..7].
	mov	rbp,QWORD PTR[96+rcx]
	mov	rax,QWORD PTR[rsi]
	mul	rbp
	mov	r8,QWORD PTR[rcx]
	add	r8,rax
	adc	rdx,0
	mov	QWORD PTR[rdi],r8
	mov	rbx,rdx
	MR_LIMB_LD r9,8
	mov	rbx,rdx
	MR_LIMB_LD r10,16
	mov	rbx,rdx
	MR_LIMB_LD r11,24
	mov	rbx,rdx
	MR_LIMB_LD r12,32
	mov	rbx,rdx
	MR_LIMB_LD r13,40
	mov	rbx,rdx
	MR_LIMB_LD r14,48
	mov	rbx,rdx
	MR_LIMB_LD r15,56
	mov	r8,rdx

	; Rows 1..3 (multipliers X[13], X[14], X[15]).
	mov	rbp,QWORD PTR[104+rcx]
	MR_ROW <QWORD PTR[8+rdi]>,r9,r10,r11,r12,r13,r14,r15,r8
	mov	rbp,QWORD PTR[112+rcx]
	MR_ROW <QWORD PTR[16+rdi]>,r10,r11,r12,r13,r14,r15,r8,r9
	mov	rbp,QWORD PTR[120+rcx]
	MR_ROW <QWORD PTR[24+rdi]>,r11,r12,r13,r14,r15,r8,r9,r10

	; Add X[8..11] into the top four limbs; rax collects the carry-out.
	xor	rax,rax
	add	r8,QWORD PTR[64+rcx]
	adc	r9,QWORD PTR[72+rcx]
	adc	r10,QWORD PTR[80+rcx]
	adc	r11,QWORD PTR[88+rcx]
	adc	rax,0

	; Spill X1[8], X1[9], X1[11]; X1[10] is kept in rbp because it is the
	; first multiplier qword for the MULADD_128x512 call below.
	mov	QWORD PTR[64+rdi],r8
	mov	QWORD PTR[72+rdi],r9
	mov	rbp,r10
	mov	QWORD PTR[88+rdi],r11

	mov	QWORD PTR[384+rsp],rax		; carry count = carry-out of step 1

	; Reload X1[0..3]; with r12..r15 (X1[4..7]) they form the accumulator.
	mov	r8,QWORD PTR[rdi]
	mov	r9,QWORD PTR[8+rdi]
	mov	r10,QWORD PTR[16+rdi]
	mov	r11,QWORD PTR[24+rdi]

	;---------------------------------------------------------------
	; Step 2: X2 = X1[0..9] + X1[10..11] * (8 qwords at data+640)
	;---------------------------------------------------------------
	add	rdi,8*10			; rdi -> X1[10], so [8+rdi] = X1[11]
	add	rsi,64				; rsi -> multiplicand at data+640
	lea	rcx,QWORD PTR[296+rsp]		; rcx -> X2 scratch

	call	MULADD_128x512

	mov	rax,QWORD PTR[384+rsp]

	; Add X1[8], X1[9] (at rdi-16, rdi-8) into the two top limbs.
	add	r8,QWORD PTR[((-16))+rdi]
	adc	r9,QWORD PTR[((-8))+rdi]
	mov	QWORD PTR[64+rcx],r8		; X2[8]  (mov leaves CF intact)
	mov	QWORD PTR[72+rcx],r9		; X2[9]

	adc	rax,rax				; carry count = 2*count + CF
	mov	QWORD PTR[384+rsp],rax

	;---------------------------------------------------------------
	; Step 3: Q = low 128 bits of X2[0..1] * (2 qwords at data+704)
	;         Q is left as { [8+rdi], rbp }.
	;---------------------------------------------------------------
	lea	rdi,QWORD PTR[192+rsp]		; Q reuses the start of X1
	add	rsi,64				; rsi -> data+704

	mov	r8,QWORD PTR[rsi]		; B0
	mov	rbx,QWORD PTR[8+rsi]		; B1

	mov	rax,QWORD PTR[rcx]		; A0 * B0
	mul	r8
	mov	rbp,rax				; Q[0]
	mov	r9,rdx

	mov	rax,QWORD PTR[8+rcx]		; + low64(A1 * B0)
	mul	r8
	add	r9,rax

	mov	rax,QWORD PTR[rcx]		; + low64(A0 * B1)
	mul	rbx
	add	r9,rax

	mov	QWORD PTR[8+rdi],r9		; Q[1]

	;---------------------------------------------------------------
	; Step 4: R = (X2 + Q * (8 qwords at data+512)) >> 128
	;---------------------------------------------------------------
	sub	rsi,192				; rsi -> data+512

	mov	r8,QWORD PTR[rcx]		; accumulator low limbs = X2[0..1];
	mov	r9,QWORD PTR[8+rcx]		; r10..r15 still hold X2[2..7]

	call	MULADD_128x512

	; Preload the low half of the value at data+512 for the conditional
	; subtraction below.
	mov	rax,QWORD PTR[rsi]
	mov	rbx,QWORD PTR[8+rsi]
	mov	rdi,QWORD PTR[16+rsi]
	mov	rdx,QWORD PTR[24+rsi]

	mov	rbp,QWORD PTR[384+rsp]

	add	r8,QWORD PTR[64+rcx]		; + X2[8]
	adc	r9,QWORD PTR[72+rcx]		; + X2[9]

	adc	rbp,rbp				; carry count = 2*count + CF

	; Add the table entry selected by the carry count:
	; qword i of the entry lives at data + 8*count + 64*i.
	shl	rbp,3
	mov	rcx,QWORD PTR[32+rsp]		; rcx = data base
	add	rbp,rcx

	xor	rsi,rsi				; rsi becomes the borrow mask below

	add	r10,QWORD PTR[rbp]
	adc	r11,QWORD PTR[64+rbp]
	adc	r12,QWORD PTR[128+rbp]
	adc	r13,QWORD PTR[192+rbp]
	adc	r14,QWORD PTR[256+rbp]
	adc	r15,QWORD PTR[320+rbp]
	adc	r8,QWORD PTR[384+rbp]
	adc	r9,QWORD PTR[448+rbp]

	; rsi = all ones if that addition carried out, else 0.
	sbb	rsi,0

	; Branch-free conditional subtract of the 8 qwords at data+512:
	; the operand is masked to zero when there was no carry.
	and	rax,rsi
	and	rbx,rsi
	and	rdi,rsi
	and	rdx,rsi

	mov	rbp,1
	sub	r10,rax
	sbb	r11,rbx
	sbb	r12,rdi
	sbb	r13,rdx

	; Park the borrow of the low half: rbp = 0 if borrow, 1 if none.
	sbb	rbp,0

	add	rcx,512				; rcx -> data+512
	mov	rax,QWORD PTR[32+rcx]		; high half of the subtrahend
	mov	rbx,QWORD PTR[40+rcx]
	mov	rdi,QWORD PTR[48+rcx]
	mov	rdx,QWORD PTR[56+rcx]

	and	rax,rsi
	and	rbx,rsi
	and	rdi,rsi
	and	rdx,rsi

	; Recreate CF from the parked borrow (0 - 1 sets CF, 1 - 1 clears it).
	sub	rbp,1

	sbb	r14,rax
	sbb	r15,rbx
	sbb	r8,rdi
	sbb	r9,rdx

	; Store the 8-qword result through the pointer saved by the caller.
	mov	rsi,QWORD PTR[144+rsp]
	mov	QWORD PTR[rsi],r10
	mov	QWORD PTR[8+rsi],r11
	mov	QWORD PTR[16+rsi],r12
	mov	QWORD PTR[24+rsi],r13
	mov	QWORD PTR[32+rsi],r14
	mov	QWORD PTR[40+rsi],r15
	mov	QWORD PTR[48+rsi],r8
	mov	QWORD PTR[56+rsi],r9

	DB	0F3h,0C3h		;repret
mont_reduce ENDP

ALIGN 16

;-----------------------------------------------------------------------
; mont_mul_a3b -- private helper: 512 x 512-bit schoolbook multiply into
; the frame's 16-qword product buffer, then tail-jump to mont_reduce.
;
; rsp-relative offsets are as seen inside this routine (return address
; already pushed), i.e. 8 larger than mod_exp_512's own frame offsets.
;
; In:    rdi  -> operand A, 8 qwords, read at [rdi] .. [56+rdi]
;        rsi  -> operand B, 8 qwords, read at [rsi] .. [56+rsi]
;        r10,r11,r12,r13,r14,r15,r8,r9 = B[0..7] in registers.
;              The first row multiplies these registers instead of
;              reading [rsi]; the callers in this file enter with the
;              registers still holding the value last stored at rsi.
; Out:   [520+rsp] .. [640+rsp] = A * B (16 qwords, low qword first);
;        control then continues in mont_reduce, which returns to our
;        caller (see mont_reduce for the final register/memory result).
; Clobb: rax, rbx, rdx, rbp, r8-r15, flags, plus everything mont_reduce
;        clobbers.
;-----------------------------------------------------------------------

; Row 0 limb: accreg += rbp * srcreg; the carry-out then replaces srcreg.
; (srcreg is dead after this product, so it becomes the next accumulator.)
A3B_ROW0 MACRO srcreg,accreg
	mov	rax,srcreg
	mul	rbp
	add	accreg,rax
	adc	rdx,0
	mov	srcreg,rdx
	ENDM

; accreg += rbp * [disp+rsi] + rbx ; carry-out left in rdx.
A3B_LIMB MACRO accreg,disp
	mov	rax,QWORD PTR[disp+rsi]
	mul	rbp
	add	accreg,rax
	adc	rdx,0
	add	accreg,rbx
	adc	rdx,0
	ENDM

; One 64 x 512 row into acc0..acc7 (acc0 lowest). The low limb is stored
; to dstmem and acc0 is recycled to hold the row's top limb.
A3B_ROW MACRO dstmem,acc0,acc1,acc2,acc3,acc4,acc5,acc6,acc7
	mov	rax,QWORD PTR[rsi]
	mul	rbp
	add	acc0,rax
	adc	rdx,0
	mov	dstmem,acc0
	mov	rbx,rdx
	A3B_LIMB acc1,8
	mov	rbx,rdx
	A3B_LIMB acc2,16
	mov	rbx,rdx
	A3B_LIMB acc3,24
	mov	rbx,rdx
	A3B_LIMB acc4,32
	mov	rbx,rdx
	A3B_LIMB acc5,40
	mov	rbx,rdx
	A3B_LIMB acc6,48
	mov	rbx,rdx
	A3B_LIMB acc7,56
	mov	acc0,rdx
	ENDM

mont_mul_a3b PROC PRIVATE
	; Row 0: A[0] * B, with B taken from registers. Product qword 0 goes
	; to memory; afterwards r10..r15,r8,r9 hold product qwords 1..8.
	mov	rbp,QWORD PTR[rdi]
	mov	rax,r10
	mul	rbp
	mov	QWORD PTR[520+rsp],rax
	mov	r10,rdx
	A3B_ROW0 r11,r10
	A3B_ROW0 r12,r11
	A3B_ROW0 r13,r12
	A3B_ROW0 r14,r13
	A3B_ROW0 r15,r14
	A3B_ROW0 r8,r15
	A3B_ROW0 r9,r8

	; Rows 1..7: A[i] * B with B read from [rsi]; each row retires one
	; product qword to memory and rotates the accumulator window.
	mov	rbp,QWORD PTR[8+rdi]
	A3B_ROW <QWORD PTR[528+rsp]>,r10,r11,r12,r13,r14,r15,r8,r9
	mov	rbp,QWORD PTR[16+rdi]
	A3B_ROW <QWORD PTR[536+rsp]>,r11,r12,r13,r14,r15,r8,r9,r10
	mov	rbp,QWORD PTR[24+rdi]
	A3B_ROW <QWORD PTR[544+rsp]>,r12,r13,r14,r15,r8,r9,r10,r11
	mov	rbp,QWORD PTR[32+rdi]
	A3B_ROW <QWORD PTR[552+rsp]>,r13,r14,r15,r8,r9,r10,r11,r12
	mov	rbp,QWORD PTR[40+rdi]
	A3B_ROW <QWORD PTR[560+rsp]>,r14,r15,r8,r9,r10,r11,r12,r13
	mov	rbp,QWORD PTR[48+rdi]
	A3B_ROW <QWORD PTR[568+rsp]>,r15,r8,r9,r10,r11,r12,r13,r14
	mov	rbp,QWORD PTR[56+rdi]
	A3B_ROW <QWORD PTR[576+rsp]>,r8,r9,r10,r11,r12,r13,r14,r15

	; Flush the upper half of the product (qwords 8..15).
	mov	QWORD PTR[584+rsp],r9
	mov	QWORD PTR[592+rsp],r10
	mov	QWORD PTR[600+rsp],r11
	mov	QWORD PTR[608+rsp],r12
	mov	QWORD PTR[616+rsp],r13
	mov	QWORD PTR[624+rsp],r14
	mov	QWORD PTR[632+rsp],r15
	mov	QWORD PTR[640+rsp],r8

	; Tail call: mont_reduce returns directly to our caller.
	jmp	mont_reduce
mont_mul_a3b ENDP

ALIGN 16

;-----------------------------------------------------------------------
; sqr_reduce -- private helper: square a 512-bit value into the frame's
; 16-qword product buffer, then tail-jump to mont_reduce.
;
; Method: accumulate the off-diagonal products a[i]*a[j] (i < j) once,
; double that sum, then add the diagonal squares a[i]^2.
;
; rsp-relative offsets are as seen inside this routine (return address
; already pushed), i.e. 8 larger than mod_exp_512's own frame offsets.
;
; In:    [16+rsp]  pointer to the operand a (8 qwords); a[0..6] are
;                  re-read from it at [rcx] .. [48+rcx]
;        r10,r11,r12,r13,r14,r15,r8,r9 = a[0..7] in registers; r8 and r9
;                  are used as a[6] and a[7] throughout
;        (memory and registers must therefore hold the same value).
; Out:   [520+rsp] .. [640+rsp] = a * a (16 qwords, low qword first);
;        control then continues in mont_reduce, which returns to our
;        caller.
; Clobb: rax, rbx, rcx, rdx, rsi, rdi, rbp, r8-r15, flags, plus
;        everything mont_reduce clobbers.
;-----------------------------------------------------------------------

; Row 0 limb: accreg += rbx * srcreg; carry-out goes to carryreg.
SQR_ROW0 MACRO srcreg,accreg,carryreg
	mov	rax,srcreg
	mul	rbx
	add	accreg,rax
	adc	rdx,0
	mov	carryreg,rdx
	ENDM

; First limb of rows 1..6: accreg += rbx * srcop; carry-out left in rdx.
SQR_HEAD MACRO srcop,accreg
	mov	rax,srcop
	mul	rbx
	add	accreg,rax
	adc	rdx,0
	ENDM

; Following limbs: take the previous carry (rdx) as carry-in via r10, then
; accreg += rbx * srcop + r10; new carry-out left in rdx.
SQR_NEXT MACRO srcop,accreg
	mov	r10,rdx
	mov	rax,srcop
	mul	rbx
	add	accreg,rax
	adc	rdx,0
	add	accreg,r10
	adc	rdx,0
	ENDM

sqr_reduce PROC PRIVATE
	mov	rcx,QWORD PTR[16+rsp]		; rcx -> operand a in memory

	;---------------------------------------------------------------
	; Off-diagonal products, row 0: a[0] * a[1..7]
	;---------------------------------------------------------------
	mov	rbx,r10				; rbx = a[0]

	mov	rax,r11
	mul	rbx
	mov	QWORD PTR[528+rsp],rax
	mov	r10,rdx
	SQR_ROW0 r12,r10,r11
	SQR_ROW0 r13,r11,r12
	SQR_ROW0 r14,r12,r13
	SQR_ROW0 r15,r13,r14
	SQR_ROW0 r8,r14,r15
	SQR_ROW0 r9,r15,rsi

	mov	QWORD PTR[536+rsp],r10

	;---------------------------------------------------------------
	; Row 1: a[1] * a[2..7]
	;---------------------------------------------------------------
	mov	rbx,QWORD PTR[8+rcx]

	SQR_HEAD <QWORD PTR[16+rcx]>,r11
	mov	QWORD PTR[544+rsp],r11
	SQR_NEXT <QWORD PTR[24+rcx]>,r12
	mov	QWORD PTR[552+rsp],r12
	SQR_NEXT <QWORD PTR[32+rcx]>,r13
	SQR_NEXT <QWORD PTR[40+rcx]>,r14
	SQR_NEXT r8,r15
	SQR_NEXT r9,rsi
	mov	r11,rdx

	;---------------------------------------------------------------
	; Row 2: a[2] * a[3..7]
	;---------------------------------------------------------------
	mov	rbx,QWORD PTR[16+rcx]

	SQR_HEAD <QWORD PTR[24+rcx]>,r13
	mov	QWORD PTR[560+rsp],r13
	SQR_NEXT <QWORD PTR[32+rcx]>,r14
	mov	QWORD PTR[568+rsp],r14
	SQR_NEXT <QWORD PTR[40+rcx]>,r15
	SQR_NEXT r8,rsi
	SQR_NEXT r9,r11
	mov	r12,rdx

	;---------------------------------------------------------------
	; Row 3: a[3] * a[4..7]
	;---------------------------------------------------------------
	mov	rbx,QWORD PTR[24+rcx]

	SQR_HEAD <QWORD PTR[32+rcx]>,r15
	mov	QWORD PTR[576+rsp],r15
	SQR_NEXT <QWORD PTR[40+rcx]>,rsi
	mov	QWORD PTR[584+rsp],rsi
	SQR_NEXT r8,r11
	SQR_NEXT r9,r12
	mov	r15,rdx

	;---------------------------------------------------------------
	; Row 4: a[4] * a[5..7]
	;---------------------------------------------------------------
	mov	rbx,QWORD PTR[32+rcx]

	SQR_HEAD <QWORD PTR[40+rcx]>,r11
	mov	QWORD PTR[592+rsp],r11
	SQR_NEXT r8,r12
	mov	QWORD PTR[600+rsp],r12
	SQR_NEXT r9,r15
	mov	r11,rdx

	;---------------------------------------------------------------
	; Row 5: a[5] * a[6..7]
	;---------------------------------------------------------------
	mov	rbx,QWORD PTR[40+rcx]

	SQR_HEAD r8,r15
	mov	QWORD PTR[608+rsp],r15
	SQR_NEXT r9,r11
	mov	QWORD PTR[616+rsp],r11
	mov	r12,rdx

	;---------------------------------------------------------------
	; Row 6: a[6] * a[7]
	;---------------------------------------------------------------
	mov	rbx,r8

	SQR_HEAD r9,r12
	mov	QWORD PTR[624+rsp],r12
	mov	QWORD PTR[632+rsp],rdx

	;---------------------------------------------------------------
	; Double the cross-product sum and add the squares: qwords 1..6.
	;---------------------------------------------------------------
	mov	r10,QWORD PTR[528+rsp]
	mov	r11,QWORD PTR[536+rsp]
	mov	r12,QWORD PTR[544+rsp]
	mov	r13,QWORD PTR[552+rsp]
	mov	r14,QWORD PTR[560+rsp]
	mov	r15,QWORD PTR[568+rsp]

	; a[3]^2 is computed first; rdi:r8 hold it (low:high) until it is
	; needed at qwords 6 and 7.
	mov	rax,QWORD PTR[24+rcx]
	mul	rax
	mov	rdi,rax
	mov	r8,rdx

	add	r10,r10
	adc	r11,r11
	adc	r12,r12
	adc	r13,r13
	adc	r14,r14
	adc	r15,r15
	adc	r8,0				; carry out of the doubling joins hi(a[3]^2)

	mov	rax,QWORD PTR[rcx]		; a[0]^2
	mul	rax
	mov	QWORD PTR[520+rsp],rax		; product qword 0
	mov	rbx,rdx

	mov	rax,QWORD PTR[8+rcx]		; a[1]^2
	mul	rax

	add	r10,rbx
	adc	r11,rax
	adc	rdx,0

	mov	rbx,rdx
	mov	QWORD PTR[528+rsp],r10
	mov	QWORD PTR[536+rsp],r11

	mov	rax,QWORD PTR[16+rcx]		; a[2]^2
	mul	rax

	add	r12,rbx
	adc	r13,rax
	adc	rdx,0

	mov	rbx,rdx

	mov	QWORD PTR[544+rsp],r12
	mov	QWORD PTR[552+rsp],r13

	xor	rbp,rbp
	add	r14,rbx
	adc	r15,rdi				; + lo(a[3]^2)
	adc	rbp,0				; rbp = carry into qword 7

	mov	QWORD PTR[560+rsp],r14
	mov	QWORD PTR[568+rsp],r15

	;---------------------------------------------------------------
	; Same for qwords 7..15.
	;---------------------------------------------------------------
	mov	r10,QWORD PTR[576+rsp]
	mov	r11,QWORD PTR[584+rsp]
	mov	r12,QWORD PTR[592+rsp]
	mov	r13,QWORD PTR[600+rsp]
	mov	r14,QWORD PTR[608+rsp]
	mov	r15,QWORD PTR[616+rsp]
	mov	rdi,QWORD PTR[624+rsp]
	mov	rsi,QWORD PTR[632+rsp]

	mov	rax,r9				; a[7]^2 -> rbx:r9 (high:low)
	mul	rax
	mov	r9,rax
	mov	rbx,rdx

	add	r10,r10
	adc	r11,r11
	adc	r12,r12
	adc	r13,r13
	adc	r14,r14
	adc	r15,r15
	adc	rdi,rdi
	adc	rsi,rsi
	adc	rbx,0

	add	r10,rbp				; carry saved from the low half

	mov	rax,QWORD PTR[32+rcx]		; a[4]^2
	mul	rax

	add	r10,r8				; + hi(a[3]^2) and doubling carry
	adc	r11,rax
	adc	rdx,0

	mov	rbp,rdx

	mov	QWORD PTR[576+rsp],r10
	mov	QWORD PTR[584+rsp],r11

	mov	rax,QWORD PTR[40+rcx]		; a[5]^2
	mul	rax

	add	r12,rbp
	adc	r13,rax
	adc	rdx,0

	mov	rbp,rdx

	mov	QWORD PTR[592+rsp],r12
	mov	QWORD PTR[600+rsp],r13

	mov	rax,QWORD PTR[48+rcx]		; a[6]^2
	mul	rax

	add	r14,rbp
	adc	r15,rax
	adc	rdx,0

	mov	QWORD PTR[608+rsp],r14
	mov	QWORD PTR[616+rsp],r15

	add	rdi,rdx
	adc	rsi,r9				; + lo(a[7]^2)
	adc	rbx,0

	mov	QWORD PTR[624+rsp],rdi
	mov	QWORD PTR[632+rsp],rsi
	mov	QWORD PTR[640+rsp],rbx

	; Tail call: mont_reduce returns directly to our caller.
	jmp	mont_reduce
sqr_reduce ENDP

PUBLIC mod_exp_512

;-----------------------------------------------------------------------
; mod_exp_512 -- public entry point: 512-bit modular exponentiation using
; a 5-bit fixed window over a 32-entry table of powers.
;
; NOTE(review): presumably called from C as
;     void mod_exp_512(result, g, exp, data)
; with four pointer arguments -- confirm against the C caller.
;
; ABI:   Microsoft x64 on entry (rcx, rdx, r8, r9). The prologue saves
;        rdi/rsi in the caller's home space and moves the arguments into
;        rdi, rsi, rdx, rcx for the body.
; In:    arg1 (rdi) -> result buffer, 8 qwords (also used as the working
;                      value during the main loop)
;        arg2 (rsi) -> base g, 64 bytes (unaligned loads)
;        arg3 (rdx) -> exponent, 64 bytes (unaligned loads)
;        arg4 (rcx) -> precomputed data block consumed by mont_reduce;
;                      the 8 qwords at +512 are used for the final
;                      conditional subtraction
; Out:   result buffer holds the final 8-qword value.
; Saves: rbp, rbx, r12-r15 (pushed), rdi, rsi (home space).
;        Uses xmm0-xmm4 only.
;
; Frame (rsp after the 64-byte alignment; 2688 bytes reserved):
;        [0]        caller rsp after the six pushes
;        [8]        result pointer        [16]  g pointer
;        [24]       data pointer          [32]  table-build loop counter
;        [40]       table write pointer   [48]  current exponent bit index
;        [64..127]  copy of the exponent; [128] is a zero qword so the
;                   dword reads at [126] and above see zero padding
;        [136]      address where mont_reduce stores its result
;        [384..447] g in the working (reduced) representation
;        [448..511] temporary value / selected table entry
;        [512..639] 16-qword buffer reduced by mont_reduce
;        [640.. ]   table: entry i (0..31) keeps 16-bit word j of its value
;                   at 640 + 64*j + 2*i
; The helper procedures address this same frame with offsets 8 larger,
; because a return address has been pushed when they run.
;-----------------------------------------------------------------------
mod_exp_512 PROC PUBLIC
	mov	QWORD PTR[8+rsp],rdi	;WIN64 prologue
	mov	QWORD PTR[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_mod_exp_512::
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9

	push	rbp
	push	rbx
	push	r12
	push	r13
	push	r14
	push	r15

	; Reserve the frame and align it to 64 bytes.
	mov	r8,rsp
	sub	rsp,2688
	and	rsp,-64

	; Remember the pre-alignment rsp and the pointer arguments.
	mov	QWORD PTR[rsp],r8
	mov	QWORD PTR[8+rsp],rdi
	mov	QWORD PTR[16+rsp],rsi
	mov	QWORD PTR[24+rsp],rcx
$L$body::

	;---------------------------------------------------------------
	; Phase 1: GT = mont_reduce(g << 256), stored at [384+rsp].
	; The product buffer is zeroed in qwords 0..3 and 12..15, with g
	; placed in qwords 4..11.
	;---------------------------------------------------------------
	pxor	xmm4,xmm4
	movdqu	xmm0,XMMWORD PTR[rsi]
	movdqu	xmm1,XMMWORD PTR[16+rsi]
	movdqu	xmm2,XMMWORD PTR[32+rsi]
	movdqu	xmm3,XMMWORD PTR[48+rsi]
	movdqa	XMMWORD PTR[512+rsp],xmm4
	movdqa	XMMWORD PTR[528+rsp],xmm4
	movdqa	XMMWORD PTR[608+rsp],xmm4
	movdqa	XMMWORD PTR[624+rsp],xmm4
	movdqa	XMMWORD PTR[544+rsp],xmm0
	movdqa	XMMWORD PTR[560+rsp],xmm1
	movdqa	XMMWORD PTR[576+rsp],xmm2
	movdqa	XMMWORD PTR[592+rsp],xmm3

	; Load the exponent into xmm0-3 now, before rdx is overwritten; the
	; helpers do not touch xmm registers, so it survives until phase 3.
	movdqu	xmm0,XMMWORD PTR[rdx]
	movdqu	xmm1,XMMWORD PTR[16+rdx]
	movdqu	xmm2,XMMWORD PTR[32+rdx]
	movdqu	xmm3,XMMWORD PTR[48+rdx]

	lea	rbx,QWORD PTR[384+rsp]
	mov	QWORD PTR[136+rsp],rbx		; mont_reduce writes to GT
	call	mont_reduce

	;---------------------------------------------------------------
	; Phase 2: build the 32-entry table.
	; tmp = 2^128 (only qword 2 is non-zero) becomes table entry 0.
	;---------------------------------------------------------------
	lea	rcx,QWORD PTR[448+rsp]
	xor	rax,rax
	mov	QWORD PTR[rcx],rax
	mov	QWORD PTR[8+rcx],rax
	mov	QWORD PTR[24+rcx],rax
	mov	QWORD PTR[32+rcx],rax
	mov	QWORD PTR[40+rcx],rax
	mov	QWORD PTR[48+rcx],rax
	mov	QWORD PTR[56+rcx],rax
	mov	QWORD PTR[128+rsp],rax		; zero padding above the exponent copy
	mov	QWORD PTR[16+rcx],1

	lea	rbp,QWORD PTR[640+rsp]		; rbp -> table entry 0
	mov	rsi,rcx				; rsi -> tmp
	mov	rdi,rbp

	; Scatter tmp into the table: each qword is split into four 16-bit
	; words written 64 bytes apart.
	mov	rax,8
loop_0::
	mov	rbx,QWORD PTR[rcx]
	mov	WORD PTR[rdi],bx
	shr	rbx,16
	mov	WORD PTR[64+rdi],bx
	shr	rbx,16
	mov	WORD PTR[128+rdi],bx
	shr	rbx,16
	mov	WORD PTR[192+rdi],bx
	lea	rcx,QWORD PTR[8+rcx]
	lea	rdi,QWORD PTR[256+rdi]
	dec	rax
	jnz	loop_0

	; 31 more entries: tmp = reduce(tmp * GT), scattered at entry+1.
	mov	rax,31
	mov	QWORD PTR[32+rsp],rax
	mov	QWORD PTR[40+rsp],rbp

	mov	QWORD PTR[136+rsp],rsi		; mont_reduce writes back to tmp
	mov	r10,QWORD PTR[rsi]		; registers mirror tmp, as
	mov	r11,QWORD PTR[8+rsi]		; mont_mul_a3b expects
	mov	r12,QWORD PTR[16+rsi]
	mov	r13,QWORD PTR[24+rsi]
	mov	r14,QWORD PTR[32+rsi]
	mov	r15,QWORD PTR[40+rsi]
	mov	r8,QWORD PTR[48+rsi]
	mov	r9,QWORD PTR[56+rsi]
init_loop::
	lea	rdi,QWORD PTR[384+rsp]		; rdi -> GT
	call	mont_mul_a3b
	lea	rsi,QWORD PTR[448+rsp]
	mov	rbp,QWORD PTR[40+rsp]
	add	rbp,2				; next entry: 2 bytes further on
	mov	QWORD PTR[40+rsp],rbp
	mov	rcx,rsi
	mov	rax,8
loop_1::
	mov	rbx,QWORD PTR[rcx]
	mov	WORD PTR[rbp],bx
	shr	rbx,16
	mov	WORD PTR[64+rbp],bx
	shr	rbx,16
	mov	WORD PTR[128+rbp],bx
	shr	rbx,16
	mov	WORD PTR[192+rbp],bx
	lea	rcx,QWORD PTR[8+rcx]
	lea	rbp,QWORD PTR[256+rbp]
	dec	rax
	jnz	loop_1
	mov	rax,QWORD PTR[32+rsp]
	sub	rax,1
	mov	QWORD PTR[32+rsp],rax
	jne	init_loop

	;---------------------------------------------------------------
	; Phase 3: exponentiation.
	;---------------------------------------------------------------
	; Spill the exponent to its frame slot.
	movdqa	XMMWORD PTR[64+rsp],xmm0
	movdqa	XMMWORD PTR[80+rsp],xmm1
	movdqa	XMMWORD PTR[96+rsp],xmm2
	movdqa	XMMWORD PTR[112+rsp],xmm3

	; Top window: the dword at byte 62 covers exponent bits 496..527
	; (bits 512 and up are the zero padding). Bits 507..511 select the
	; starting table entry and are then cleared in the stored copy.
	mov	eax,DWORD PTR[126+rsp]
	mov	rdx,rax
	shr	rax,11
	and	edx,007FFh
	mov	DWORD PTR[126+rsp],edx
	lea	rsi,QWORD PTR[640+rax*2+rsp]
	mov	rdx,QWORD PTR[8+rsp]		; gather straight into result

	; Gather: rebuild two qwords per pass from 16-bit words spaced
	; 64 bytes apart (rbx = qword 2k, rax = qword 2k+1).
	mov	rbp,4
loop_2::
	movzx	rbx,WORD PTR[192+rsi]
	movzx	rax,WORD PTR[448+rsi]
	shl	rbx,16
	shl	rax,16
	mov	bx,WORD PTR[128+rsi]
	mov	ax,WORD PTR[384+rsi]
	shl	rbx,16
	shl	rax,16
	mov	bx,WORD PTR[64+rsi]
	mov	ax,WORD PTR[320+rsi]
	shl	rbx,16
	shl	rax,16
	mov	bx,WORD PTR[rsi]
	mov	ax,WORD PTR[256+rsi]
	mov	QWORD PTR[rdx],rbx
	mov	QWORD PTR[8+rdx],rax
	lea	rsi,QWORD PTR[512+rsi]
	lea	rdx,QWORD PTR[16+rdx]
	sub	rbp,1
	jnz	loop_2

	mov	QWORD PTR[48+rsp],505		; bit index of the next window

	mov	rcx,QWORD PTR[8+rsp]
	mov	QWORD PTR[136+rsp],rcx		; reductions now write to result
	mov	r10,QWORD PTR[rcx]		; registers mirror result, as
	mov	r11,QWORD PTR[8+rcx]		; sqr_reduce expects
	mov	r12,QWORD PTR[16+rcx]
	mov	r13,QWORD PTR[24+rcx]
	mov	r14,QWORD PTR[32+rcx]
	mov	r15,QWORD PTR[40+rcx]
	mov	r8,QWORD PTR[48+rcx]
	mov	r9,QWORD PTR[56+rcx]
	jmp	sqr_2				; first pass squares only twice

	; Per window: five squarings, then multiply by the table entry
	; selected by the next 5 exponent bits.
main_loop_a3b::
	call	sqr_reduce
	call	sqr_reduce
	call	sqr_reduce
sqr_2::
	call	sqr_reduce
	call	sqr_reduce

	; Extract 5 bits starting at the bit index: read a dword at the
	; containing 16-bit word, shift right by (index mod 16), mask.
	mov	rcx,QWORD PTR[48+rsp]
	mov	rax,rcx
	shr	rax,4
	mov	edx,DWORD PTR[64+rax*2+rsp]
	and	rcx,15
	shr	rdx,cl
	and	rdx,01Fh

	lea	rsi,QWORD PTR[640+rdx*2+rsp]	; rsi -> selected table entry
	lea	rdx,QWORD PTR[448+rsp]		; gather it into tmp
	mov	rdi,rdx				; rdi -> tmp (operand A)
	mov	rbp,4
loop_3::
	movzx	rbx,WORD PTR[192+rsi]
	movzx	rax,WORD PTR[448+rsi]
	shl	rbx,16
	shl	rax,16
	mov	bx,WORD PTR[128+rsi]
	mov	ax,WORD PTR[384+rsi]
	shl	rbx,16
	shl	rax,16
	mov	bx,WORD PTR[64+rsi]
	mov	ax,WORD PTR[320+rsi]
	shl	rbx,16
	shl	rax,16
	mov	bx,WORD PTR[rsi]
	mov	ax,WORD PTR[256+rsi]
	mov	QWORD PTR[rdx],rbx
	mov	QWORD PTR[8+rdx],rax
	lea	rsi,QWORD PTR[512+rsi]
	lea	rdx,QWORD PTR[16+rdx]
	sub	rbp,1
	jnz	loop_3

	mov	rsi,QWORD PTR[8+rsp]		; rsi -> result (operand B)
	call	mont_mul_a3b

	; Step down one 5-bit window; stop once the index goes negative.
	mov	rcx,QWORD PTR[48+rsp]
	sub	rcx,5
	mov	QWORD PTR[48+rsp],rcx
	jge	main_loop_a3b

end_main_loop_a3b::

	;---------------------------------------------------------------
	; Phase 4: one more mont_reduce on the result, zero-extended to
	; 16 qwords, to convert it out of the working representation.
	;---------------------------------------------------------------
	mov	rdx,QWORD PTR[8+rsp]
	pxor	xmm4,xmm4
	movdqu	xmm0,XMMWORD PTR[rdx]
	movdqu	xmm1,XMMWORD PTR[16+rdx]
	movdqu	xmm2,XMMWORD PTR[32+rdx]
	movdqu	xmm3,XMMWORD PTR[48+rdx]
	movdqa	XMMWORD PTR[576+rsp],xmm4
	movdqa	XMMWORD PTR[592+rsp],xmm4
	movdqa	XMMWORD PTR[608+rsp],xmm4
	movdqa	XMMWORD PTR[624+rsp],xmm4
	movdqa	XMMWORD PTR[512+rsp],xmm0
	movdqa	XMMWORD PTR[528+rsp],xmm1
	movdqa	XMMWORD PTR[544+rsp],xmm2
	movdqa	XMMWORD PTR[560+rsp],xmm3
	call	mont_reduce

	;---------------------------------------------------------------
	; Phase 5: final conditional subtraction. Compute
	; result - (8 qwords at data+512) and keep the difference only if
	; the subtraction did not borrow.
	;---------------------------------------------------------------
	mov	rax,QWORD PTR[8+rsp]
	mov	r8,QWORD PTR[rax]
	mov	r9,QWORD PTR[8+rax]
	mov	r10,QWORD PTR[16+rax]
	mov	r11,QWORD PTR[24+rax]
	mov	r12,QWORD PTR[32+rax]
	mov	r13,QWORD PTR[40+rax]
	mov	r14,QWORD PTR[48+rax]
	mov	r15,QWORD PTR[56+rax]

	mov	rbx,QWORD PTR[24+rsp]
	add	rbx,512

	sub	r8,QWORD PTR[rbx]
	sbb	r9,QWORD PTR[8+rbx]
	sbb	r10,QWORD PTR[16+rbx]
	sbb	r11,QWORD PTR[24+rbx]
	sbb	r12,QWORD PTR[32+rbx]
	sbb	r13,QWORD PTR[40+rbx]
	sbb	r14,QWORD PTR[48+rbx]
	sbb	r15,QWORD PTR[56+rbx]

	; CF from the last sbb stays live through every mov/cmovnc below;
	; no flag-writing instruction may be inserted before the last cmovnc.
	mov	rsi,QWORD PTR[rax]
	mov	rdi,QWORD PTR[8+rax]
	mov	rcx,QWORD PTR[16+rax]
	mov	rdx,QWORD PTR[24+rax]
	cmovnc	rsi,r8
	cmovnc	rdi,r9
	cmovnc	rcx,r10
	cmovnc	rdx,r11
	mov	QWORD PTR[rax],rsi
	mov	QWORD PTR[8+rax],rdi
	mov	QWORD PTR[16+rax],rcx
	mov	QWORD PTR[24+rax],rdx

	mov	rsi,QWORD PTR[32+rax]
	mov	rdi,QWORD PTR[40+rax]
	mov	rcx,QWORD PTR[48+rax]
	mov	rdx,QWORD PTR[56+rax]
	cmovnc	rsi,r12
	cmovnc	rdi,r13
	cmovnc	rcx,r14
	cmovnc	rdx,r15
	mov	QWORD PTR[32+rax],rsi
	mov	QWORD PTR[40+rax],rdi
	mov	QWORD PTR[48+rax],rcx
	mov	QWORD PTR[56+rax],rdx

	; Restore callee-saved registers from the push area (r15 was pushed
	; last, so it sits lowest) and drop the frame.
	mov	rsi,QWORD PTR[rsp]
	mov	r15,QWORD PTR[rsi]
	mov	r14,QWORD PTR[8+rsi]
	mov	r13,QWORD PTR[16+rsi]
	mov	r12,QWORD PTR[24+rsi]
	mov	rbx,QWORD PTR[32+rsi]
	mov	rbp,QWORD PTR[40+rsi]
	lea	rsp,QWORD PTR[48+rsi]
$L$epilogue::
	mov	rdi,QWORD PTR[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD PTR[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_mod_exp_512::
mod_exp_512 ENDP

.text$ ENDS

END