#ifdef IN_SANDY2X

/*
   This file is the result of merging
   amd64-51/fe25519_pack.c and amd64-51/fe25519_freeze.s.
*/
#include "fe51_namespace.h"
#include "consts_namespace.h"

/*
 * fe51_pack / _fe51_pack
 *
 * Syntax: GNU as, AT&T operand order, preprocessed by cpp.
 * ABI:    assumes System V AMD64 register usage (confirm for other targets).
 *
 * In:     %rdi = destination, 32 bytes are written at offsets 0..31
 *         %rsi = source, five 64-bit limbs are read at offsets 0,8,16,24,32
 * Out:    nothing is returned in a register
 * Clobb:  %rax, %rcx, %rdx, %rsi, %r8, %r9, %r10, %r11, flags
 *         (%r12 is used but saved on entry and restored before ret)
 * Stack:  32..63 bytes, %rsp is rounded down to a 32-byte boundary
 *
 * Steps:
 *   1. load the five limbs and run three carry passes of 51 bits each,
 *      folding the carry out of the top limb back into limb 0 times 19;
 *   2. build an all-ones / all-zero mask that is set only when
 *      limb0 >= REDMASK51 - 18 and limbs 1..4 all equal REDMASK51,
 *      then subtract the masked constants from the limbs;
 *   3. store the limbs as 32 little-endian bytes.
 *
 * REDMASK51 is defined outside this file; it is presumably 2^51 - 1, in
 * which case step 2 subtracts 2^255 - 19 when the value is not below it.
 *
 * The only branch is the loop counter in step 1; the decision in step 2
 * is made with cmov and masks, and every memory offset is a constant.
 */
.p2align 5

#ifdef ASM_HIDE_SYMBOL
ASM_HIDE_SYMBOL fe51_pack
ASM_HIDE_SYMBOL _fe51_pack
#endif
.globl fe51_pack
.globl _fe51_pack
#ifdef __ELF__
.type  fe51_pack, @function
.type  _fe51_pack, @function
#endif
fe51_pack:
_fe51_pack:

        /* ---- frame: r11 = (rsp & 31) + 32, rsp -= r11 ------------------ */
        movq    %rsp, %r11
        andq    $31, %r11
        addq    $32, %r11
        subq    %r11, %rsp
        movq    %r11, 0(%rsp)           /* frame size, needed to undo the sub */
        movq    %r12, 8(%rsp)           /* r12 is callee-saved */

        /* ---- load limbs: rdx, rcx, r8, r9, rsi = limb0 .. limb4 -------- */
        movq    0(%rsi), %rdx
        movq    8(%rsi), %rcx
        movq    16(%rsi), %r8
        movq    24(%rsi), %r9
        movq    32(%rsi), %rsi          /* source pointer is dead after this */

        movq    REDMASK51(%rip), %rax   /* rax = limb mask */
        leaq    -18(%rax), %r10         /* r10 = mask - 18 */
        movq    $3, %r11                /* r11 = remaining carry passes */

.p2align 4
._reduceloop:
        /* limb0 -> limb1 */
        movq    %rdx, %r12
        shrq    $51, %r12
        andq    %rax, %rdx
        addq    %r12, %rcx

        /* limb1 -> limb2 */
        movq    %rcx, %r12
        shrq    $51, %r12
        andq    %rax, %rcx
        addq    %r12, %r8

        /* limb2 -> limb3 */
        movq    %r8, %r12
        shrq    $51, %r12
        andq    %rax, %r8
        addq    %r12, %r9

        /* limb3 -> limb4 */
        movq    %r9, %r12
        shrq    $51, %r12
        andq    %rax, %r9
        addq    %r12, %rsi

        /* limb4 -> limb0, carry weighted by 19 */
        movq    %rsi, %r12
        shrq    $51, %r12
        andq    %rax, %rsi
        imulq   $19, %r12, %r12
        addq    %r12, %rdx

        subq    $1, %r11
        ja      ._reduceloop            /* loops while the counter is still > 0 */

        /* ---- conditional final subtraction ----------------------------- */
        /* r11 is 0 here and serves as the zero source for the cmovs.       */
        movq    $1, %r12                /* r12 = 1: assume subtraction needed */
        cmpq    %r10, %rdx
        cmovl   %r11, %r12              /* limb0 < mask - 18 (signed) -> 0 */
        cmpq    %rax, %rcx
        cmovne  %r11, %r12              /* limb1 != mask -> 0 */
        cmpq    %rax, %r8
        cmovne  %r11, %r12              /* limb2 != mask -> 0 */
        cmpq    %rax, %r9
        cmovne  %r11, %r12              /* limb3 != mask -> 0 */
        cmpq    %rax, %rsi
        cmovne  %r11, %r12              /* limb4 != mask -> 0 */

        negq    %r12                    /* 1 -> all ones, 0 -> 0 */
        andq    %r12, %rax              /* masked subtrahend for limbs 1..4 */
        andq    %r12, %r10              /* masked subtrahend for limb0 */
        subq    %r10, %rdx
        subq    %rax, %rcx
        subq    %rax, %r8
        subq    %rax, %r9
        subq    %rax, %rsi

        /* ---- bytes 0..5: limb0 bits 0..47 ------------------------------- */
        movq    %rdx, %rax
        andl    $0xFF, %eax
        movb    %al, 0(%rdi)

        movq    %rdx, %rax
        shrq    $8, %rax
        andl    $0xFF, %eax
        movb    %al, 1(%rdi)

        movq    %rdx, %rax
        shrq    $16, %rax
        andl    $0xFF, %eax
        movb    %al, 2(%rdi)

        movq    %rdx, %rax
        shrq    $24, %rax
        andl    $0xFF, %eax
        movb    %al, 3(%rdi)

        movq    %rdx, %rax
        shrq    $32, %rax
        andl    $0xFF, %eax
        movb    %al, 4(%rdi)

        movq    %rdx, %rax
        shrq    $40, %rax
        andl    $0xFF, %eax
        movb    %al, 5(%rdi)

        /* ---- byte 6: limb0 bits 48 and up, merged with limb1 bits 0..4 -- */
        /* xor merges the two parts; they do not overlap as long as limb0   */
        /* is below 2^51 (expected after the reduction above).              */
        movq    %rdx, %rdx              /* no-op move, kept as in the original */
        shrq    $48, %rdx
        movq    %rcx, %rax
        shlq    $3, %rax
        andl    $0xF8, %eax
        xorq    %rdx, %rax
        movb    %al, 6(%rdi)

        /* ---- bytes 7..11: limb1 bits 5..44 ------------------------------ */
        movq    %rcx, %rdx
        shrq    $5, %rdx
        andl    $0xFF, %edx
        movb    %dl, 7(%rdi)

        movq    %rcx, %rdx
        shrq    $13, %rdx
        andl    $0xFF, %edx
        movb    %dl, 8(%rdi)

        movq    %rcx, %rdx
        shrq    $21, %rdx
        andl    $0xFF, %edx
        movb    %dl, 9(%rdi)

        movq    %rcx, %rdx
        shrq    $29, %rdx
        andl    $0xFF, %edx
        movb    %dl, 10(%rdi)

        movq    %rcx, %rdx
        shrq    $37, %rdx
        andl    $0xFF, %edx
        movb    %dl, 11(%rdi)

        /* ---- byte 12: limb1 bits 45 and up, merged with limb2 bits 0..1 - */
        movq    %rcx, %rdx
        shrq    $45, %rdx
        movq    %r8, %rcx               /* limb1 is dead, rcx becomes scratch */
        shlq    $6, %rcx
        andl    $0xC0, %ecx
        xorq    %rdx, %rcx
        movb    %cl, 12(%rdi)

        /* ---- bytes 13..18: limb2 bits 2..49 ----------------------------- */
        movq    %r8, %rdx
        shrq    $2, %rdx
        andl    $0xFF, %edx
        movb    %dl, 13(%rdi)

        movq    %r8, %rdx
        shrq    $10, %rdx
        andl    $0xFF, %edx
        movb    %dl, 14(%rdi)

        movq    %r8, %rdx
        shrq    $18, %rdx
        andl    $0xFF, %edx
        movb    %dl, 15(%rdi)

        movq    %r8, %rdx
        shrq    $26, %rdx
        andl    $0xFF, %edx
        movb    %dl, 16(%rdi)

        movq    %r8, %rdx
        shrq    $34, %rdx
        andl    $0xFF, %edx
        movb    %dl, 17(%rdi)

        movq    %r8, %rdx
        shrq    $42, %rdx
        movb    %dl, 18(%rdi)           /* the byte store keeps only the low 8 bits */

        /* ---- byte 19: limb2 bits 50 and up, merged with limb3 bits 0..6 - */
        movq    %r8, %rdx
        shrq    $50, %rdx
        movq    %r9, %rcx
        shlq    $1, %rcx
        andl    $0xFE, %ecx
        xorq    %rdx, %rcx
        movb    %cl, 19(%rdi)

        /* ---- bytes 20..24: limb3 bits 7..46 ----------------------------- */
        movq    %r9, %rdx
        shrq    $7, %rdx
        andl    $0xFF, %edx
        movb    %dl, 20(%rdi)

        movq    %r9, %rdx
        shrq    $15, %rdx
        andl    $0xFF, %edx
        movb    %dl, 21(%rdi)

        movq    %r9, %rdx
        shrq    $23, %rdx
        andl    $0xFF, %edx
        movb    %dl, 22(%rdi)

        movq    %r9, %rdx
        shrq    $31, %rdx
        andl    $0xFF, %edx
        movb    %dl, 23(%rdi)

        movq    %r9, %rdx
        shrq    $39, %rdx
        andl    $0xFF, %edx
        movb    %dl, 24(%rdi)

        /* ---- byte 25: limb3 bits 47 and up, merged with limb4 bits 0..3 - */
        movq    %r9, %rdx
        shrq    $47, %rdx
        movq    %rsi, %rcx
        shlq    $4, %rcx
        andl    $0xF0, %ecx
        xorq    %rdx, %rcx
        movb    %cl, 25(%rdi)

        /* ---- bytes 26..30: limb4 bits 4..43 ----------------------------- */
        movq    %rsi, %rdx
        shrq    $4, %rdx
        andl    $0xFF, %edx
        movb    %dl, 26(%rdi)

        movq    %rsi, %rdx
        shrq    $12, %rdx
        andl    $0xFF, %edx
        movb    %dl, 27(%rdi)

        movq    %rsi, %rdx
        shrq    $20, %rdx
        andl    $0xFF, %edx
        movb    %dl, 28(%rdi)

        movq    %rsi, %rdx
        shrq    $28, %rdx
        andl    $0xFF, %edx
        movb    %dl, 29(%rdi)

        movq    %rsi, %rdx
        shrq    $36, %rdx
        andl    $0xFF, %edx
        movb    %dl, 30(%rdi)

        /* ---- byte 31: limb4 bits 44 and up ------------------------------ */
        movq    %rsi, %rsi              /* no-op move, kept as in the original */
        shrq    $44, %rsi
        movb    %sil, 31(%rdi)

        /* ---- restore r12 and release the frame -------------------------- */
        movq    0(%rsp), %r11
        movq    8(%rsp), %r12
        addq    %r11, %rsp
        ret

#endif