#!/usr/bin/env perl
#
# Generator for the x86_64 CPU-capability helper routines.
#
# Usage: <this script> [flavour] [output]
#
#   flavour - assembler flavour handed to x86_64-xlate.pl.  If the first
#             argument contains a dot it is taken to be the output file name
#             instead and no flavour is passed on.
#   output  - output file name handed to x86_64-xlate.pl.
#
# Everything printed to STDOUT is piped through x86_64-xlate.pl, which is
# looked up next to this script or in a perlasm/ subdirectory beside it.
#
# Routines emitted: OPENSSL_atomic_add, OPENSSL_rdtsc, OPENSSL_ia32_cpuid,
# OPENSSL_cleanse, OPENSSL_wipe_cpu (two variants, selected by $win64) and
# OPENSSL_ia32_rdrand.
#
# NOTE: this file intentionally keeps the package-global, no-strict style
# shared by scripts of this kind; $flavour may legitimately be undefined.

$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 calling convention is selected either by flavour or by a .asm target.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Build the translator command line.  Paths are quoted so that directories
# containing spaces survive the shell; empty/undefined arguments are left out
# so the translator sees exactly the argument list it would have seen before.
my $xlate_cmd = "\"$^X\" \"$xlate\"";
$xlate_cmd .= " $flavour"    if (defined($flavour) && length($flavour));
$xlate_cmd .= " \"$output\"" if (defined($output)  && length($output));

# A failed spawn must abort the build instead of silently producing nothing.
open OUT,"| $xlate_cmd" or die "can't call $xlate: $!";
*STDOUT=*OUT;

# First four integer argument registers for the selected calling convention.
($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") :	# Win64 order
				 ("%rdi","%rsi","%rdx","%rcx");	# Unix order

# Common part:
#   .init hook          - calls OPENSSL_cpuid_setup.
#   OPENSSL_ia32cap_P   - 8-byte common symbol.
#   OPENSSL_atomic_add  - lock-cmpxchg retry loop adding the 32-bit value in
#                         $arg2 to the word at ($arg1); returns the new value
#                         sign-extended to 64 bits.
#   OPENSSL_rdtsc       - returns the rdtsc result as one 64-bit value.
#   OPENSSL_ia32_cpuid  - returns (%r9d << 32) | %r10d, where %r9d:%r10d start
#                         as a copy of cpuid(1) %ecx:%edx and are then adjusted
#                         as described by the inline comments.
#   OPENSSL_cleanse     - stores zero bytes over $arg2 bytes starting at $arg1;
#                         lengths of 15 or more are first brought to 8-byte
#                         alignment and then cleared a quadword at a time.
print<<___;
.extern		OPENSSL_cpuid_setup
.hidden		OPENSSL_cpuid_setup
.section	.init
	call	OPENSSL_cpuid_setup

.hidden	OPENSSL_ia32cap_P
.comm	OPENSSL_ia32cap_P,8,4

.text

.globl	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,\@abi-omnipotent
.align	16
OPENSSL_atomic_add:
	movl	($arg1),%eax
.Lspin:	leaq	($arg2,%rax),%r8
	.byte	0xf0		# lock
	cmpxchgl	%r8d,($arg1)
	jne	.Lspin
	movl	%r8d,%eax
	.byte	0x48,0x98	# cltq/cdqe
	ret
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add

.globl	OPENSSL_rdtsc
.type	OPENSSL_rdtsc,\@abi-omnipotent
.align	16
OPENSSL_rdtsc:
	rdtsc
	shl	\$32,%rdx
	or	%rdx,%rax
	ret
.size	OPENSSL_rdtsc,.-OPENSSL_rdtsc

.globl	OPENSSL_ia32_cpuid
.type	OPENSSL_ia32_cpuid,\@abi-omnipotent
.align	16
OPENSSL_ia32_cpuid:
	mov	%rbx,%r8		# save %rbx

	xor	%eax,%eax
	cpuid
	mov	%eax,%r11d		# max value for standard query level

	xor	%eax,%eax
	cmp	\$0x756e6547,%ebx	# "Genu"
	setne	%al
	mov	%eax,%r9d
	cmp	\$0x49656e69,%edx	# "ineI"
	setne	%al
	or	%eax,%r9d
	cmp	\$0x6c65746e,%ecx	# "ntel"
	setne	%al
	or	%eax,%r9d		# 0 indicates Intel CPU
	jz	.Lintel

	cmp	\$0x68747541,%ebx	# "Auth"
	setne	%al
	mov	%eax,%r10d
	cmp	\$0x69746E65,%edx	# "enti"
	setne	%al
	or	%eax,%r10d
	cmp	\$0x444D4163,%ecx	# "cAMD"
	setne	%al
	or	%eax,%r10d		# 0 indicates AMD CPU
	jnz	.Lintel

	# AMD specific
	mov	\$0x80000000,%eax
	cpuid
	cmp	\$0x80000001,%eax
	jb	.Lintel
	mov	%eax,%r10d
	mov	\$0x80000001,%eax
	cpuid
	or	%ecx,%r9d
	and	\$0x00000801,%r9d	# isolate AMD XOP bit, 1<<11

	cmp	\$0x80000008,%r10d
	jb	.Lintel

	mov	\$0x80000008,%eax
	cpuid
	movzb	%cl,%r10		# number of cores - 1
	inc	%r10			# number of cores

	mov	\$1,%eax
	cpuid
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	shr	\$16,%ebx		# number of logical processors
	cmp	%r10b,%bl
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	jmp	.Lgeneric

.Lintel:
	cmp	\$4,%r11d
	mov	\$-1,%r10d
	jb	.Lnocacheinfo

	mov	\$4,%eax
	mov	\$0,%ecx		# query L1D
	cpuid
	mov	%eax,%r10d
	shr	\$14,%r10d
	and	\$0xfff,%r10d		# number of cores -1 per L1D

.Lnocacheinfo:
	mov	\$1,%eax
	cpuid
	and	\$0xbfefffff,%edx	# force reserved bits to 0
	cmp	\$0,%r9d
	jne	.Lnotintel
	or	\$0x40000000,%edx	# set reserved bit#30 on Intel CPUs
	and	\$15,%ah
	cmp	\$15,%ah		# examine Family ID
	jne	.Lnotintel
	or	\$0x00100000,%edx	# set reserved bit#20 to engage RC4_CHAR
.Lnotintel:
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	cmp	\$0,%r10d
	je	.Lgeneric

	or	\$0x10000000,%edx	# 1<<28
	shr	\$16,%ebx
	cmp	\$1,%bl			# see if cache is shared
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
.Lgeneric:
	and	\$0x00000800,%r9d	# isolate AMD XOP flag
	and	\$0xfffff7ff,%ecx
	or	%ecx,%r9d		# merge AMD XOP flag

	mov	%edx,%r10d		# %r9d:%r10d is copy of %ecx:%edx
	bt	\$27,%r9d		# check OSXSAVE bit
	jnc	.Lclear_avx
	xor	%ecx,%ecx		# XCR0
	.byte	0x0f,0x01,0xd0		# xgetbv
	and	\$6,%eax		# isolate XMM and YMM state support
	cmp	\$6,%eax
	je	.Ldone
.Lclear_avx:
	mov	\$0xefffe7ff,%eax	# ~(1<<28|1<<12|1<<11)
	and	%eax,%r9d		# clear AVX, FMA and AMD XOP bits
.Ldone:
	shl	\$32,%r9
	mov	%r10d,%eax
	mov	%r8,%rbx		# restore %rbx
	or	%r9,%rax
	ret
.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid

.globl  OPENSSL_cleanse
.type   OPENSSL_cleanse,\@abi-omnipotent
.align  16
OPENSSL_cleanse:
	xor	%rax,%rax
	cmp	\$15,$arg2
	jae	.Lot
	cmp	\$0,$arg2
	je	.Lret
.Little:
	mov	%al,($arg1)
	sub	\$1,$arg2
	lea	1($arg1),$arg1
	jnz	.Little
.Lret:
	ret
.align	16
.Lot:
	test	\$7,$arg1
	jz	.Laligned
	mov	%al,($arg1)
	lea	-1($arg2),$arg2
	lea	1($arg1),$arg1
	jmp	.Lot
.Laligned:
	mov	%rax,($arg1)
	lea	-8($arg2),$arg2
	test	\$-8,$arg2
	lea	8($arg1),$arg1
	jnz	.Laligned
	cmp	\$0,$arg2
	jne	.Little
	ret
.size	OPENSSL_cleanse,.-OPENSSL_cleanse
___

# OPENSSL_wipe_cpu, non-Win64 variant: zeroes %xmm0-%xmm15 and
# %rcx/%rdx/%rsi/%rdi/%r8-%r11, then returns %rsp+8 in %rax.
print<<___ if (!$win64);
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
	pxor	%xmm10,%xmm10
	pxor	%xmm11,%xmm11
	pxor	%xmm12,%xmm12
	pxor	%xmm13,%xmm13
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%rsi,%rsi
	xorq	%rdi,%rdi
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
# OPENSSL_wipe_cpu, Win64 variant: zeroes only %xmm0-%xmm5 and
# %rcx/%rdx/%r8-%r11 (presumably because the remaining registers are
# callee-saved under that ABI - confirm), then returns %rsp+8 in %rax.
print<<___ if ($win64);
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___

# OPENSSL_ia32_rdrand: issues rdrand up to 8 times, stopping at the first
# attempt that sets the carry flag.  If the value left in %rax is zero, the
# remaining loop counter in %rcx is returned instead.
print<<___;
.globl	OPENSSL_ia32_rdrand
.type	OPENSSL_ia32_rdrand,\@abi-omnipotent
.align	16
OPENSSL_ia32_rdrand:
	mov	\$8,%ecx
.Loop_rdrand:
	rdrand	%rax
	jc	.Lbreak_rdrand
	loop	.Loop_rdrand
.Lbreak_rdrand:
	cmp	\$0,%rax
	cmove	%rcx,%rax
	ret
.size	OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand
___

# Closing the pipe flushes the output and waits for the translator; a false
# return means a write error or a non-zero translator exit status, either of
# which would otherwise leave a truncated output file unnoticed.
close STDOUT or die "error closing pipe to $xlate: $! (exit status $?)";