/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/******************************************************************************/
//                     ALGORITHM DESCRIPTION
//                     ---------------------
//
// Description:
//  Let K = 64 (table size).
//
//       e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y))
//
//  where
//       x = m*log(2)/K + y,    y in [-log(2)/K..log(2)/K]
//       m = n*K + j,           m,n,j - signed integer, j in [-K/2..K/2]
//
//       values of 2^(j/K) are tabulated as T[j] = T_hi[j] ( 1 + T_lo[j]).
//
//  P(y) is a minimax polynomial approximation of exp(y)-1
//  on small interval [-log(2)/K..log(2)/K] (were calculated by Maple V).
//
//  To avoid problems with arithmetic overflow and underflow,
//  value of 2^n is safely computed as 2^n1 * 2^n2, where n1 in [-BIAS/2..BIAS/2]
//  and BIAS is a value of exponent bias.
//
// Special cases:
//  exp(NaN) = NaN
//  exp(+INF) = +INF
//  exp(-INF) = 0
//  exp(x) = 1 for subnormals
//  for finite argument, only exp(0)=1 is exact
//  For IEEE double
//    if x >  709.782712893383973096 then exp(x) overflow
//    if x < -745.133219101941108420 then exp(x) underflow
//
/******************************************************************************/

#include <private/bionic_asm.h>

// -----------------------------------------------------------------------------
// double exp(double x)
//
// Syntax: GNU as, AT&T operand order (source, destination).
// In:     %xmm0 = x
// Out:    %xmm0 = result
// Uses:   %rax, %rcx, %rdx, %r8, %xmm0-%xmm7, flags.
// Stack:  24 bytes are reserved on entry and released before ret.
//           8(%rsp)  copy of the argument (re-read by the special-case paths)
//          16(%rsp)  result, stored and reloaded at .L_2TAG_PACKET_6.0.2
//           0(%rsp)  receives the constant 14 or 15 on some paths; no
//                    instruction in this file reads it back.
// The function makes no calls.
// -----------------------------------------------------------------------------
# -- Begin  exp
ENTRY(exp)
# parameter 1: %xmm0
..B1.1:
..___tag_value_exp.1:
        subq      $24, %rsp
..___tag_value_exp.3:
        // Keep the raw argument in memory so its 32-bit halves can be tested
        // with integer instructions later on.
        movsd     %xmm0, 8(%rsp)
..B1.2:
        // Duplicate x into both lanes and preload the first constants.
        unpcklpd  %xmm0, %xmm0
        movapd    cv(%rip), %xmm1
        movapd    Shifter(%rip), %xmm6
        movapd    16+cv(%rip), %xmm2
        movapd    32+cv(%rip), %xmm3
        // %eax = bits 48..62 of x: biased exponent plus the top four mantissa
        // bits, with the sign bit masked off.
        pextrw    $3, %xmm0, %eax
        andl      $32767, %eax
        // Range check without two branches: (16527 - e) | (e - 15504) has bit 31
        // set when e < 15504 (0x3c90) or e > 16527 (0x408f), i.e. when
        // |x| < 2^-54 or |x| >= 1024 (this also catches INF and NaN).
        movl      $16527, %edx
        subl      %eax, %edx
        subl      $15504, %eax
        orl       %eax, %edx
        cmpl      $-2147483648, %edx
        jae       .L_2TAG_PACKET_0.0.2
        // Main path. Adding and then subtracting Shifter rounds x*cv[0] to an
        // integer m; %xmm7 keeps the biased sum, whose low 32 bits are read as
        // the integer m below.
        mulpd     %xmm0, %xmm1
        addpd     %xmm6, %xmm1
        movapd    %xmm1, %xmm7
        subpd     %xmm6, %xmm1
        // Argument reduction in two steps: y = x - m*cv[16] - m*cv[32].
        mulpd     %xmm1, %xmm2
        movapd    64+cv(%rip), %xmm4
        mulpd     %xmm1, %xmm3
        movapd    80+cv(%rip), %xmm5
        subpd     %xmm2, %xmm0
        // %ecx = (m & 63) * 16 = byte offset of table entry j (16 bytes each),
        // %eax = %edx = m >> 6 = n (arithmetic shift, so n is signed).
        movd      %xmm7, %eax
        movl      %eax, %ecx
        andl      $63, %ecx
        shll      $4, %ecx
        sarl      $6, %eax
        movl      %eax, %edx
        // Build the exponent bits for the scale factor: clear the low six bits
        // of m (mmask), add bias (65472 = 1023 << 6) and shift left by 46 so
        // that n + 1023 starts at bit 52, the exponent field of a double.
        movdqa    mmask(%rip), %xmm6
        pand      %xmm6, %xmm7
        movdqa    bias(%rip), %xmm6
        paddq     %xmm6, %xmm7
        psllq     $46, %xmm7
        subpd     %xmm3, %xmm0
        // Fetch table entry j: two quadwords, used separately below.
        lea       Tbl_addr(%rip), %r8
        movapd    (%rcx,%r8), %xmm2
        // Polynomial in y, evaluated with packed arithmetic using the
        // coefficients at cv+48 .. cv+95. The low quadword of the table entry
        // is added into the sum as well.
        mulpd     %xmm0, %xmm4
        movapd    %xmm0, %xmm6
        movapd    %xmm0, %xmm1
        mulpd     %xmm6, %xmm6
        mulpd     %xmm6, %xmm0
        addpd     %xmm4, %xmm5
        mulsd     %xmm6, %xmm0
        mulpd     48+cv(%rip), %xmm6
        addsd     %xmm2, %xmm1
        unpckhpd  %xmm2, %xmm2
        mulpd     %xmm5, %xmm0
        addsd     %xmm0, %xmm1
        // OR the exponent bits into the high quadword of the table entry; the
        // result in %xmm2 is the scale factor applied to the polynomial sum.
        orpd      %xmm7, %xmm2
        unpckhpd  %xmm0, %xmm0
        addsd     %xmm1, %xmm0
        addsd     %xmm6, %xmm0
        // Unsigned test: taken when n + 894 is outside [0, 1916], i.e.
        // n < -894 or n > 1022. Those cases need the split scaling below.
        addl      $894, %edx
        cmpl      $1916, %edx
        ja        .L_2TAG_PACKET_1.0.2
        // Common case: result = scale * p + scale.
        mulsd     %xmm2, %xmm0
        addsd     %xmm2, %xmm0
        jmp       ..B1.5
.L_2TAG_PACKET_1.0.2:
        // Extreme exponent. On entry %eax = n, %xmm2 = scale, %xmm0 = p.
        // %xmm4 = all-ones mask shifted left by (-1022 - n) bits,
        // %edx  = -1022 - n,
        // %ecx  = n,
        // %xmm3 = (n >> 1) placed in the exponent field (pinsrw into word 3,
        //         then shifted left by 4).
        // The same amount is subtracted from the exponent field of %xmm2, so
        // the power of two appears to be split across %xmm2 and %xmm3.
        xorpd     %xmm3, %xmm3
        movapd    ALLONES(%rip), %xmm4
        movl      $-1022, %edx
        subl      %eax, %edx
        movd      %edx, %xmm5
        psllq     %xmm5, %xmm4
        movl      %eax, %ecx
        sarl      $1, %eax
        pinsrw    $3, %eax, %xmm3
        movapd    ebias(%rip), %xmm6
        psllq     $4, %xmm3
        psubd     %xmm3, %xmm2
        mulsd     %xmm2, %xmm0
        // Signed test: -1022 - n > 52.
        cmpl      $52, %edx
        jg        .L_2TAG_PACKET_2.0.2
        // Split %xmm2 into a masked part (%xmm4) and the remainder (%xmm2),
        // and add ebias to %xmm3 to give it a biased exponent.
        andpd     %xmm2, %xmm4
        paddd     %xmm6, %xmm3
        subsd     %xmm4, %xmm2
        addsd     %xmm2, %xmm0
        // n >= 1023: result may overflow.
        cmpl      $1023, %ecx
        jge       .L_2TAG_PACKET_3.0.2
        // %edx |= sign bit of the partial sum; zero means -1022 - n == 0 and
        // the partial sum is non-negative.
        pextrw    $3, %xmm0, %ecx
        andl      $32768, %ecx
        orl       %ecx, %edx
        cmpl      $0, %edx
        je        .L_2TAG_PACKET_4.0.2
        movapd    %xmm0, %xmm6
        addsd     %xmm4, %xmm0
        mulsd     %xmm3, %xmm0
        // Exponent field of the result is zero: take the integer fix-up path.
        pextrw    $3, %xmm0, %ecx
        andl      $32752, %ecx
        cmpl      $0, %ecx
        je        .L_2TAG_PACKET_5.0.2
        jmp       ..B1.5
.L_2TAG_PACKET_5.0.2:
        // NOTE(review): integer-domain reconstruction of a result whose
        // exponent field came out as zero; presumably this avoids a second
        // rounding for subnormal results - confirm against the original
        // Intel sources.
        mulsd     %xmm3, %xmm6
        mulsd     %xmm3, %xmm4
        movq      %xmm6, %xmm0
        pxor      %xmm4, %xmm6
        psrad     $31, %xmm6
        pshufd    $85, %xmm6, %xmm6
        psllq     $1, %xmm0
        psrlq     $1, %xmm0
        pxor      %xmm6, %xmm0
        psrlq     $63, %xmm6
        paddq     %xmm6, %xmm0
        paddq     %xmm4, %xmm0
        // Value 15 is stored but never read in this file (presumably a
        // leftover error code - confirm).
        movl      $15, (%rsp)
        jmp       .L_2TAG_PACKET_6.0.2
.L_2TAG_PACKET_4.0.2:
        addsd     %xmm4, %xmm0
        mulsd     %xmm3, %xmm0
        jmp       ..B1.5
.L_2TAG_PACKET_3.0.2:
        // n >= 1023: finish the product, then check whether the exponent field
        // is all ones (32752 = 0x7ff0).
        addsd     %xmm4, %xmm0
        mulsd     %xmm3, %xmm0
        pextrw    $3, %xmm0, %ecx
        andl      $32752, %ecx
        cmpl      $32752, %ecx
        jnb       .L_2TAG_PACKET_7.0.2
        jmp       ..B1.5
.L_2TAG_PACKET_2.0.2:
        // -1022 - n > 52: no mask split, just combine and scale.
        paddd     %xmm6, %xmm3
        addpd     %xmm2, %xmm0
        mulsd     %xmm3, %xmm0
        movl      $15, (%rsp)
        jmp       .L_2TAG_PACKET_6.0.2
.L_2TAG_PACKET_8.0.2:
        // |x| >= 1024. %eax = high word of x with the sign cleared.
        // 2146435072 = 0x7ff00000: exponent all ones means INF or NaN.
        cmpl      $2146435072, %eax
        jae       .L_2TAG_PACKET_9.0.2
        // Finite. Reload the signed high word; unsigned >= 0x80000000 means
        // x is negative.
        movl      12(%rsp), %eax
        cmpl      $-2147483648, %eax
        jae       .L_2TAG_PACKET_10.0.2
        // Large positive x: XMAX * XMAX.
        movsd     XMAX(%rip), %xmm0
        mulsd     %xmm0, %xmm0
.L_2TAG_PACKET_7.0.2:
        // Value 14 is stored but never read in this file.
        movl      $14, (%rsp)
        jmp       .L_2TAG_PACKET_6.0.2
.L_2TAG_PACKET_10.0.2:
        // Large negative x: XMIN * XMIN.
        movsd     XMIN(%rip), %xmm0
        mulsd     %xmm0, %xmm0
        movl      $15, (%rsp)
        jmp       .L_2TAG_PACKET_6.0.2
.L_2TAG_PACKET_9.0.2:
        // Exponent all ones. NaN if the high word (sign cleared) exceeds
        // 0x7ff00000 or the low word is non-zero; otherwise +/-INF.
        movl      8(%rsp), %edx
        cmpl      $2146435072, %eax
        ja        .L_2TAG_PACKET_11.0.2
        cmpl      $0, %edx
        jne       .L_2TAG_PACKET_11.0.2
        // Signed high word equal to 0x7ff00000 means +INF; anything else here
        // is -INF.
        movl      12(%rsp), %eax
        cmpl      $2146435072, %eax
        jne       .L_2TAG_PACKET_12.0.2
        movsd     INF(%rip), %xmm0
        jmp       ..B1.5
.L_2TAG_PACKET_12.0.2:
        // exp(-INF) = 0.
        movsd     ZERO(%rip), %xmm0
        jmp       ..B1.5
.L_2TAG_PACKET_11.0.2:
        // NaN input: return x + x.
        movsd     8(%rsp), %xmm0
        addsd     %xmm0, %xmm0
        jmp       ..B1.5
.L_2TAG_PACKET_0.0.2:
        // Outside the main-path range. 1083179008 = 0x40900000 is the high
        // word of 1024.0, so this separates large |x| from tiny |x|.
        movl      12(%rsp), %eax
        andl      $2147483647, %eax
        cmpl      $1083179008, %eax
        jae       .L_2TAG_PACKET_8.0.2
        // Tiny |x| (below 2^-54, including zero): return 1.0 + x.
        movsd     8(%rsp), %xmm0
        addsd     ONE_val(%rip), %xmm0
        jmp       ..B1.5
.L_2TAG_PACKET_6.0.2:
        // Common exit for the paths that stored 14 or 15 at (%rsp): the result
        // is written to 16(%rsp) and immediately reloaded.
        movq      %xmm0, 16(%rsp)
..B1.3:
        movq      16(%rsp), %xmm0
.L_2TAG_PACKET_13.0.2:
..B1.5:
        addq      $24, %rsp
..___tag_value_exp.4:
        ret
..___tag_value_exp.5:
END(exp)
# -- End  exp

// -----------------------------------------------------------------------------
// Read-only constants. Each double is written as two .long values, low 32 bits
// first. Objects loaded with movapd/movdqa are 16-byte aligned.
// -----------------------------------------------------------------------------
        .section .rodata, "a"
        .align 16
        .align 16
// cv: packed constants, addressed by byte offset from the code above.
//   +0   multiplier applied to x before rounding (0x40571547652b82fe, which
//        matches 64/log(2)), duplicated in both lanes
//   +16  first reduction constant (0x3f862e42fefa0000), duplicated
//   +32  second reduction constant, duplicated
//   +48  polynomial coefficient (0x3fdffffffffffffe, just below 0.5), duplicated
//   +64  two polynomial coefficients
//   +80  two polynomial coefficients
cv:
        .long   1697350398
        .long   1079448903
        .long   1697350398
        .long   1079448903
        .long   4277796864
        .long   1065758274
        .long   4277796864
        .long   1065758274
        .long   3164486458
        .long   1025308570
        .long   3164486458
        .long   1025308570
        .long   4294967294
        .long   1071644671
        .long   4294967294
        .long   1071644671
        .long   3811088480
        .long   1062650204
        .long   1432067621
        .long   1067799893
        .long   3230715663
        .long   1065423125
        .long   1431604129
        .long   1069897045
        .type   cv,@object
        .size   cv,96
        .align 16
// Shifter: 0x4338000000000000 = 1.5 * 2^52 in both lanes; adding it forces
// rounding to an integer held in the low mantissa bits.
Shifter:
        .long   0
        .long   1127743488
        .long   0
        .long   1127743488
        .type   Shifter,@object
        .size   Shifter,16
        .align 16
// mmask: 0xffffffc0 in the low dword of each quadword; clears the six table
// index bits of m.
mmask:
        .long   4294967232
        .long   0
        .long   4294967232
        .long   0
        .type   mmask,@object
        .size   mmask,16
        .align 16
// bias: 65472 = 1023 << 6 in each quadword.
bias:
        .long   65472
        .long   0
        .long   65472
        .long   0
        .type   bias,@object
        .size   bias,16
        .align 16
// Tbl_addr: 64 entries of 16 bytes, indexed by j = m & 63.
// Each entry is two quadwords. The code adds the low quadword (a small double)
// to the polynomial sum and ORs exponent bits into the high quadword, whose own
// exponent field is zero. Presumably these are T_lo[j] and the mantissa bits of
// T_hi[j] = 2^(j/64) from the algorithm description - confirm.
Tbl_addr:
        .long   0
        .long   0
        .long   0
        .long   0
        .long   235107661
        .long   1018002367
        .long   1048019040
        .long   11418
        .long   896005651
        .long   1015861842
        .long   3541402996
        .long   22960
        .long   1642514529
        .long   1012987726
        .long   410360776
        .long   34629
        .long   1568897900
        .long   1016568486
        .long   1828292879
        .long   46424
        .long   1882168529
        .long   1010744893
        .long   852742562
        .long   58348
        .long   509852888
        .long   1017336174
        .long   3490863952
        .long   70401
        .long   653277307
        .long   1017431380
        .long   2930322911
        .long   82586
        .long   1649557430
        .long   1017729363
        .long   1014845818
        .long   94904
        .long   1058231231
        .long   1015777676
        .long   3949972341
        .long   107355
        .long   1044000607
        .long   1016786167
        .long   828946858
        .long   119943
        .long   1151779725
        .long   1015705409
        .long   2288159958
        .long   132667
        .long   3819481236
        .long   1016499965
        .long   1853186616
        .long   145530
        .long   2552227826
        .long   1015039787
        .long   1709341917
        .long   158533
        .long   1829350193
        .long   1015216097
        .long   4112506593
        .long   171677
        .long   1913391795
        .long   1015756674
        .long   2799960843
        .long   184965
        .long   1303423926
        .long   1015238005
        .long   171030293
        .long   198398
        .long   1574172746
        .long   1016061241
        .long   2992903935
        .long   211976
        .long   3424156969
        .long   1017196428
        .long   926591434
        .long   225703
        .long   1938513547
        .long   1017631273
        .long   887463926
        .long   239579
        .long   2804567149
        .long   1015390024
        .long   1276261410
        .long   253606
        .long   631083525
        .long   1017690182
        .long   569847337
        .long   267786
        .long   1623370770
        .long   1011049453
        .long   1617004845
        .long   282120
        .long   3667985273
        .long   1013894369
        .long   3049340112
        .long   296610
        .long   3145379760
        .long   1014403278
        .long   3577096743
        .long   311258
        .long   2603100681
        .long   1017152460
        .long   1990012070
        .long   326066
        .long   3249202951
        .long   1017448880
        .long   1453150081
        .long   341035
        .long   419288974
        .long   1016280325
        .long   917841882
        .long   356167
        .long   3793507337
        .long   1016095713
        .long   3712504873
        .long   371463
        .long   728023093
        .long   1016345318
        .long   363667784
        .long   386927
        .long   2582678538
        .long   1017123460
        .long   2956612996
        .long   402558
        .long   7592966
        .long   1016721543
        .long   2186617380
        .long   418360
        .long   228611441
        .long   1016696141
        .long   1719614412
        .long   434334
        .long   2261665670
        .long   1017457593
        .long   1013258798
        .long   450482
        .long   544148907
        .long   1017323666
        .long   3907805043
        .long   466805
        .long   2383914918
        .long   1017143586
        .long   1447192520
        .long   483307
        .long   1176412038
        .long   1017267372
        .long   1944781190
        .long   499988
        .long   2882956373
        .long   1013312481
        .long   919555682
        .long   516851
        .long   3154077648
        .long   1016528543
        .long   2571947538
        .long   533897
        .long   348651999
        .long   1016405780
        .long   2604962540
        .long   551129
        .long   3253791412
        .long   1015920431
        .long   1110089947
        .long   568549
        .long   1509121860
        .long   1014756995
        .long   2568320822
        .long   586158
        .long   2617649212
        .long   1017340090
        .long   2966275556
        .long   603959
        .long   553214634
        .long   1016457425
        .long   2682146383
        .long   621954
        .long   730975783
        .long   1014083580
        .long   2191782032
        .long   640145
        .long   1486499517
        .long   1016818996
        .long   2069751140
        .long   658534
        .long   2595788928
        .long   1016407932
        .long   2990417244
        .long   677123
        .long   1853053619
        .long   1015310724
        .long   1434058175
        .long   695915
        .long   2462790535
        .long   1015814775
        .long   2572866477
        .long   714911
        .long   3693944214
        .long   1017259110
        .long   3092190714
        .long   734114
        .long   2979333550
        .long   1017188654
        .long   4076559942
        .long   753526
        .long   174054861
        .long   1014300631
        .long   2420883922
        .long   773150
        .long   816778419
        .long   1014197934
        .long   3716502172
        .long   792987
        .long   3507050924
        .long   1015341199
        .long   777507147
        .long   813041
        .long   1821514088
        .long   1013410604
        .long   3706687593
        .long   833312
        .long   920623539
        .long   1016295433
        .long   1242007931
        .long   853805
        .long   2789017511
        .long   1014276997
        .long   3707479175
        .long   874520
        .long   3586233004
        .long   1015962192
        .long   64696965
        .long   895462
        .long   474650514
        .long   1016642419
        .long   863738718
        .long   916631
        .long   1614448851
        .long   1014281732
        .long   3884662774
        .long   938030
        .long   2450082086
        .long   1016164135
        .long   2728693977
        .long   959663
        .long   1101668360
        .long   1015989180
        .long   3999357479
        .long   981531
        .long   835814894
        .long   1015702697
        .long   1533953344
        .long   1003638
        .long   1301400989
        .long   1014466875
        .long   2174652632
        .long   1025985
        .type   Tbl_addr,@object
        .size   Tbl_addr,1024
        .align 16
// ALLONES: 128 set bits; shifted left at run time to build a bit mask.
ALLONES:
        .long   4294967295
        .long   4294967295
        .long   4294967295
        .long   4294967295
        .type   ALLONES,@object
        .size   ALLONES,16
        .align 16
// ebias: 0x3ff0000000000000 (the bit pattern of 1.0) in both lanes; added with
// paddd to turn an unbiased exponent field into a biased one.
ebias:
        .long   0
        .long   1072693248
        .long   0
        .long   1072693248
        .type   ebias,@object
        .size   ebias,16
        .align 4
// XMAX: 0x7fefffffffffffff, the largest finite double.
XMAX:
        .long   4294967295
        .long   2146435071
        .type   XMAX,@object
        .size   XMAX,8
        .align 4
// XMIN: 0x0010000000000000, the smallest positive normal double.
XMIN:
        .long   0
        .long   1048576
        .type   XMIN,@object
        .size   XMIN,8
        .align 4
// INF: 0x7ff0000000000000, +infinity.
INF:
        .long   0
        .long   2146435072
        .type   INF,@object
        .size   INF,8
        .align 4
// ZERO: +0.0.
ZERO:
        .long   0
        .long   0
        .type   ZERO,@object
        .size   ZERO,8
        .align 4
// ONE_val: 0x3ff0000000000000, 1.0.
ONE_val:
        .long   0
        .long   1072693248
        .type   ONE_val,@object
        .size   ONE_val,8
        .data
        .section .note.GNU-stack, ""
// -- Begin DWARF2 SEGMENT .eh_frame
// Hand-encoded unwind data. The first 24 bytes are a CIE (length 0x14); the
// record after it is an FDE (length 0x1c) whose address range and advance
// deltas are computed from the ..___tag_value_exp.N labels in the function.
// NOTE(review): the .2byte values below look like little-endian
// DW_CFA_def_cfa_offset (0x0e) opcodes with operands 0x20 and 0x08, matching
// the 24-byte stack adjustment - confirm against the DWARF specification.
        .section .eh_frame,"a",@progbits
.eh_frame_seg:
        .align 1
        .4byte 0x00000014
        .8byte 0x00527a0100000000
        .8byte 0x08070c1b01107801
        .4byte 0x00000190
        .4byte 0x0000001c
        .4byte 0x0000001c
        .4byte ..___tag_value_exp.1-.
        .4byte ..___tag_value_exp.5-..___tag_value_exp.1
        .2byte 0x0400
        .4byte ..___tag_value_exp.3-..___tag_value_exp.1
        .2byte 0x200e
        .byte 0x04
        .4byte ..___tag_value_exp.4-..___tag_value_exp.3
        .2byte 0x080e
        .byte 0x00
# End