1143211Sdas/*- 2226245Sdas * Copyright (c) 2005-2011 David Schultz <das@FreeBSD.ORG> 3143211Sdas * All rights reserved. 4143211Sdas * 5143211Sdas * Redistribution and use in source and binary forms, with or without 6143211Sdas * modification, are permitted provided that the following conditions 7143211Sdas * are met: 8143211Sdas * 1. Redistributions of source code must retain the above copyright 9143211Sdas * notice, this list of conditions and the following disclaimer. 10143211Sdas * 2. Redistributions in binary form must reproduce the above copyright 11143211Sdas * notice, this list of conditions and the following disclaimer in the 12143211Sdas * documentation and/or other materials provided with the distribution. 13143211Sdas * 14143211Sdas * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15143211Sdas * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16143211Sdas * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17143211Sdas * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18143211Sdas * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19143211Sdas * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20143211Sdas * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21143211Sdas * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22143211Sdas * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23143211Sdas * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24143211Sdas * SUCH DAMAGE. 25143211Sdas */ 26143211Sdas 27143211Sdas#include <sys/cdefs.h> 28143211Sdas__FBSDID("$FreeBSD$"); 29143211Sdas 30143211Sdas#include <fenv.h> 31143211Sdas#include <float.h> 32143211Sdas#include <math.h> 33143211Sdas 34226371Sdas#include "fpmath.h" 35226371Sdas 36143211Sdas/* 37226245Sdas * A struct dd represents a floating-point number with twice the precision 38226245Sdas * of a long double. We maintain the invariant that "hi" stores the high-order 39226245Sdas * bits of the result. 40226245Sdas */ 41226245Sdasstruct dd { 42226245Sdas long double hi; 43226245Sdas long double lo; 44226245Sdas}; 45226245Sdas 46226245Sdas/* 47226245Sdas * Compute a+b exactly, returning the exact result in a struct dd. We assume 48226245Sdas * that both a and b are finite, but make no assumptions about their relative 49226245Sdas * magnitudes. 50226245Sdas */ 51226245Sdasstatic inline struct dd 52226245Sdasdd_add(long double a, long double b) 53226245Sdas{ 54226245Sdas struct dd ret; 55226245Sdas long double s; 56226245Sdas 57226245Sdas ret.hi = a + b; 58226245Sdas s = ret.hi - a; 59226245Sdas ret.lo = (a - (ret.hi - s)) + (b - s); 60226245Sdas return (ret); 61226245Sdas} 62226245Sdas 63226245Sdas/* 64226371Sdas * Compute a+b, with a small tweak: The least significant bit of the 65226371Sdas * result is adjusted into a sticky bit summarizing all the bits that 66226371Sdas * were lost to rounding. This adjustment negates the effects of double 67226371Sdas * rounding when the result is added to another number with a higher 68226371Sdas * exponent. For an explanation of round and sticky bits, see any reference 69226371Sdas * on FPU design, e.g., 70226371Sdas * 71226371Sdas * J. Coonen. An Implementation Guide to a Proposed Standard for 72226371Sdas * Floating-Point Arithmetic. Computer, vol. 13, no. 1, Jan 1980. 73226371Sdas */ 74226371Sdasstatic inline long double 75226371Sdasadd_adjusted(long double a, long double b) 76226371Sdas{ 77226371Sdas struct dd sum; 78226371Sdas union IEEEl2bits u; 79226371Sdas 80226371Sdas sum = dd_add(a, b); 81226371Sdas if (sum.lo != 0) { 82226371Sdas u.e = sum.hi; 83226371Sdas if ((u.bits.manl & 1) == 0) 84226371Sdas sum.hi = nextafterl(sum.hi, INFINITY * sum.lo); 85226371Sdas } 86226371Sdas return (sum.hi); 87226371Sdas} 88226371Sdas 89226371Sdas/* 90226371Sdas * Compute ldexp(a+b, scale) with a single rounding error. It is assumed 91226371Sdas * that the result will be subnormal, and care is taken to ensure that 92226371Sdas * double rounding does not occur. 93226371Sdas */ 94226371Sdasstatic inline long double 95226371Sdasadd_and_denormalize(long double a, long double b, int scale) 96226371Sdas{ 97226371Sdas struct dd sum; 98226371Sdas int bits_lost; 99226371Sdas union IEEEl2bits u; 100226371Sdas 101226371Sdas sum = dd_add(a, b); 102226371Sdas 103226371Sdas /* 104226371Sdas * If we are losing at least two bits of accuracy to denormalization, 105226371Sdas * then the first lost bit becomes a round bit, and we adjust the 106226371Sdas * lowest bit of sum.hi to make it a sticky bit summarizing all the 107226371Sdas * bits in sum.lo. With the sticky bit adjusted, the hardware will 108226371Sdas * break any ties in the correct direction. 109226371Sdas * 110226371Sdas * If we are losing only one bit to denormalization, however, we must 111226371Sdas * break the ties manually. 112226371Sdas */ 113226371Sdas if (sum.lo != 0) { 114226371Sdas u.e = sum.hi; 115226371Sdas bits_lost = -u.bits.exp - scale + 1; 116252170Seadler if ((bits_lost != 1) ^ (int)(u.bits.manl & 1)) 117226371Sdas sum.hi = nextafterl(sum.hi, INFINITY * sum.lo); 118226371Sdas } 119226371Sdas return (ldexp(sum.hi, scale)); 120226371Sdas} 121226371Sdas 122226371Sdas/* 123226245Sdas * Compute a*b exactly, returning the exact result in a struct dd. We assume 124226245Sdas * that both a and b are normalized, so no underflow or overflow will occur. 125226245Sdas * The current rounding mode must be round-to-nearest. 126226245Sdas */ 127226245Sdasstatic inline struct dd 128226245Sdasdd_mul(long double a, long double b) 129226245Sdas{ 130226245Sdas#if LDBL_MANT_DIG == 64 131226245Sdas static const long double split = 0x1p32L + 1.0; 132226245Sdas#elif LDBL_MANT_DIG == 113 133226245Sdas static const long double split = 0x1p57L + 1.0; 134226245Sdas#endif 135226245Sdas struct dd ret; 136226245Sdas long double ha, hb, la, lb, p, q; 137226245Sdas 138226245Sdas p = a * split; 139226245Sdas ha = a - p; 140226245Sdas ha += p; 141226245Sdas la = a - ha; 142226245Sdas 143226245Sdas p = b * split; 144226245Sdas hb = b - p; 145226245Sdas hb += p; 146226245Sdas lb = b - hb; 147226245Sdas 148226245Sdas p = ha * hb; 149226245Sdas q = ha * lb + la * hb; 150226245Sdas 151226245Sdas ret.hi = p + q; 152226245Sdas ret.lo = p - ret.hi + q + la * lb; 153226245Sdas return (ret); 154226245Sdas} 155226245Sdas 156226245Sdas/* 157143211Sdas * Fused multiply-add: Compute x * y + z with a single rounding error. 158143211Sdas * 159143211Sdas * We use scaling to avoid overflow/underflow, along with the 160143211Sdas * canonical precision-doubling technique adapted from: 161143211Sdas * 162143211Sdas * Dekker, T. A Floating-Point Technique for Extending the 163143211Sdas * Available Precision. Numer. Math. 18, 224-242 (1971). 164143211Sdas */ 165143211Sdaslong double 166143211Sdasfmal(long double x, long double y, long double z) 167143211Sdas{ 168226371Sdas long double xs, ys, zs, adj; 169226371Sdas struct dd xy, r; 170143211Sdas int oround; 171143211Sdas int ex, ey, ez; 172143211Sdas int spread; 173143211Sdas 174177875Sdas /* 175177875Sdas * Handle special cases. The order of operations and the particular 176177875Sdas * return values here are crucial in handling special cases involving 177177875Sdas * infinities, NaNs, overflows, and signed zeroes correctly. 178177875Sdas */ 179177875Sdas if (x == 0.0 || y == 0.0) 180177875Sdas return (x * y + z); 181143211Sdas if (z == 0.0) 182143211Sdas return (x * y); 183177875Sdas if (!isfinite(x) || !isfinite(y)) 184143211Sdas return (x * y + z); 185177875Sdas if (!isfinite(z)) 186177875Sdas return (z); 187143211Sdas 188143211Sdas xs = frexpl(x, &ex); 189143211Sdas ys = frexpl(y, &ey); 190143211Sdas zs = frexpl(z, &ez); 191143211Sdas oround = fegetround(); 192143211Sdas spread = ex + ey - ez; 193143211Sdas 194143211Sdas /* 195143211Sdas * If x * y and z are many orders of magnitude apart, the scaling 196143211Sdas * will overflow, so we handle these cases specially. Rounding 197143211Sdas * modes other than FE_TONEAREST are painful. 198143211Sdas */ 199143211Sdas if (spread < -LDBL_MANT_DIG) { 200143211Sdas feraiseexcept(FE_INEXACT); 201143211Sdas if (!isnormal(z)) 202143211Sdas feraiseexcept(FE_UNDERFLOW); 203143211Sdas switch (oround) { 204143211Sdas case FE_TONEAREST: 205143211Sdas return (z); 206143211Sdas case FE_TOWARDZERO: 207143211Sdas if (x > 0.0 ^ y < 0.0 ^ z < 0.0) 208143211Sdas return (z); 209143211Sdas else 210143211Sdas return (nextafterl(z, 0)); 211143211Sdas case FE_DOWNWARD: 212143211Sdas if (x > 0.0 ^ y < 0.0) 213143211Sdas return (z); 214143211Sdas else 215143211Sdas return (nextafterl(z, -INFINITY)); 216143211Sdas default: /* FE_UPWARD */ 217143211Sdas if (x > 0.0 ^ y < 0.0) 218143211Sdas return (nextafterl(z, INFINITY)); 219143211Sdas else 220143211Sdas return (z); 221143211Sdas } 222143211Sdas } 223226371Sdas if (spread <= LDBL_MANT_DIG * 2) 224226371Sdas zs = ldexpl(zs, -spread); 225226371Sdas else 226226371Sdas zs = copysignl(LDBL_MIN, zs); 227143211Sdas 228143211Sdas fesetround(FE_TONEAREST); 229251024Sdas /* work around clang bug 8100 */ 230251024Sdas volatile long double vxs = xs; 231143211Sdas 232226371Sdas /* 233226371Sdas * Basic approach for round-to-nearest: 234226371Sdas * 235226371Sdas * (xy.hi, xy.lo) = x * y (exact) 236226371Sdas * (r.hi, r.lo) = xy.hi + z (exact) 237226371Sdas * adj = xy.lo + r.lo (inexact; low bit is sticky) 238226371Sdas * result = r.hi + adj (correctly rounded) 239226371Sdas */ 240251024Sdas xy = dd_mul(vxs, ys); 241226245Sdas r = dd_add(xy.hi, zs); 242143211Sdas 243226601Sdas spread = ex + ey; 244226601Sdas 245226371Sdas if (r.hi == 0.0) { 246226371Sdas /* 247226371Sdas * When the addends cancel to 0, ensure that the result has 248226371Sdas * the correct sign. 249226371Sdas */ 250226371Sdas fesetround(oround); 251226371Sdas volatile long double vzs = zs; /* XXX gcc CSE bug workaround */ 252226601Sdas return (xy.hi + vzs + ldexpl(xy.lo, spread)); 253226371Sdas } 254226371Sdas 255226371Sdas if (oround != FE_TONEAREST) { 256143780Sdas /* 257226371Sdas * There is no need to worry about double rounding in directed 258226371Sdas * rounding modes. 259143780Sdas */ 260143780Sdas fesetround(oround); 261251024Sdas /* work around clang bug 8100 */ 262251024Sdas volatile long double vrlo = r.lo; 263251024Sdas adj = vrlo + xy.lo; 264226371Sdas return (ldexpl(r.hi + adj, spread)); 265143780Sdas } 266226371Sdas 267226371Sdas adj = add_adjusted(r.lo, xy.lo); 268226371Sdas if (spread + ilogbl(r.hi) > -16383) 269226371Sdas return (ldexpl(r.hi + adj, spread)); 270226371Sdas else 271226371Sdas return (add_and_denormalize(r.hi, adj, spread)); 272143211Sdas} 273