1//=== lib/fp_trunc.h - high precision -> low precision conversion *- C -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// Set source and destination precision setting 10// 11//===----------------------------------------------------------------------===// 12 13#ifndef FP_TRUNC_HEADER 14#define FP_TRUNC_HEADER 15 16#include "int_lib.h" 17 18#if defined SRC_SINGLE 19typedef float src_t; 20typedef uint32_t src_rep_t; 21#define SRC_REP_C UINT32_C 22static const int srcBits = sizeof(src_t) * CHAR_BIT; 23static const int srcSigFracBits = 23; 24// -1 accounts for the sign bit. 25// srcBits - srcSigFracBits - 1 26static const int srcExpBits = 8; 27 28#elif defined SRC_DOUBLE 29typedef double src_t; 30typedef uint64_t src_rep_t; 31#define SRC_REP_C UINT64_C 32static const int srcBits = sizeof(src_t) * CHAR_BIT; 33static const int srcSigFracBits = 52; 34// -1 accounts for the sign bit. 35// srcBits - srcSigFracBits - 1 36static const int srcExpBits = 11; 37 38#elif defined SRC_QUAD 39typedef tf_float src_t; 40typedef __uint128_t src_rep_t; 41#define SRC_REP_C (__uint128_t) 42static const int srcBits = sizeof(src_t) * CHAR_BIT; 43static const int srcSigFracBits = 112; 44// -1 accounts for the sign bit. 45// srcBits - srcSigFracBits - 1 46static const int srcExpBits = 15; 47 48#else 49#error Source should be double precision or quad precision! 50#endif // end source precision 51 52#if defined DST_DOUBLE 53typedef double dst_t; 54typedef uint64_t dst_rep_t; 55#define DST_REP_C UINT64_C 56static const int dstBits = sizeof(dst_t) * CHAR_BIT; 57static const int dstSigFracBits = 52; 58// -1 accounts for the sign bit. 59// dstBits - dstSigFracBits - 1 60static const int dstExpBits = 11; 61 62#elif defined DST_80 63typedef xf_float dst_t; 64typedef __uint128_t dst_rep_t; 65#define DST_REP_C (__uint128_t) 66static const int dstBits = 80; 67static const int dstSigFracBits = 63; 68// -1 accounts for the sign bit. 69// -1 accounts for the explicitly stored integer bit. 70// dstBits - dstSigFracBits - 1 - 1 71static const int dstExpBits = 15; 72 73#elif defined DST_SINGLE 74typedef float dst_t; 75typedef uint32_t dst_rep_t; 76#define DST_REP_C UINT32_C 77static const int dstBits = sizeof(dst_t) * CHAR_BIT; 78static const int dstSigFracBits = 23; 79// -1 accounts for the sign bit. 80// dstBits - dstSigFracBits - 1 81static const int dstExpBits = 8; 82 83#elif defined DST_HALF 84#ifdef COMPILER_RT_HAS_FLOAT16 85typedef _Float16 dst_t; 86#else 87typedef uint16_t dst_t; 88#endif 89typedef uint16_t dst_rep_t; 90#define DST_REP_C UINT16_C 91static const int dstBits = sizeof(dst_t) * CHAR_BIT; 92static const int dstSigFracBits = 10; 93// -1 accounts for the sign bit. 94// dstBits - dstSigFracBits - 1 95static const int dstExpBits = 5; 96 97#elif defined DST_BFLOAT 98typedef __bf16 dst_t; 99typedef uint16_t dst_rep_t; 100#define DST_REP_C UINT16_C 101static const int dstBits = sizeof(dst_t) * CHAR_BIT; 102static const int dstSigFracBits = 7; 103// -1 accounts for the sign bit. 104// dstBits - dstSigFracBits - 1 105static const int dstExpBits = 8; 106 107#else 108#error Destination should be single precision or double precision! 109#endif // end destination precision 110 111// TODO: These helper routines should be placed into fp_lib.h 112// Currently they depend on macros/constants defined above. 113 114static inline src_rep_t extract_sign_from_src(src_rep_t x) { 115 const src_rep_t srcSignMask = SRC_REP_C(1) << (srcBits - 1); 116 return (x & srcSignMask) >> (srcBits - 1); 117} 118 119static inline src_rep_t extract_exp_from_src(src_rep_t x) { 120 const int srcSigBits = srcBits - 1 - srcExpBits; 121 const src_rep_t srcExpMask = ((SRC_REP_C(1) << srcExpBits) - 1) << srcSigBits; 122 return (x & srcExpMask) >> srcSigBits; 123} 124 125static inline src_rep_t extract_sig_frac_from_src(src_rep_t x) { 126 const src_rep_t srcSigFracMask = (SRC_REP_C(1) << srcSigFracBits) - 1; 127 return x & srcSigFracMask; 128} 129 130static inline dst_rep_t construct_dst_rep(dst_rep_t sign, dst_rep_t exp, dst_rep_t sigFrac) { 131 dst_rep_t result = (sign << (dstBits - 1)) | (exp << (dstBits - 1 - dstExpBits)) | sigFrac; 132 // Set the explicit integer bit in F80 if present. 133 if (dstBits == 80 && exp) { 134 result |= (DST_REP_C(1) << dstSigFracBits); 135 } 136 return result; 137} 138 139// End of specialization parameters. Two helper routines for conversion to and 140// from the representation of floating-point data as integer values follow. 141 142static inline src_rep_t srcToRep(src_t x) { 143 const union { 144 src_t f; 145 src_rep_t i; 146 } rep = {.f = x}; 147 return rep.i; 148} 149 150static inline dst_t dstFromRep(dst_rep_t x) { 151 const union { 152 dst_t f; 153 dst_rep_t i; 154 } rep = {.i = x}; 155 return rep.f; 156} 157 158#endif // FP_TRUNC_HEADER 159