mirror of
https://github.com/BX-Team/DivineMC.git
synced 2025-12-23 08:49:18 +00:00
673 lines
18 KiB
C
673 lines
18 KiB
C
#include <stddef.h>
|
|
#include <stdint.h>
|
|
#include <float.h>
|
|
|
|
typedef int make_iso_compilers_happy;
|
|
|
|
#ifdef WIN32
|
|
|
|
// ld.lld: error: <root>: undefined symbol: DllMainCRTStartup
|
|
int __stdcall DllMainCRTStartup(void* instance, unsigned reason, void* reserved)
|
|
{
|
|
(void) instance;
|
|
(void) reason;
|
|
(void) reserved;
|
|
return 1;
|
|
}
|
|
|
|
// ld.lld: error: undefined symbol: _fltused
|
|
int _fltused = 0;
|
|
|
|
// ld.lld: error: undefined symbol: abort
|
|
void abort(void)
|
|
{
|
|
__builtin_trap();
|
|
}
|
|
|
|
#endif // WIN32
|
|
|
|
/*
|
|
The following code is from musl, original license below:
|
|
|
|
musl as a whole is licensed under the following standard MIT license:
|
|
|
|
----------------------------------------------------------------------
|
|
Copyright © 2005-2020 Rich Felker, et al.
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining
|
|
a copy of this software and associated documentation files (the
|
|
"Software"), to deal in the Software without restriction, including
|
|
without limitation the rights to use, copy, modify, merge, publish,
|
|
distribute, sublicense, and/or sell copies of the Software, and to
|
|
permit persons to whom the Software is furnished to do so, subject to
|
|
the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be
|
|
included in all copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
----------------------------------------------------------------------
|
|
*/
|
|
|
|
// src/internal/libm.h
|
|
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
|
|
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN
|
|
union ldshape {
|
|
long double f;
|
|
struct {
|
|
uint64_t m;
|
|
uint16_t se;
|
|
} i;
|
|
};
|
|
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __BIG_ENDIAN
|
|
/* This is the m68k variant of 80-bit long double, and this definition only works
|
|
* on archs where the alignment requirement of uint64_t is <= 4. */
|
|
union ldshape {
|
|
long double f;
|
|
struct {
|
|
uint16_t se;
|
|
uint16_t pad;
|
|
uint64_t m;
|
|
} i;
|
|
};
|
|
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN
|
|
union ldshape {
|
|
long double f;
|
|
struct {
|
|
uint64_t lo;
|
|
uint32_t mid;
|
|
uint16_t top;
|
|
uint16_t se;
|
|
} i;
|
|
struct {
|
|
uint64_t lo;
|
|
uint64_t hi;
|
|
} i2;
|
|
};
|
|
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __BIG_ENDIAN
|
|
union ldshape {
|
|
long double f;
|
|
struct {
|
|
uint16_t se;
|
|
uint16_t top;
|
|
uint32_t mid;
|
|
uint64_t lo;
|
|
} i;
|
|
struct {
|
|
uint64_t hi;
|
|
uint64_t lo;
|
|
} i2;
|
|
};
|
|
#else
|
|
#error Unsupported long double representation
|
|
#endif
|
|
|
|
/* Support non-nearest rounding mode. */
|
|
#define WANT_ROUNDING 1
|
|
/* Support signaling NaNs. */
|
|
#define WANT_SNAN 0
|
|
|
|
#if WANT_SNAN
|
|
#error SNaN is unsupported
|
|
#else
|
|
#define issignalingf_inline(x) 0
|
|
#define issignaling_inline(x) 0
|
|
#endif
|
|
|
|
#ifndef TOINT_INTRINSICS
|
|
#define TOINT_INTRINSICS 0
|
|
#endif
|
|
|
|
#if TOINT_INTRINSICS
|
|
/* Round x to nearest int in all rounding modes, ties have to be rounded
|
|
consistently with converttoint so the results match. If the result
|
|
would be outside of [-2^31, 2^31-1] then the semantics is unspecified. */
|
|
static double_t roundtoint(double_t);
|
|
|
|
/* Convert x to nearest int in all rounding modes, ties have to be rounded
|
|
consistently with roundtoint. If the result is not representible in an
|
|
int32_t then the semantics is unspecified. */
|
|
static int32_t converttoint(double_t);
|
|
#endif
|
|
|
|
/* Helps static branch prediction so hot path can be better optimized. */
|
|
#ifdef __GNUC__
|
|
#define predict_true(x) __builtin_expect(!!(x), 1)
|
|
#define predict_false(x) __builtin_expect(x, 0)
|
|
#else
|
|
#define predict_true(x) (x)
|
|
#define predict_false(x) (x)
|
|
#endif
|
|
|
|
/* Evaluate an expression as the specified type. With standard excess
|
|
precision handling a type cast or assignment is enough (with
|
|
-ffloat-store an assignment is required, in old compilers argument
|
|
passing and return statement may not drop excess precision). */
|
|
|
|
static inline float eval_as_float(float x) {
|
|
float y = x;
|
|
return y;
|
|
}
|
|
|
|
static inline double eval_as_double(double x) {
|
|
double y = x;
|
|
return y;
|
|
}
|
|
|
|
/* fp_barrier returns its input, but limits code transformations
|
|
as if it had a side-effect (e.g. observable io) and returned
|
|
an arbitrary value. */
|
|
|
|
#ifndef fp_barrierf
|
|
#define fp_barrierf fp_barrierf
|
|
|
|
static inline float fp_barrierf(float x) {
|
|
volatile float y = x;
|
|
return y;
|
|
}
|
|
|
|
#endif
|
|
|
|
#ifndef fp_barrier
|
|
#define fp_barrier fp_barrier
|
|
|
|
static inline double fp_barrier(double x) {
|
|
volatile double y = x;
|
|
return y;
|
|
}
|
|
|
|
#endif
|
|
|
|
#ifndef fp_barrierl
|
|
#define fp_barrierl fp_barrierl
|
|
|
|
static inline long double fp_barrierl(long double x) {
|
|
volatile long double y = x;
|
|
return y;
|
|
}
|
|
|
|
#endif
|
|
|
|
/* fp_force_eval ensures that the input value is computed when that's
|
|
otherwise unused. To prevent the constant folding of the input
|
|
expression, an additional fp_barrier may be needed or a compilation
|
|
mode that does so (e.g. -frounding-math in gcc). Then it can be
|
|
used to evaluate an expression for its fenv side-effects only. */
|
|
|
|
#ifndef fp_force_evalf
|
|
#define fp_force_evalf fp_force_evalf
|
|
|
|
static inline void fp_force_evalf(float x) {
|
|
volatile float y;
|
|
y = x;
|
|
}
|
|
|
|
#endif
|
|
|
|
#ifndef fp_force_eval
|
|
#define fp_force_eval fp_force_eval
|
|
|
|
static inline void fp_force_eval(double x) {
|
|
volatile double y;
|
|
y = x;
|
|
}
|
|
|
|
#endif
|
|
|
|
#ifndef fp_force_evall
|
|
#define fp_force_evall fp_force_evall
|
|
|
|
static inline void fp_force_evall(long double x) {
|
|
volatile long double y;
|
|
y = x;
|
|
}
|
|
|
|
#endif
|
|
|
|
#define FORCE_EVAL(x) do { \
|
|
if (sizeof(x) == sizeof(float)) { \
|
|
fp_force_evalf(x); \
|
|
} else if (sizeof(x) == sizeof(double)) { \
|
|
fp_force_eval(x); \
|
|
} else { \
|
|
fp_force_evall(x); \
|
|
} \
|
|
} while(0)
|
|
|
|
#define asuint(f) ((union{float _f; uint32_t _i;}){f})._i
|
|
#define asfloat(i) ((union{uint32_t _i; float _f;}){i})._f
|
|
#define asuint64(f) ((union{double _f; uint64_t _i;}){f})._i
|
|
#define asdouble(i) ((union{uint64_t _i; double _f;}){i})._f
|
|
|
|
#define EXTRACT_WORDS(hi, lo, d) \
|
|
do { \
|
|
uint64_t __u = asuint64(d); \
|
|
(hi) = __u >> 32; \
|
|
(lo) = (uint32_t)__u; \
|
|
} while (0)
|
|
|
|
#define GET_HIGH_WORD(hi, d) \
|
|
do { \
|
|
(hi) = asuint64(d) >> 32; \
|
|
} while (0)
|
|
|
|
#define GET_LOW_WORD(lo, d) \
|
|
do { \
|
|
(lo) = (uint32_t)asuint64(d); \
|
|
} while (0)
|
|
|
|
#define INSERT_WORDS(d, hi, lo) \
|
|
do { \
|
|
(d) = asdouble(((uint64_t)(hi)<<32) | (uint32_t)(lo)); \
|
|
} while (0)
|
|
|
|
#define SET_HIGH_WORD(d, hi) \
|
|
INSERT_WORDS(d, hi, (uint32_t)asuint64(d))
|
|
|
|
#define SET_LOW_WORD(d, lo) \
|
|
INSERT_WORDS(d, asuint64(d)>>32, lo)
|
|
|
|
#define GET_FLOAT_WORD(w, d) \
|
|
do { \
|
|
(w) = asuint(d); \
|
|
} while (0)
|
|
|
|
#define SET_FLOAT_WORD(d, w) \
|
|
do { \
|
|
(d) = asfloat(w); \
|
|
} while (0)
|
|
|
|
static int __rem_pio2_large(double *, double *, int, int, int);
|
|
|
|
static int __rem_pio2(double, double *);
|
|
|
|
static double __sin(double, double, int);
|
|
|
|
static double __cos(double, double);
|
|
|
|
static double __tan(double, double, int);
|
|
|
|
static double __expo2(double, double);
|
|
|
|
static int __rem_pio2f(float, double *);
|
|
|
|
static float __sindf(double);
|
|
|
|
static float __cosdf(double);
|
|
|
|
static float __tandf(double, int);
|
|
|
|
static float __expo2f(float, float);
|
|
|
|
static int __rem_pio2l(long double, long double *);
|
|
|
|
static long double __sinl(long double, long double, int);
|
|
|
|
static long double __cosl(long double, long double);
|
|
|
|
static long double __tanl(long double, long double, int);
|
|
|
|
static long double __polevll(long double, const long double *, int);
|
|
|
|
static long double __p1evll(long double, const long double *, int);
|
|
|
|
//extern int __signgam;
|
|
static double __lgamma_r(double, int *);
|
|
|
|
static float __lgammaf_r(float, int *);
|
|
|
|
/* error handling functions */
|
|
static float __math_xflowf(uint32_t, float);
|
|
|
|
static float __math_uflowf(uint32_t);
|
|
|
|
static float __math_oflowf(uint32_t);
|
|
|
|
static float __math_divzerof(uint32_t);
|
|
|
|
static float __math_invalidf(float);
|
|
|
|
static double __math_xflow(uint32_t, double);
|
|
|
|
static double __math_uflow(uint32_t);
|
|
|
|
static double __math_oflow(uint32_t);
|
|
|
|
static double __math_divzero(uint32_t);
|
|
|
|
static double __math_invalid(double);
|
|
|
|
#if LDBL_MANT_DIG != DBL_MANT_DIG
|
|
|
|
static long double __math_invalidl(long double);
|
|
|
|
#endif
|
|
|
|
// src/math/__math_invalidf.c
|
|
static float __math_invalidf(float x)
|
|
{
|
|
return (x - x) / (x - x);
|
|
}
|
|
|
|
// src/math/truncf.c
|
|
|
|
float truncf(float x) {
|
|
union {
|
|
float f;
|
|
uint32_t i;
|
|
} u = {x};
|
|
int e = (int) (u.i >> 23 & 0xff) - 0x7f + 9;
|
|
uint32_t m;
|
|
|
|
if (e >= 23 + 9)
|
|
return x;
|
|
if (e < 9)
|
|
e = 1;
|
|
m = -1U >> e;
|
|
if ((u.i & m) == 0)
|
|
return x;
|
|
FORCE_EVAL(x + 0x1p120f);
|
|
u.i &= ~m;
|
|
return u.f;
|
|
}
|
|
|
|
// src/math/floor.c
|
|
|
|
#if FLT_EVAL_METHOD == 0 || FLT_EVAL_METHOD == 1
|
|
#define EPS DBL_EPSILON
|
|
#elif FLT_EVAL_METHOD == 2
|
|
#define EPS LDBL_EPSILON
|
|
#endif
|
|
static const double toint = 1 / EPS;
|
|
|
|
double floor(double x) {
|
|
union {
|
|
double f;
|
|
uint64_t i;
|
|
} u = {x};
|
|
int e = u.i >> 52 & 0x7ff;
|
|
double y;
|
|
|
|
if (e >= 0x3ff + 52 || x == 0)
|
|
return x;
|
|
/* y = int(x) - x, where int(x) is an integer neighbor of x */
|
|
if (u.i >> 63)
|
|
y = x - toint + toint - x;
|
|
else
|
|
y = x + toint - toint - x;
|
|
/* special case because of non-nearest rounding modes */
|
|
if (e <= 0x3ff - 1) {
|
|
FORCE_EVAL(y);
|
|
return u.i >> 63 ? -1 : 0;
|
|
}
|
|
if (y > 0)
|
|
return x + y - 1;
|
|
return x + y;
|
|
}
|
|
|
|
// src/math/fmodf.c
|
|
|
|
float fmodf(float x, float y) {
|
|
union {
|
|
float f;
|
|
uint32_t i;
|
|
} ux = {x}, uy = {y};
|
|
int ex = ux.i >> 23 & 0xff;
|
|
int ey = uy.i >> 23 & 0xff;
|
|
uint32_t sx = ux.i & 0x80000000;
|
|
uint32_t i;
|
|
uint32_t uxi = ux.i;
|
|
|
|
if (uy.i << 1 == 0 || __builtin_isnan(y) || ex == 0xff)
|
|
return (x * y) / (x * y);
|
|
if (uxi << 1 <= uy.i << 1) {
|
|
if (uxi << 1 == uy.i << 1)
|
|
return 0 * x;
|
|
return x;
|
|
}
|
|
|
|
/* normalize x and y */
|
|
if (!ex) {
|
|
for (i = uxi << 9; i >> 31 == 0; ex--, i <<= 1);
|
|
uxi <<= -ex + 1;
|
|
} else {
|
|
uxi &= -1U >> 9;
|
|
uxi |= 1U << 23;
|
|
}
|
|
if (!ey) {
|
|
for (i = uy.i << 9; i >> 31 == 0; ey--, i <<= 1);
|
|
uy.i <<= -ey + 1;
|
|
} else {
|
|
uy.i &= -1U >> 9;
|
|
uy.i |= 1U << 23;
|
|
}
|
|
|
|
/* x mod y */
|
|
for (; ex > ey; ex--) {
|
|
i = uxi - uy.i;
|
|
if (i >> 31 == 0) {
|
|
if (i == 0)
|
|
return 0 * x;
|
|
uxi = i;
|
|
}
|
|
uxi <<= 1;
|
|
}
|
|
i = uxi - uy.i;
|
|
if (i >> 31 == 0) {
|
|
if (i == 0)
|
|
return 0 * x;
|
|
uxi = i;
|
|
}
|
|
for (; uxi >> 23 == 0; uxi <<= 1, ex--);
|
|
|
|
/* scale result up */
|
|
if (ex > 0) {
|
|
uxi -= 1U << 23;
|
|
uxi |= (uint32_t) ex << 23;
|
|
} else {
|
|
uxi >>= -ex + 1;
|
|
}
|
|
uxi |= sx;
|
|
ux.i = uxi;
|
|
return ux.f;
|
|
}
|
|
|
|
// src/string/memset.c
|
|
|
|
void *memset(void *dest, int c, size_t n) {
|
|
unsigned char *s = dest;
|
|
size_t k;
|
|
|
|
/* Fill head and tail with minimal branching. Each
|
|
* conditional ensures that all the subsequently used
|
|
* offsets are well-defined and in the dest region. */
|
|
|
|
if (!n) return dest;
|
|
s[0] = c;
|
|
s[n - 1] = c;
|
|
if (n <= 2) return dest;
|
|
s[1] = c;
|
|
s[2] = c;
|
|
s[n - 2] = c;
|
|
s[n - 3] = c;
|
|
if (n <= 6) return dest;
|
|
s[3] = c;
|
|
s[n - 4] = c;
|
|
if (n <= 8) return dest;
|
|
|
|
/* Advance pointer to align it at a 4-byte boundary,
|
|
* and truncate n to a multiple of 4. The previous code
|
|
* already took care of any head/tail that get cut off
|
|
* by the alignment. */
|
|
|
|
k = -(uintptr_t) s & 3;
|
|
s += k;
|
|
n -= k;
|
|
n &= -4;
|
|
|
|
#ifdef __GNUC__
|
|
typedef uint32_t __attribute__((__may_alias__)) u32;
|
|
typedef uint64_t __attribute__((__may_alias__)) u64;
|
|
|
|
u32 c32 = ((u32) -1) / 255 * (unsigned char) c;
|
|
|
|
/* In preparation to copy 32 bytes at a time, aligned on
|
|
* an 8-byte bounary, fill head/tail up to 28 bytes each.
|
|
* As in the initial byte-based head/tail fill, each
|
|
* conditional below ensures that the subsequent offsets
|
|
* are valid (e.g. !(n<=24) implies n>=28). */
|
|
|
|
*(u32 *) (s + 0) = c32;
|
|
*(u32 *) (s + n - 4) = c32;
|
|
if (n <= 8) return dest;
|
|
*(u32 *) (s + 4) = c32;
|
|
*(u32 *) (s + 8) = c32;
|
|
*(u32 *) (s + n - 12) = c32;
|
|
*(u32 *) (s + n - 8) = c32;
|
|
if (n <= 24) return dest;
|
|
*(u32 *) (s + 12) = c32;
|
|
*(u32 *) (s + 16) = c32;
|
|
*(u32 *) (s + 20) = c32;
|
|
*(u32 *) (s + 24) = c32;
|
|
*(u32 *) (s + n - 28) = c32;
|
|
*(u32 *) (s + n - 24) = c32;
|
|
*(u32 *) (s + n - 20) = c32;
|
|
*(u32 *) (s + n - 16) = c32;
|
|
|
|
/* Align to a multiple of 8 so we can fill 64 bits at a time,
|
|
* and avoid writing the same bytes twice as much as is
|
|
* practical without introducing additional branching. */
|
|
|
|
k = 24 + ((uintptr_t) s & 4);
|
|
s += k;
|
|
n -= k;
|
|
|
|
/* If this loop is reached, 28 tail bytes have already been
|
|
* filled, so any remainder when n drops below 32 can be
|
|
* safely ignored. */
|
|
|
|
u64 c64 = c32 | ((u64) c32 << 32);
|
|
for (; n >= 32; n -= 32, s += 32) {
|
|
*(u64 *) (s + 0) = c64;
|
|
*(u64 *) (s + 8) = c64;
|
|
*(u64 *) (s + 16) = c64;
|
|
*(u64 *) (s + 24) = c64;
|
|
}
|
|
#else
|
|
/* Pure C fallback with no aliasing violations. */
|
|
for (; n; n--, s++) *s = c;
|
|
#endif
|
|
|
|
return dest;
|
|
}
|
|
|
|
// src/math/sqrt_data.[c|h]
|
|
|
|
/* if x in [1,2): i = (int)(64*x);
|
|
if x in [2,4): i = (int)(32*x-64);
|
|
__rsqrt_tab[i]*2^-16 is estimating 1/sqrt(x) with small relative error:
|
|
|__rsqrt_tab[i]*0x1p-16*sqrt(x) - 1| < -0x1.fdp-9 < 2^-8 */
|
|
extern const uint16_t __rsqrt_tab[128] = {
|
|
0xb451, 0xb2f0, 0xb196, 0xb044, 0xaef9, 0xadb6, 0xac79, 0xab43,
|
|
0xaa14, 0xa8eb, 0xa7c8, 0xa6aa, 0xa592, 0xa480, 0xa373, 0xa26b,
|
|
0xa168, 0xa06a, 0x9f70, 0x9e7b, 0x9d8a, 0x9c9d, 0x9bb5, 0x9ad1,
|
|
0x99f0, 0x9913, 0x983a, 0x9765, 0x9693, 0x95c4, 0x94f8, 0x9430,
|
|
0x936b, 0x92a9, 0x91ea, 0x912e, 0x9075, 0x8fbe, 0x8f0a, 0x8e59,
|
|
0x8daa, 0x8cfe, 0x8c54, 0x8bac, 0x8b07, 0x8a64, 0x89c4, 0x8925,
|
|
0x8889, 0x87ee, 0x8756, 0x86c0, 0x862b, 0x8599, 0x8508, 0x8479,
|
|
0x83ec, 0x8361, 0x82d8, 0x8250, 0x81c9, 0x8145, 0x80c2, 0x8040,
|
|
0xff02, 0xfd0e, 0xfb25, 0xf947, 0xf773, 0xf5aa, 0xf3ea, 0xf234,
|
|
0xf087, 0xeee3, 0xed47, 0xebb3, 0xea27, 0xe8a3, 0xe727, 0xe5b2,
|
|
0xe443, 0xe2dc, 0xe17a, 0xe020, 0xdecb, 0xdd7d, 0xdc34, 0xdaf1,
|
|
0xd9b3, 0xd87b, 0xd748, 0xd61a, 0xd4f1, 0xd3cd, 0xd2ad, 0xd192,
|
|
0xd07b, 0xcf69, 0xce5b, 0xcd51, 0xcc4a, 0xcb48, 0xca4a, 0xc94f,
|
|
0xc858, 0xc764, 0xc674, 0xc587, 0xc49d, 0xc3b7, 0xc2d4, 0xc1f4,
|
|
0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
|
|
0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
|
|
};
|
|
|
|
// src/math/sqrtf.c
|
|
#define FENV_SUPPORT 1
|
|
|
|
static inline uint32_t mul32(uint32_t a, uint32_t b) {
|
|
return (uint64_t) a * b >> 32;
|
|
}
|
|
|
|
/* see sqrt.c for more detailed comments. */
|
|
|
|
float sqrtf(float x) {
|
|
uint32_t ix, m, m1, m0, even, ey;
|
|
|
|
ix = asuint(x);
|
|
if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) {
|
|
/* x < 0x1p-126 or inf or nan. */
|
|
if (ix * 2 == 0)
|
|
return x;
|
|
if (ix == 0x7f800000)
|
|
return x;
|
|
if (ix > 0x7f800000)
|
|
return __math_invalidf(x);
|
|
/* x is subnormal, normalize it. */
|
|
ix = asuint(x * 0x1p23f);
|
|
ix -= 23 << 23;
|
|
}
|
|
|
|
/* x = 4^e m; with int e and m in [1, 4). */
|
|
even = ix & 0x00800000;
|
|
m1 = (ix << 8) | 0x80000000;
|
|
m0 = (ix << 7) & 0x7fffffff;
|
|
m = even ? m0 : m1;
|
|
|
|
/* 2^e is the exponent part of the return value. */
|
|
ey = ix >> 1;
|
|
ey += 0x3f800000 >> 1;
|
|
ey &= 0x7f800000;
|
|
|
|
/* compute r ~ 1/sqrt(m), s ~ sqrt(m) with 2 goldschmidt iterations. */
|
|
static const uint32_t three = 0xc0000000;
|
|
uint32_t r, s, d, u, i;
|
|
i = (ix >> 17) % 128;
|
|
r = (uint32_t) __rsqrt_tab[i] << 16;
|
|
/* |r*sqrt(m) - 1| < 0x1p-8 */
|
|
s = mul32(m, r);
|
|
/* |s/sqrt(m) - 1| < 0x1p-8 */
|
|
d = mul32(s, r);
|
|
u = three - d;
|
|
r = mul32(r, u) << 1;
|
|
/* |r*sqrt(m) - 1| < 0x1.7bp-16 */
|
|
s = mul32(s, u) << 1;
|
|
/* |s/sqrt(m) - 1| < 0x1.7bp-16 */
|
|
d = mul32(s, r);
|
|
u = three - d;
|
|
s = mul32(s, u);
|
|
/* -0x1.03p-28 < s/sqrt(m) - 1 < 0x1.fp-31 */
|
|
s = (s - 1) >> 6;
|
|
/* s < sqrt(m) < s + 0x1.08p-23 */
|
|
|
|
/* compute nearest rounded result. */
|
|
uint32_t d0, d1, d2;
|
|
float y, t;
|
|
d0 = (m << 16) - s * s;
|
|
d1 = s - d0;
|
|
d2 = d1 + s + 1;
|
|
s += d1 >> 31;
|
|
s &= 0x007fffff;
|
|
s |= ey;
|
|
y = asfloat(s);
|
|
if (FENV_SUPPORT) {
|
|
/* handle rounding and inexact exception. */
|
|
uint32_t tiny = predict_false(d2 == 0) ? 0 : 0x01000000;
|
|
tiny |= (d1 ^ d2) & 0x80000000;
|
|
t = asfloat(tiny);
|
|
y = eval_as_float(y + t);
|
|
}
|
|
return y;
|
|
}
|
|
|