I guess you do a bit of rounding then? If you do, it may be possible to improve that considerably with a lot of bit-fiddling … On x64 I've seen significant performance improvements, between 20% and 300%, for the following: isnan, isinf, signbit, frexp, min, max, trunc, round, clamp and lerp. I have no idea how well this will work out for an ARM CPU, but here is the core of my implementation (sorry about the formatting; "paste and encode as HTML" doesn't work well for C++ code anymore :( ):
template
struct FractionWidth;
template <>
struct FractionWidth
{
static constexpr UInt32 value = 23;
};
template <>
struct FractionWidth
{
static constexpr UInt32 value = 52;
};
template
struct ExponentWidth;
template <>
struct ExponentWidth
{
static constexpr UInt32 value = 8;
};
template <>
struct ExponentWidth
{
static constexpr UInt32 value = 11;
};
template
struct ExponenBias;
template <>
struct ExponenBias
{
static constexpr UInt32 value = _FBIAS;
};
template <>
struct ExponenBias
{
static constexpr UInt32 value = _DBIAS;
};
template
struct InfinityUnsignedValue;
template <>
struct InfinityUnsignedValue
{
static constexpr UInt32 value = 0X7F800000UL;
};
template <>
struct InfinityUnsignedValue
{
static constexpr UInt64 value = 0x7FF0000000000000ULL;
};
template
struct NegativeInfinityUnsignedValue;
template <>
struct NegativeInfinityUnsignedValue
{
static constexpr UInt32 value = 0xFF800000UL;
};
template <>
struct NegativeInfinityUnsignedValue
{
static constexpr UInt64 value = 0xFFF0000000000000ULL;
};
template
struct QuietNaNUnsignedValue;
template <>
struct QuietNaNUnsignedValue
{
static constexpr UInt32 value = 0XFFC00001UL;
};
template <>
struct QuietNaNUnsignedValue
{
static constexpr UInt64 value = 0x7FF0000000000001ULL;
};
#pragma pack(push,1)
template
struct FloatingPoint
{
using ValueType = std::remove_cvref_t;
using UIntType = MakeUnsigned;
static constexpr Int32 FractionWidth = static\_cast( Internal::FractionWidth::value );
static constexpr Int32 ExponentWidth = static\_cast( Internal::ExponentWidth::value );
static constexpr Int32 ExponentBias = ( 1 << ( ExponentWidth - 1 ) ) - 1;
st