Skip to content

Commit dde8fc1

Browse files
committed
[libc][math] Refactor exp2m1f16 implementation to header-only in src/__support/math folder.
1 parent c771895 commit dde8fc1

File tree

9 files changed

+250
-167
lines changed

9 files changed

+250
-167
lines changed

libc/shared/math.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
#include "math/exp2f.h"
5252
#include "math/exp2f16.h"
5353
#include "math/exp2m1f.h"
54+
#include "math/exp2m1f16.h"
5455
#include "math/expf.h"
5556
#include "math/expf16.h"
5657
#include "math/frexpf.h"

libc/shared/math/exp2m1f16.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
//===-- Shared exp2m1f16 function -------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SHARED_MATH_EXP2M1F16_H
10+
#define LLVM_LIBC_SHARED_MATH_EXP2M1F16_H
11+
12+
#include "include/llvm-libc-macros/float16-macros.h"
13+
#include "shared/libc_common.h"
14+
15+
#ifdef LIBC_TYPES_HAS_FLOAT16
16+
17+
#include "src/__support/math/exp2m1f16.h"
18+
19+
namespace LIBC_NAMESPACE_DECL {
20+
namespace shared {
21+
22+
// Make math::exp2m1f16 (declared in src/__support/math/exp2m1f16.h, included
// above) reachable as shared::exp2m1f16. No new function is defined here.
using math::exp2m1f16;
23+
24+
} // namespace shared
25+
} // namespace LIBC_NAMESPACE_DECL
26+
27+
#endif // LIBC_TYPES_HAS_FLOAT16
28+
29+
#endif // LLVM_LIBC_SHARED_MATH_EXP2M1F16_H

libc/src/__support/math/CMakeLists.txt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -785,6 +785,24 @@ add_header_library(
785785
libc.src.__support.macros.properties.cpu_features
786786
)
787787

788+
# Header-only library target for exp2m1f16.h.
# NOTE(review): exp2m1f16.h uses ERANGE and FE_* macros directly, but
# libc.hdr.errno_macros / libc.hdr.fenv_macros (listed on the entrypoint before
# this commit) are not listed below. Presumably they arrive transitively via
# fenv_impl -- confirm, or list them explicitly.
add_header_library(
789+
exp2m1f16
790+
HDRS
791+
exp2m1f16.h
792+
DEPENDS
793+
.expxf16_utils
794+
libc.src.__support.common
795+
libc.src.__support.FPUtil.cast
796+
libc.src.__support.FPUtil.except_value_utils
797+
libc.src.__support.FPUtil.fenv_impl
798+
libc.src.__support.FPUtil.fp_bits
799+
libc.src.__support.FPUtil.multiply_add
800+
libc.src.__support.FPUtil.polyeval
801+
libc.src.__support.FPUtil.rounding_mode
802+
libc.src.__support.macros.optimization
803+
libc.src.__support.macros.properties.cpu_features
804+
)
805+
788806
add_header_library(
789807
exp10
790808
HDRS
libc/src/__support/math/exp2m1f16.h

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
//===-- Implementation header for exp2m1f16 ---------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXP2M1F16_H
10+
#define LLVM_LIBC_SRC___SUPPORT_MATH_EXP2M1F16_H
11+
12+
#include "include/llvm-libc-macros/float16-macros.h"
13+
14+
#ifdef LIBC_TYPES_HAS_FLOAT16
15+
16+
#include "src/__support/FPUtil/FEnvImpl.h"
17+
#include "src/__support/FPUtil/FPBits.h"
18+
#include "src/__support/FPUtil/PolyEval.h"
19+
#include "src/__support/FPUtil/cast.h"
20+
#include "src/__support/FPUtil/except_value_utils.h"
21+
#include "src/__support/FPUtil/multiply_add.h"
22+
#include "src/__support/FPUtil/rounding_mode.h"
23+
#include "src/__support/macros/config.h"
24+
#include "src/__support/macros/optimization.h"
25+
#include "src/__support/macros/properties/cpu_features.h"
26+
#include "src/__support/math/expxf16_utils.h"
27+
28+
namespace LIBC_NAMESPACE_DECL {
29+
30+
namespace math {
31+
32+
// Computes 2^x - 1 for a float16 argument.
//
// Paths, in the order the body checks them:
//   * NaN: a signaling NaN raises FE_INVALID (if required) and yields a quiet
//     NaN; any other NaN is returned unchanged.
//   * x >= 16: +inf maps to +inf; finite inputs take the overflow switch.
//   * x < -11: -inf maps to -1; finite inputs yield -1 or a value based on
//     -0x1.ffcp-1, chosen from the input range and the rounding mode.
//   * |x| <= 2^(-3): exceptional-input table, then a polynomial evaluated in
//     float and cast back to float16.
//   * otherwise: exceptional-input table, then exp2 range reduction followed
//     by a multiply-add with -1.
// Both exceptional-input tables and their lookups are compiled out when
// LIBC_MATH_HAS_SKIP_ACCURATE_PASS is defined.
LIBC_INLINE static constexpr float16 exp2m1f16(float16 x) {
33+
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
34+
// Exceptional inputs consulted on the |x| <= 2^(-3) path, keyed by the raw
// bit pattern of x.
constexpr fputil::ExceptValues<float16, 6> EXP2M1F16_EXCEPTS_LO = {{
35+
// (input, RZ output, RU offset, RD offset, RN offset)
36+
// x = 0x1.cf4p-13, exp2m1f16(x) = 0x1.41p-13 (RZ)
37+
{0x0b3dU, 0x0904U, 1U, 0U, 1U},
38+
// x = 0x1.4fcp-12, exp2m1f16(x) = 0x1.d14p-13 (RZ)
39+
{0x0d3fU, 0x0b45U, 1U, 0U, 1U},
40+
// x = 0x1.63p-11, exp2m1f16(x) = 0x1.ec4p-12 (RZ)
41+
{0x118cU, 0x0fb1U, 1U, 0U, 0U},
42+
// x = 0x1.6fp-7, exp2m1f16(x) = 0x1.fe8p-8 (RZ)
43+
{0x21bcU, 0x1ffaU, 1U, 0U, 1U},
44+
// x = -0x1.c6p-10, exp2m1f16(x) = -0x1.3a8p-10 (RZ)
45+
{0x9718U, 0x94eaU, 0U, 1U, 0U},
46+
// x = -0x1.cfcp-10, exp2m1f16(x) = -0x1.414p-10 (RZ)
47+
{0x973fU, 0x9505U, 0U, 1U, 0U},
48+
}};
49+
50+
// The general-path table holds 6 entries when LIBC_TARGET_CPU_HAS_FMA_FLOAT is
// defined and 7 otherwise; the #ifdef'd entries below must stay in sync with
// these counts.
#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
51+
constexpr size_t N_EXP2M1F16_EXCEPTS_HI = 6;
52+
#else
53+
constexpr size_t N_EXP2M1F16_EXCEPTS_HI = 7;
54+
#endif
55+
56+
constexpr fputil::ExceptValues<float16, N_EXP2M1F16_EXCEPTS_HI>
57+
EXP2M1F16_EXCEPTS_HI = {{
58+
// (input, RZ output, RU offset, RD offset, RN offset)
59+
// x = 0x1.e58p-3, exp2m1f16(x) = 0x1.6dcp-3 (RZ)
60+
{0x3396U, 0x31b7U, 1U, 0U, 0U},
61+
#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
62+
// x = 0x1.2e8p-2, exp2m1f16(x) = 0x1.d14p-3 (RZ)
63+
{0x34baU, 0x3345U, 1U, 0U, 0U},
64+
#endif
65+
// x = 0x1.ad8p-2, exp2m1f16(x) = 0x1.598p-2 (RZ)
66+
{0x36b6U, 0x3566U, 1U, 0U, 0U},
67+
#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
68+
// x = 0x1.edcp-2, exp2m1f16(x) = 0x1.964p-2 (RZ)
69+
{0x37b7U, 0x3659U, 1U, 0U, 1U},
70+
#endif
71+
// x = -0x1.804p-3, exp2m1f16(x) = -0x1.f34p-4 (RZ)
72+
{0xb201U, 0xafcdU, 0U, 1U, 1U},
73+
// x = -0x1.f3p-3, exp2m1f16(x) = -0x1.3e4p-3 (RZ)
74+
{0xb3ccU, 0xb0f9U, 0U, 1U, 0U},
75+
// x = -0x1.294p-1, exp2m1f16(x) = -0x1.53p-2 (RZ)
76+
{0xb8a5U, 0xb54cU, 0U, 1U, 1U},
77+
#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
78+
// x = -0x1.a34p-1, exp2m1f16(x) = -0x1.bb4p-2 (RZ)
79+
{0xba8dU, 0xb6edU, 0U, 1U, 1U},
80+
#endif
81+
}};
82+
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
83+
84+
using namespace math::expxf16_internal;
85+
using FPBits = fputil::FPBits<float16>;
86+
FPBits x_bits(x);
87+
88+
// x_u is the raw bit pattern (sign bit included); x_abs clears the sign bit.
uint16_t x_u = x_bits.uintval();
89+
uint16_t x_abs = x_u & 0x7fffU;
90+
91+
// When |x| <= 2^(-3), or |x| >= 11, or x is NaN.
92+
if (LIBC_UNLIKELY(x_abs <= 0x3000U || x_abs >= 0x4980U)) {
93+
// exp2m1(NaN) = NaN
94+
if (x_bits.is_nan()) {
95+
if (x_bits.is_signaling_nan()) {
96+
fputil::raise_except_if_required(FE_INVALID);
97+
return FPBits::quiet_nan().get_val();
98+
}
99+
100+
return x;
101+
}
102+
103+
// When x >= 16.
104+
if (x_u >= 0x4c00 && x_bits.is_pos()) {
105+
// exp2m1(+inf) = +inf
106+
if (x_bits.is_inf())
107+
return FPBits::inf().get_val();
108+
109+
switch (fputil::quick_get_round()) {
110+
case FE_TONEAREST:
111+
case FE_UPWARD:
112+
fputil::set_errno_if_required(ERANGE);
113+
fputil::raise_except_if_required(FE_OVERFLOW | FE_INEXACT);
114+
return FPBits::inf().get_val();
115+
// NOTE(review): for the remaining rounding modes this returns the largest
// finite value without setting errno or raising FE_OVERFLOW / FE_INEXACT in
// this branch -- confirm that is intended.
default:
116+
return FPBits::max_normal().get_val();
117+
}
118+
}
119+
120+
// When x < -11.
121+
// Unsigned compare on the raw bits: only patterns with the sign bit set can
// exceed 0xc980, and NaNs have already been returned above.
if (x_u > 0xc980U) {
122+
// exp2m1(-inf) = -1
123+
if (x_bits.is_inf())
124+
return FPBits::one(Sign::NEG).get_val();
125+
126+
// When -12 < x < -11, round(2^x - 1, HP, RN) = -0x1.ffcp-1.
127+
if (x_u < 0xca00U)
128+
return fputil::round_result_slightly_down(
129+
fputil::cast<float16>(-0x1.ffcp-1));
130+
131+
// When x <= -12, round(2^x - 1, HP, RN) = -1.
132+
switch (fputil::quick_get_round()) {
133+
case FE_TONEAREST:
134+
case FE_DOWNWARD:
135+
return FPBits::one(Sign::NEG).get_val();
136+
default:
137+
return fputil::cast<float16>(-0x1.ffcp-1);
138+
}
139+
}
140+
141+
// When |x| <= 2^(-3).
142+
if (x_abs <= 0x3000U) {
143+
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
144+
if (auto r = EXP2M1F16_EXCEPTS_LO.lookup(x_u);
145+
LIBC_UNLIKELY(r.has_value()))
146+
return r.value();
147+
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
148+
149+
// Widen to float so the polynomial is evaluated in float; only the final
// product is cast back to float16.
float xf = x;
150+
// Degree-5 minimax polynomial generated by Sollya with the following
151+
// commands:
152+
// > display = hexadecimal;
153+
// > P = fpminimax((2^x - 1)/x, 4, [|SG...|], [-2^-3, 2^-3]);
154+
// > x * P;
155+
return fputil::cast<float16>(
156+
xf * fputil::polyeval(xf, 0x1.62e43p-1f, 0x1.ebfbdep-3f,
157+
0x1.c6af88p-5f, 0x1.3b45d6p-7f,
158+
0x1.641e7cp-10f));
159+
}
160+
}
161+
162+
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
163+
if (auto r = EXP2M1F16_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
164+
return r.value();
165+
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
166+
167+
// exp2(x) = exp2(hi + mid) * exp2(lo)
168+
auto [exp2_hi_mid, exp2_lo] = exp2_range_reduction(x);
169+
// exp2m1(x) = exp2(hi + mid) * exp2(lo) - 1
170+
return fputil::cast<float16>(
171+
fputil::multiply_add(exp2_hi_mid, exp2_lo, -1.0f));
172+
}
173+
174+
} // namespace math
175+
176+
} // namespace LIBC_NAMESPACE_DECL
177+
178+
#endif // LIBC_TYPES_HAS_FLOAT16
179+
180+
#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXP2M1F16_H

libc/src/math/generic/CMakeLists.txt

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1488,19 +1488,7 @@ add_entrypoint_object(
14881488
HDRS
14891489
../exp2m1f16.h
14901490
DEPENDS
1491-
libc.hdr.errno_macros
1492-
libc.hdr.fenv_macros
1493-
libc.src.__support.common
1494-
libc.src.__support.FPUtil.cast
1495-
libc.src.__support.FPUtil.except_value_utils
1496-
libc.src.__support.FPUtil.fenv_impl
1497-
libc.src.__support.FPUtil.fp_bits
1498-
libc.src.__support.FPUtil.multiply_add
1499-
libc.src.__support.FPUtil.polyeval
1500-
libc.src.__support.FPUtil.rounding_mode
1501-
libc.src.__support.macros.optimization
1502-
libc.src.__support.macros.properties.cpu_features
1503-
libc.src.__support.math.expxf16_utils
1491+
libc.src.__support.math.exp2m1f16
15041492
)
15051493

15061494
add_entrypoint_object(

0 commit comments

Comments
 (0)