Skip to content

Commit 6c032fa

Browse files
authored
[libc][math] Refactor exp10m1f16 implementation to header-only in src/__support/math folder. (#161119)
Part of #147386 in preparation for: https://discourse.llvm.org/t/rfc-make-clang-builtin-math-functions-constexpr-with-llvm-libc-to-support-c-23-constexpr-math-functions/86450
1 parent 6958633 commit 6c032fa

File tree

9 files changed

+254
-169
lines changed

9 files changed

+254
-169
lines changed

libc/shared/math.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
#include "math/exp10f.h"
4747
#include "math/exp10f16.h"
4848
#include "math/exp10m1f.h"
49+
#include "math/exp10m1f16.h"
4950
#include "math/expf.h"
5051
#include "math/expf16.h"
5152
#include "math/frexpf.h"

libc/shared/math/exp10m1f16.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
//===-- Shared exp10m1f16 function ------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SHARED_MATH_EXP10M1F16_H
10+
#define LLVM_LIBC_SHARED_MATH_EXP10M1F16_H
11+
12+
#include "include/llvm-libc-macros/float16-macros.h"
13+
#include "shared/libc_common.h"
14+
15+
#ifdef LIBC_TYPES_HAS_FLOAT16
16+
17+
#include "src/__support/math/exp10m1f16.h"
18+
19+
namespace LIBC_NAMESPACE_DECL {
20+
namespace shared {
21+
22+
using math::exp10m1f16;
23+
24+
} // namespace shared
25+
} // namespace LIBC_NAMESPACE_DECL
26+
27+
#endif // LIBC_TYPES_HAS_FLOAT16
28+
29+
#endif // LLVM_LIBC_SHARED_MATH_EXP10M1F16_H

libc/src/__support/math/CMakeLists.txt

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -498,6 +498,23 @@ add_header_library(
498498
libc.src.__support.macros.optimization
499499
)
500500

501+
add_header_library(
502+
exp10m1f16
503+
HDRS
504+
exp10m1f16.h
505+
DEPENDS
506+
.exp10f16_utils
507+
libc.src.__support.FPUtil.cast
508+
libc.src.__support.FPUtil.except_value_utils
509+
libc.src.__support.FPUtil.fenv_impl
510+
libc.src.__support.FPUtil.fp_bits
511+
libc.src.__support.FPUtil.multiply_add
512+
libc.src.__support.FPUtil.polyeval
513+
libc.src.__support.FPUtil.rounding_mode
514+
libc.src.__support.macros.optimization
515+
libc.src.__support.macros.properties.cpu_features
516+
)
517+
501518
add_header_library(
502519
erff
503520
HDRS
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
//===-- Implementation header for exp10m1f16 --------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXP10M1F16_H
10+
#define LLVM_LIBC_SRC___SUPPORT_MATH_EXP10M1F16_H
11+
12+
#include "include/llvm-libc-macros/float16-macros.h"
13+
14+
#ifdef LIBC_TYPES_HAS_FLOAT16
15+
16+
#include "exp10f16_utils.h"
17+
#include "src/__support/FPUtil/FEnvImpl.h"
18+
#include "src/__support/FPUtil/FPBits.h"
19+
#include "src/__support/FPUtil/PolyEval.h"
20+
#include "src/__support/FPUtil/cast.h"
21+
#include "src/__support/FPUtil/except_value_utils.h"
22+
#include "src/__support/FPUtil/multiply_add.h"
23+
#include "src/__support/FPUtil/rounding_mode.h"
24+
#include "src/__support/common.h"
25+
#include "src/__support/macros/config.h"
26+
#include "src/__support/macros/optimization.h"
27+
#include "src/__support/macros/properties/cpu_features.h"
28+
29+
namespace LIBC_NAMESPACE_DECL {
30+
31+
namespace math {
32+
33+
LIBC_INLINE static constexpr float16 exp10m1f16(float16 x) {
34+
35+
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
36+
constexpr fputil::ExceptValues<float16, 3> EXP10M1F16_EXCEPTS_LO = {{
37+
// (input, RZ output, RU offset, RD offset, RN offset)
38+
// x = 0x1.5c4p-4, exp10m1f16(x) = 0x1.bacp-3 (RZ)
39+
{0x2d71U, 0x32ebU, 1U, 0U, 0U},
40+
// x = -0x1.5ep-13, exp10m1f16(x) = -0x1.92cp-12 (RZ)
41+
{0x8978U, 0x8e4bU, 0U, 1U, 0U},
42+
// x = -0x1.e2p-10, exp10m1f16(x) = -0x1.14cp-8 (RZ)
43+
{0x9788U, 0x9c53U, 0U, 1U, 0U},
44+
}};
45+
46+
#ifdef LIBC_TARGET_CPU_HAS_FMA_FLOAT
47+
constexpr size_t N_EXP10M1F16_EXCEPTS_HI = 3;
48+
#else
49+
constexpr size_t N_EXP10M1F16_EXCEPTS_HI = 6;
50+
#endif
51+
52+
constexpr fputil::ExceptValues<float16, N_EXP10M1F16_EXCEPTS_HI>
53+
EXP10M1F16_EXCEPTS_HI = {{
54+
// (input, RZ output, RU offset, RD offset, RN offset)
55+
// x = 0x1.8f4p-2, exp10m1f16(x) = 0x1.744p+0 (RZ)
56+
{0x363dU, 0x3dd1U, 1U, 0U, 0U},
57+
// x = 0x1.95cp-2, exp10m1f16(x) = 0x1.7d8p+0 (RZ)
58+
{0x3657U, 0x3df6U, 1U, 0U, 0U},
59+
// x = 0x1.d04p-2, exp10m1f16(x) = 0x1.d7p+0 (RZ)
60+
{0x3741U, 0x3f5cU, 1U, 0U, 1U},
61+
#ifndef LIBC_TARGET_CPU_HAS_FMA_FLOAT
62+
// x = 0x1.0cp+1, exp10m1f16(x) = 0x1.ec4p+6 (RZ)
63+
{0x4030U, 0x57b1U, 1U, 0U, 1U},
64+
// x = 0x1.1b8p+1, exp10m1f16(x) = 0x1.45cp+7 (RZ)
65+
{0x406eU, 0x5917U, 1U, 0U, 1U},
66+
// x = 0x1.2f4p+2, exp10m1f16(x) = 0x1.ab8p+15 (RZ)
67+
{0x44bdU, 0x7aaeU, 1U, 0U, 1U},
68+
#endif
69+
}};
70+
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
71+
72+
using FPBits = fputil::FPBits<float16>;
73+
FPBits x_bits(x);
74+
75+
uint16_t x_u = x_bits.uintval();
76+
uint16_t x_abs = x_u & 0x7fffU;
77+
78+
// When |x| <= 2^(-3), or |x| >= 11 * log10(2), or x is NaN.
79+
if (LIBC_UNLIKELY(x_abs <= 0x3000U || x_abs >= 0x429fU)) {
80+
// exp10m1(NaN) = NaN
81+
if (x_bits.is_nan()) {
82+
if (x_bits.is_signaling_nan()) {
83+
fputil::raise_except_if_required(FE_INVALID);
84+
return FPBits::quiet_nan().get_val();
85+
}
86+
87+
return x;
88+
}
89+
90+
// When x >= 16 * log10(2).
91+
if (x_u >= 0x44d1U && x_bits.is_pos()) {
92+
// exp10m1(+inf) = +inf
93+
if (x_bits.is_inf())
94+
return FPBits::inf().get_val();
95+
96+
switch (fputil::quick_get_round()) {
97+
case FE_TONEAREST:
98+
case FE_UPWARD:
99+
fputil::set_errno_if_required(ERANGE);
100+
fputil::raise_except_if_required(FE_OVERFLOW | FE_INEXACT);
101+
return FPBits::inf().get_val();
102+
default:
103+
return FPBits::max_normal().get_val();
104+
}
105+
}
106+
107+
// When x < -11 * log10(2).
108+
if (x_u > 0xc29fU) {
109+
// exp10m1(-inf) = -1
110+
if (x_bits.is_inf())
111+
return FPBits::one(Sign::NEG).get_val();
112+
113+
// When x >= -0x1.ce4p+1, round(10^x - 1, HP, RN) = -0x1.ffcp-1.
114+
if (x_u <= 0xc339U) {
115+
return fputil::round_result_slightly_down(
116+
fputil::cast<float16>(-0x1.ffcp-1));
117+
}
118+
119+
// When x < -0x1.ce4p+1, round(10^x - 1, HP, RN) = -1.
120+
switch (fputil::quick_get_round()) {
121+
case FE_TONEAREST:
122+
case FE_DOWNWARD:
123+
return FPBits::one(Sign::NEG).get_val();
124+
default:
125+
return fputil::cast<float16>(-0x1.ffcp-1);
126+
}
127+
}
128+
129+
// When |x| <= 2^(-3).
130+
if (x_abs <= 0x3000U) {
131+
if (LIBC_UNLIKELY(x_abs == 0))
132+
return x;
133+
134+
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
135+
if (auto r = EXP10M1F16_EXCEPTS_LO.lookup(x_u);
136+
LIBC_UNLIKELY(r.has_value()))
137+
return r.value();
138+
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
139+
140+
float xf = x;
141+
// Degree-5 minimax polynomial generated by Sollya with the following
142+
// commands:
143+
// > display = hexadecimal;
144+
// > P = fpminimax((10^x - 1)/x, 4, [|SG...|], [-2^-3, 2^-3]);
145+
// > x * P;
146+
return fputil::cast<float16>(
147+
xf * fputil::polyeval(xf, 0x1.26bb1cp+1f, 0x1.5351c8p+1f,
148+
0x1.04704p+1f, 0x1.2ce084p+0f, 0x1.14a6bep-1f));
149+
}
150+
}
151+
152+
// When x is 1, 2, or 3. These are hard-to-round cases with exact results.
153+
// 10^4 - 1 = 9'999 is not exactly representable as a float16, but luckily the
154+
// polynomial approximation gives the correct result for x = 4 in all
155+
// rounding modes.
156+
if (LIBC_UNLIKELY((x_u & ~(0x3c00U | 0x4000U | 0x4200U | 0x4400U)) == 0)) {
157+
switch (x_u) {
158+
case 0x3c00U: // x = 1.0f16
159+
return fputil::cast<float16>(9.0);
160+
case 0x4000U: // x = 2.0f16
161+
return fputil::cast<float16>(99.0);
162+
case 0x4200U: // x = 3.0f16
163+
return fputil::cast<float16>(999.0);
164+
}
165+
}
166+
167+
#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
168+
if (auto r = EXP10M1F16_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value()))
169+
return r.value();
170+
#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
171+
172+
// exp10(x) = exp2((hi + mid) * log2(10)) * exp10(lo)
173+
auto [exp2_hi_mid, exp10_lo] = exp10_range_reduction(x);
174+
// exp10m1(x) = exp2((hi + mid) * log2(lo)) * exp10(lo) - 1
175+
return fputil::cast<float16>(
176+
fputil::multiply_add(exp2_hi_mid, exp10_lo, -1.0f));
177+
}
178+
179+
} // namespace math
180+
181+
} // namespace LIBC_NAMESPACE_DECL
182+
183+
#endif // LIBC_TYPES_HAS_FLOAT16
184+
185+
#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXP10M1F16_H

libc/src/math/generic/CMakeLists.txt

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1603,18 +1603,7 @@ add_entrypoint_object(
16031603
HDRS
16041604
../exp10m1f16.h
16051605
DEPENDS
1606-
libc.hdr.errno_macros
1607-
libc.hdr.fenv_macros
1608-
libc.src.__support.FPUtil.cast
1609-
libc.src.__support.FPUtil.except_value_utils
1610-
libc.src.__support.FPUtil.fenv_impl
1611-
libc.src.__support.FPUtil.fp_bits
1612-
libc.src.__support.FPUtil.multiply_add
1613-
libc.src.__support.FPUtil.polyeval
1614-
libc.src.__support.FPUtil.rounding_mode
1615-
libc.src.__support.macros.optimization
1616-
libc.src.__support.macros.properties.cpu_features
1617-
libc.src.__support.math.exp10f16_utils
1606+
libc.src.__support.math.exp10m1f16
16181607
)
16191608

16201609
add_entrypoint_object(

0 commit comments

Comments
 (0)