Skip to content

Commit 062348a

Browse files
committed
implement libm::exp and its variants for i586 with inline assembly to fix precision issues
1 parent ee959d7 commit 062348a

File tree

9 files changed

+104
-2
lines changed

9 files changed

+104
-2
lines changed

libm-test/src/precision.rs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,16 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
8585
Bn::Tgamma => 20,
8686
};
8787

88+
// These have a separate implementation on i586
89+
if cfg!(x86_no_sse) {
90+
match ctx.base_name {
91+
Bn::Exp => ulp = 1,
92+
Bn::Exp2 => ulp = 1,
93+
Bn::Exp10 => ulp = 1,
94+
_ => (),
95+
}
96+
}
97+
8898
// There are some cases where musl's approximation is less accurate than ours. For these
8999
// cases, increase the ULP.
90100
if ctx.basis == Musl {
@@ -126,8 +136,6 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
126136
Id::Asinh => ulp = 3,
127137
Id::Asinhf => ulp = 3,
128138
Id::Cbrt => ulp = 1,
129-
Id::Exp10 | Id::Exp10f => ulp = 1_000_000,
130-
Id::Exp2 | Id::Exp2f => ulp = 10_000_000,
131139
Id::Log1p | Id::Log1pf => ulp = 2,
132140
Id::Tan => ulp = 2,
133141
_ => (),

libm/src/math/arch/i586.rs

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,3 +60,56 @@ pub fn floor(mut x: f64) -> f64 {
6060
}
6161
x
6262
}
63+
64+
macro_rules! x87exp {
65+
($float_ty:ident, $word_size:literal, $fn_name:ident, $load_op:literal) => {
66+
pub fn $fn_name(mut x: $float_ty) -> $float_ty { unsafe {
67+
core::arch::asm!(
68+
// Prepare the register stack as
69+
// ```
70+
// st(0) = y = x*log2(base)
71+
// st(1) = 1.0
72+
// st(2) = round(y)
73+
// ```
74+
concat!($load_op, " ", $word_size, " ptr [{x}]"),
75+
"fld1",
76+
"fld st(1)",
77+
"frndint",
78+
"fxch st(2)",
79+
80+
// Compare y with round(y) to determine if y is finite and
81+
// not an integer. If so, compute `exp2(y - round(y))` into
82+
// st(1). Otherwise skip ahead with `st(1) = 1.0`
83+
"fucom st(2)",
84+
"fstsw ax",
85+
"test ax, 0x4000",
86+
"jnz 2f",
87+
"fsub st(0), st(2)", // st(0) = y - round(y)
88+
"f2xm1", // st(0) = 2^st(0) - 1.0
89+
"fadd st(1), st(0)", // st(1) = 1 + st(0) = exp2(y - round(y))
90+
"2:",
91+
92+
// Finally, scale by `exp2(round(y))` and clear the stack.
93+
"fstp st(0)",
94+
"fscale",
95+
concat!("fstp ", $word_size, " ptr [{x}]"),
96+
"fstp st(0)",
97+
x = in(reg) &mut x,
98+
out("ax") _,
99+
out("st(0)") _, out("st(1)") _,
100+
out("st(2)") _, out("st(3)") _,
101+
out("st(4)") _, out("st(5)") _,
102+
out("st(6)") _, out("st(7)") _,
103+
options(nostack),
104+
);
105+
x
106+
}}
107+
};
108+
}
109+
110+
x87exp!(f32, "dword", x87_exp2f, "fld");
111+
x87exp!(f64, "qword", x87_exp2, "fld");
112+
x87exp!(f32, "dword", x87_exp10f, "fldl2t\nfmul");
113+
x87exp!(f64, "qword", x87_exp10, "fldl2t\nfmul");
114+
x87exp!(f32, "dword", x87_expf, "fldl2e\nfmul");
115+
x87exp!(f64, "qword", x87_exp, "fldl2e\nfmul");

libm/src/math/arch/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,8 @@ cfg_if! {
4848
pub use i586::{ceil, floor};
4949
}
5050
}
51+
cfg_if! {
52+
if #[cfg(x86_no_sse)] {
53+
pub use i586::{x87_exp10f, x87_exp10, x87_expf, x87_exp, x87_exp2f, x87_exp2};
54+
}
55+
}

libm/src/math/exp.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,12 @@ const P5: f64 = 4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */
8383
/// (where *e* is the base of the natural system of logarithms, approximately 2.71828).
8484
#[cfg_attr(assert_no_panic, no_panic::no_panic)]
8585
pub fn exp(mut x: f64) -> f64 {
86+
select_implementation! {
87+
name: x87_exp,
88+
use_arch_required: x86_no_sse,
89+
args: x,
90+
}
91+
8692
let x1p1023 = f64::from_bits(0x7fe0000000000000); // 0x1p1023 === 2 ^ 1023
8793
let x1p_149 = f64::from_bits(0x36a0000000000000); // 0x1p-149 === 2 ^ -149
8894

libm/src/math/exp10.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,12 @@ const P10: &[f64] = &[
99
/// Calculates 10 raised to the power of `x` (f64).
1010
#[cfg_attr(assert_no_panic, no_panic::no_panic)]
1111
pub fn exp10(x: f64) -> f64 {
12+
select_implementation! {
13+
name: x87_exp10,
14+
use_arch_required: x86_no_sse,
15+
args: x,
16+
}
17+
1218
let (mut y, n) = modf(x);
1319
let u: u64 = n.to_bits();
1420
/* fabs(n) < 16 without raising invalid on nan */

libm/src/math/exp10f.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,12 @@ const P10: &[f32] = &[
99
/// Calculates 10 raised to the power of `x` (f32).
1010
#[cfg_attr(assert_no_panic, no_panic::no_panic)]
1111
pub fn exp10f(x: f32) -> f32 {
12+
select_implementation! {
13+
name: x87_exp10f,
14+
use_arch_required: x86_no_sse,
15+
args: x,
16+
}
17+
1218
let (mut y, n) = modff(x);
1319
let u = n.to_bits();
1420
/* fabsf(n) < 8 without raising invalid on nan */

libm/src/math/exp2.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,12 @@ static TBL: [u64; TBLSIZE * 2] = [
324324
/// Calculate `2^x`, that is, 2 raised to the power `x`.
325325
#[cfg_attr(assert_no_panic, no_panic::no_panic)]
326326
pub fn exp2(mut x: f64) -> f64 {
327+
select_implementation! {
328+
name: x87_exp2,
329+
use_arch_required: x86_no_sse,
330+
args: x,
331+
}
332+
327333
let redux = f64::from_bits(0x4338000000000000) / TBLSIZE as f64;
328334
let p1 = f64::from_bits(0x3fe62e42fefa39ef);
329335
let p2 = f64::from_bits(0x3fcebfbdff82c575);

libm/src/math/exp2f.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,12 @@ static EXP2FT: [u64; TBLSIZE] = [
7575
/// Calculate `2^x`, that is, 2 raised to the power `x`.
7676
#[cfg_attr(assert_no_panic, no_panic::no_panic)]
7777
pub fn exp2f(mut x: f32) -> f32 {
78+
select_implementation! {
79+
name: x87_exp2f,
80+
use_arch_required: x86_no_sse,
81+
args: x,
82+
}
83+
7884
let redux = f32::from_bits(0x4b400000) / TBLSIZE as f32;
7985
let p1 = f32::from_bits(0x3f317218);
8086
let p2 = f32::from_bits(0x3e75fdf0);

libm/src/math/expf.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,12 @@ const P2: f32 = -2.7667332906e-3; /* -0xb55215.0p-32 */
3232
/// (where *e* is the base of the natural system of logarithms, approximately 2.71828).
3333
#[cfg_attr(assert_no_panic, no_panic::no_panic)]
3434
pub fn expf(mut x: f32) -> f32 {
35+
select_implementation! {
36+
name: x87_expf,
37+
use_arch_required: x86_no_sse,
38+
args: x,
39+
}
40+
3541
let x1p127 = f32::from_bits(0x7f000000); // 0x1p127f === 2 ^ 127
3642
let x1p_126 = f32::from_bits(0x800000); // 0x1p-126f === 2 ^ -126 /*original 0x1p-149f ??????????? */
3743
let mut hx = x.to_bits();

0 commit comments

Comments
 (0)