add bf16 support

This commit is contained in:
Trevor Spiteri 2019-10-16 11:32:53 +02:00
parent 5e7473b7f5
commit a674c77051
12 changed files with 295 additions and 178 deletions

View File

@ -26,7 +26,7 @@ fail-on-warnings = []
[dependencies]
typenum = "1.10"
az = { version = "0.3", optional = true }
half = { version = "1.2", optional = true }
half = { version = "1.4", optional = true }
serde = { version = "1.0.60", default-features = false, optional = true }
[dev-dependencies]

View File

@ -70,6 +70,8 @@ The conversions supported cover the following cases.
### Version 0.4.6 news (unreleased)
* Conversions to/from [`bf16`] are now provided when the `f16`
option is enabled.
* The following methods are now `const` functions:
[`saturating_neg`], [`saturating_add`], [`saturating_sub`],
[`saturating_mul_int`], [`saturating_abs`]
@ -78,7 +80,7 @@ The conversions supported cover the following cases.
[`saturating_abs`]: https://docs.rs/fixed/0.4.5/fixed/struct.FixedI32.html#method.saturating_abs
[`saturating_add`]: https://docs.rs/fixed/0.4.5/fixed/struct.FixedI32.html#method.saturating_add
[`saturating_mul_int`]: https://docs.rs/fixed/0.4.5/fixed/struct.FixedI32.html#method.saturating_mul_int
[`saturating_mul_sub`]: https://docs.rs/fixed/0.4.5/fixed/struct.FixedI32.html#method.saturating_mul_sub
[`saturating_sub`]: https://docs.rs/fixed/0.4.5/fixed/struct.FixedI32.html#method.saturating_sub
[`saturating_neg`]: https://docs.rs/fixed/0.4.5/fixed/struct.FixedI32.html#method.saturating_neg
### Version 0.4.5 news (2019-08-30)
@ -245,7 +247,7 @@ The *fixed* crate has three optional feature:
1. `az`, disabled by default. This implements the cast traits
provided by the [*az* crate].
2. `f16`, disabled by default. This provides conversion to/from
[`f16`]. This features requires the [*half* crate].
[`f16`] and [`bf16`]. This feature requires the [*half* crate].
3. `serde`, disabled by default. This provides serialization support
for the fixed-point types. This feature requires the
[*serde* crate].
@ -311,6 +313,7 @@ additional terms or conditions.
[`U12`]: https://docs.rs/fixed/0.4.5/fixed/types/extra/type.U12.html
[`U20F12`]: https://docs.rs/fixed/0.4.5/fixed/types/type.U20F12.html
[`UpperHex`]: https://doc.rust-lang.org/nightly/std/fmt/trait.UpperHex.html
[`bf16`]: https://docs.rs/half/^1/half/struct.bf16.html
[`checked_from_num`]: https://docs.rs/fixed/0.4.5/fixed/struct.FixedI32.html#method.checked_from_num
[`f16`]: https://docs.rs/half/^1/half/struct.f16.html
[`from_num`]: https://docs.rs/fixed/0.4.5/fixed/struct.FixedI32.html#method.from_num

View File

@ -8,6 +8,8 @@ as-is, without any warranty. -->
Version 0.4.6 (unreleased)
==========================
* Conversions to/from `bf16` are now provided when the `f16` option
is enabled.
* The following methods are now `const` functions: `saturating_neg`,
`saturating_add`, `saturating_sub`, `saturating_mul_int`,
`saturating_abs`

View File

@ -21,7 +21,7 @@ use crate::{
use az::{Cast, CheckedCast, OverflowingCast, SaturatingCast, StaticCast, WrappingCast};
use core::mem;
#[cfg(feature = "f16")]
use half::f16;
use half::{bf16, f16};
macro_rules! run_time {
($Src:ident($LeEqUSrc:ident); $Dst:ident($LeEqUDst:ident)) => {
@ -302,7 +302,7 @@ macro_rules! run_time_num {
#[cfg(feature = "f16")]
run_time_num! {
$Fixed($LeEqU);
f16,
f16, bf16,
}
)* };
}
@ -389,7 +389,7 @@ macro_rules! compile_time_float {
#[cfg(feature = "f16")]
compile_time_float! {
$Fixed($LeEqU);
f16,
f16, bf16,
}
)* };
}

View File

@ -22,7 +22,7 @@ use crate::{
};
use core::cmp::Ordering;
#[cfg(feature = "f16")]
use half::f16;
use half::{bf16, f16};
macro_rules! fixed_cmp_fixed {
($Lhs:ident($LhsLeEqU:ident), $Rhs:ident($RhsLeEqU:ident)) => {
@ -365,6 +365,8 @@ macro_rules! fixed_cmp_all {
fixed_cmp_int! { $Fix($LeEqU), usize }
#[cfg(feature = "f16")]
fixed_cmp_float! { $Fix($LeEqU), f16 }
#[cfg(feature = "f16")]
fixed_cmp_float! { $Fix($LeEqU), bf16 }
fixed_cmp_float! { $Fix($LeEqU), f32 }
fixed_cmp_float! { $Fix($LeEqU), f64 }
};

View File

@ -156,42 +156,79 @@ mod tests {
fn cmp_f16() {
use half::{self, f16};
assert_eq!(f16::from_fixed(TAU), f16::from_f32(f32::consts::PI * 2.0));
assert_eq!(f16::from_fixed(FRAC_TAU_2), half::consts::PI);
assert_eq!(f16::from_fixed(FRAC_TAU_2), f16::PI);
assert_eq!(
f16::from_fixed(FRAC_TAU_3),
f16::from_f32(f32::consts::FRAC_PI_3 * 2.0)
);
assert_eq!(f16::from_fixed(FRAC_TAU_4), half::consts::FRAC_PI_2);
assert_eq!(f16::from_fixed(FRAC_TAU_6), half::consts::FRAC_PI_3);
assert_eq!(f16::from_fixed(FRAC_TAU_8), half::consts::FRAC_PI_4);
assert_eq!(f16::from_fixed(FRAC_TAU_12), half::consts::FRAC_PI_6);
assert_eq!(f16::from_fixed(FRAC_TAU_4), f16::FRAC_PI_2);
assert_eq!(f16::from_fixed(FRAC_TAU_6), f16::FRAC_PI_3);
assert_eq!(f16::from_fixed(FRAC_TAU_8), f16::FRAC_PI_4);
assert_eq!(f16::from_fixed(FRAC_TAU_12), f16::FRAC_PI_6);
assert_eq!(
f16::from_fixed(FRAC_1_TAU),
f16::from_f32(f32::consts::FRAC_1_PI * 0.5)
);
assert_eq!(f16::from_fixed(FRAC_2_TAU), half::consts::FRAC_1_PI);
assert_eq!(f16::from_fixed(FRAC_4_TAU), half::consts::FRAC_2_PI);
assert_eq!(f16::from_fixed(PI), half::consts::PI);
assert_eq!(f16::from_fixed(FRAC_PI_2), half::consts::FRAC_PI_2);
assert_eq!(f16::from_fixed(FRAC_PI_3), half::consts::FRAC_PI_3);
assert_eq!(f16::from_fixed(FRAC_PI_4), half::consts::FRAC_PI_4);
assert_eq!(f16::from_fixed(FRAC_PI_6), half::consts::FRAC_PI_6);
assert_eq!(f16::from_fixed(FRAC_PI_8), half::consts::FRAC_PI_8);
assert_eq!(f16::from_fixed(FRAC_1_PI), half::consts::FRAC_1_PI);
assert_eq!(f16::from_fixed(FRAC_2_PI), half::consts::FRAC_2_PI);
assert_eq!(f16::from_fixed(FRAC_2_TAU), f16::FRAC_1_PI);
assert_eq!(f16::from_fixed(FRAC_4_TAU), f16::FRAC_2_PI);
assert_eq!(f16::from_fixed(PI), f16::PI);
assert_eq!(f16::from_fixed(FRAC_PI_2), f16::FRAC_PI_2);
assert_eq!(f16::from_fixed(FRAC_PI_3), f16::FRAC_PI_3);
assert_eq!(f16::from_fixed(FRAC_PI_4), f16::FRAC_PI_4);
assert_eq!(f16::from_fixed(FRAC_PI_6), f16::FRAC_PI_6);
assert_eq!(f16::from_fixed(FRAC_PI_8), f16::FRAC_PI_8);
assert_eq!(f16::from_fixed(FRAC_1_PI), f16::FRAC_1_PI);
assert_eq!(f16::from_fixed(FRAC_2_PI), f16::FRAC_2_PI);
assert_eq!(f16::from_fixed(FRAC_2_SQRT_PI), f16::FRAC_2_SQRT_PI);
assert_eq!(f16::from_fixed(SQRT_2), f16::SQRT_2);
assert_eq!(f16::from_fixed(FRAC_1_SQRT_2), f16::FRAC_1_SQRT_2);
assert_eq!(f16::from_fixed(E), f16::E);
// assert_eq!(f16::from_fixed(LOG2_10), f16::LOG2_10);
assert_eq!(f16::from_fixed(LOG2_E), f16::LOG2_E);
// assert_eq!(f16::from_fixed(LOG10_2), f16::LOG10_2);
assert_eq!(f16::from_fixed(LOG10_E), f16::LOG10_E);
assert_eq!(f16::from_fixed(LN_2), f16::LN_2);
assert_eq!(f16::from_fixed(LN_10), f16::LN_10);
}
#[cfg(feature = "f16")]
#[test]
fn cmp_bf16() {
use half::{self, bf16};
assert_eq!(bf16::from_fixed(TAU), bf16::from_f32(f32::consts::PI * 2.0));
assert_eq!(bf16::from_fixed(FRAC_TAU_2), bf16::PI);
assert_eq!(
f16::from_fixed(FRAC_2_SQRT_PI),
half::consts::FRAC_2_SQRT_PI
bf16::from_fixed(FRAC_TAU_3),
bf16::from_f32(f32::consts::FRAC_PI_3 * 2.0)
);
assert_eq!(f16::from_fixed(SQRT_2), half::consts::SQRT_2);
assert_eq!(f16::from_fixed(FRAC_1_SQRT_2), half::consts::FRAC_1_SQRT_2);
assert_eq!(f16::from_fixed(E), half::consts::E);
// assert_eq!(f16::from_fixed(LOG2_10), half::consts::LOG2_10);
assert_eq!(f16::from_fixed(LOG2_E), half::consts::LOG2_E);
// assert_eq!(f16::from_fixed(LOG10_2), half::consts::LOG10_2);
assert_eq!(f16::from_fixed(LOG10_E), half::consts::LOG10_E);
assert_eq!(f16::from_fixed(LN_2), half::consts::LN_2);
assert_eq!(f16::from_fixed(LN_10), half::consts::LN_10);
assert_eq!(bf16::from_fixed(FRAC_TAU_4), bf16::FRAC_PI_2);
assert_eq!(bf16::from_fixed(FRAC_TAU_6), bf16::FRAC_PI_3);
assert_eq!(bf16::from_fixed(FRAC_TAU_8), bf16::FRAC_PI_4);
assert_eq!(bf16::from_fixed(FRAC_TAU_12), bf16::FRAC_PI_6);
assert_eq!(
bf16::from_fixed(FRAC_1_TAU),
bf16::from_f32(f32::consts::FRAC_1_PI * 0.5)
);
assert_eq!(bf16::from_fixed(FRAC_2_TAU), bf16::FRAC_1_PI);
assert_eq!(bf16::from_fixed(FRAC_4_TAU), bf16::FRAC_2_PI);
assert_eq!(bf16::from_fixed(PI), bf16::PI);
assert_eq!(bf16::from_fixed(FRAC_PI_2), bf16::FRAC_PI_2);
assert_eq!(bf16::from_fixed(FRAC_PI_3), bf16::FRAC_PI_3);
assert_eq!(bf16::from_fixed(FRAC_PI_4), bf16::FRAC_PI_4);
assert_eq!(bf16::from_fixed(FRAC_PI_6), bf16::FRAC_PI_6);
assert_eq!(bf16::from_fixed(FRAC_PI_8), bf16::FRAC_PI_8);
assert_eq!(bf16::from_fixed(FRAC_1_PI), bf16::FRAC_1_PI);
assert_eq!(bf16::from_fixed(FRAC_2_PI), bf16::FRAC_2_PI);
assert_eq!(bf16::from_fixed(FRAC_2_SQRT_PI), bf16::FRAC_2_SQRT_PI);
assert_eq!(bf16::from_fixed(SQRT_2), bf16::SQRT_2);
assert_eq!(bf16::from_fixed(FRAC_1_SQRT_2), bf16::FRAC_1_SQRT_2);
assert_eq!(bf16::from_fixed(E), bf16::E);
// assert_eq!(bf16::from_fixed(LOG2_10), bf16::LOG2_10);
assert_eq!(bf16::from_fixed(LOG2_E), bf16::LOG2_E);
// assert_eq!(bf16::from_fixed(LOG10_2), bf16::LOG10_2);
assert_eq!(bf16::from_fixed(LOG10_E), bf16::LOG10_E);
assert_eq!(bf16::from_fixed(LN_2), bf16::LN_2);
assert_eq!(bf16::from_fixed(LN_10), bf16::LN_10);
}
#[test]

View File

@ -25,7 +25,7 @@ use crate::{
};
use core::ops::Sub;
#[cfg(feature = "f16")]
use {crate::helpers::FloatHelper, half::f16};
use half::{bf16, f16};
macro_rules! convert {
(
@ -515,6 +515,8 @@ macro_rules! fixed_to_float_lossy {
($Fixed:ident($LeEqU:ident)) => {
#[cfg(feature = "f16")]
fixed_to_float_lossy! { $Fixed($LeEqU) -> f16 }
#[cfg(feature = "f16")]
fixed_to_float_lossy! { $Fixed($LeEqU) -> bf16 }
fixed_to_float_lossy! { $Fixed($LeEqU) -> f32 }
fixed_to_float_lossy! { $Fixed($LeEqU) -> f64 }
};
@ -543,6 +545,8 @@ macro_rules! int_to_float_lossy {
($Int:ident) => {
#[cfg(feature = "f16")]
int_to_float_lossy! { $Int -> f16 }
#[cfg(feature = "f16")]
int_to_float_lossy! { $Int -> bf16 }
int_to_float_lossy! { $Int -> f32 }
int_to_float_lossy! { $Int -> f64 }
};
@ -587,61 +591,43 @@ lossy! { usize }
#[cfg(feature = "f16")]
lossy! { f16 }
#[cfg(feature = "f16")]
impl LossyFrom<f16> for bf16 {
    // There is no direct f16 -> bf16 conversion used here: the value is
    // first widened to f32 and then narrowed with bf16::from_f32.
    #[inline]
    fn lossy_from(src: f16) -> bf16 {
        let widened = f32::from(src);
        bf16::from_f32(widened)
    }
}
#[cfg(feature = "f16")]
lossy! { f16: Into f32 }
#[cfg(feature = "f16")]
lossy! { f16: Into f64 }
#[cfg(feature = "f16")]
impl LossyFrom<bf16> for f16 {
    // There is no direct bf16 -> f16 conversion used here: the value is
    // first widened to f32 and then narrowed with f16::from_f32.
    #[inline]
    fn lossy_from(src: bf16) -> f16 {
        let widened = f32::from(src);
        f16::from_f32(widened)
    }
}
#[cfg(feature = "f16")]
lossy! { bf16 }
#[cfg(feature = "f16")]
lossy! { bf16: Into f32 }
#[cfg(feature = "f16")]
lossy! { bf16: Into f64 }
#[cfg(feature = "f16")]
impl LossyFrom<f32> for f16 {
#[inline]
fn lossy_from(src: f32) -> Self {
// do not use f16::from_f32 because of https://github.com/starkat99/half-rs/issues/24
let (neg, exp, mantissa) = src.parts();
// src is ∞ or NaN
if exp == f32::EXP_MAX + 1 {
let mantissa = if mantissa != 0 {
// set highest mantissa bit for NaN to NaN conversion
(mantissa >> (f32::PREC - f16::PREC) | (1 << (f16::PREC - 2))) as u16
} else {
0
};
return f16::from_parts(neg, f16::EXP_MAX + 1, mantissa);
}
// src overflows
if exp > f16::EXP_MAX {
return f16::from_parts(neg, f16::EXP_MAX + 1, 0);
}
// src underflows
if exp < f16::EXP_MIN {
let remove_bits = -exp + f16::EXP_MIN + f32::PREC as i32 - f16::PREC as i32;
// src underflows to zero even if rounding up
if remove_bits > f32::PREC as i32 {
return f16::from_parts(neg, f16::EXP_MIN - 1, 0);
}
// add implicit one
let mantissa = mantissa | (1 << (f32::PREC - 1));
let round_mask = 1 << (remove_bits - 1);
// round up if round bit is true and either
// * odd, that is mantissa & (2 * round_mask) != 0
// * more bits below round, that is mantissa & (round_mask - 1) != 0
// The two later masks can be combined to (3 * round_mask - 1)
let round = mantissa & round_mask != 0 && mantissa & (3 * round_mask - 1) != 0;
// add rounding bit after conversion in case we go back to normal
let mantissa = (mantissa >> remove_bits) as u16;
let bits =
f16::bits_from_parts(neg, f16::EXP_MIN - 1, mantissa) + if round { 1 } else { 0 };
return f16::from_bits(bits);
}
let remove_bits = f32::PREC - f16::PREC;
let round_mask = 1 << (remove_bits - 1);
// round up if round bit is true and either
// * odd, that is mantissa & (2 * round_mask) != 0
// * more bits below round, that is mantissa & (round_mask - 1) != 0
// The two later masks can be combined to (3 * round_mask - 1)
let round = mantissa & round_mask != 0 && mantissa & (3 * round_mask - 1) != 0;
// add rounding bit after conversion in case we overflow
let mantissa = (mantissa >> remove_bits) as u16;
let bits = f16::bits_from_parts(neg, exp, mantissa) + if round { 1 } else { 0 };
f16::from_bits(bits)
f16::from_f32(src)
}
}
#[cfg(feature = "f16")]
impl LossyFrom<f32> for bf16 {
    // Narrowing is delegated entirely to the half crate's bf16::from_f32;
    // no rounding is performed in this crate.
    #[inline]
    fn lossy_from(src: f32) -> Self {
        Self::from_f32(src)
    }
}
lossy! { f32 }
@ -649,55 +635,16 @@ lossy! { f32: Into f64 }
#[cfg(feature = "f16")]
impl LossyFrom<f64> for f16 {
#[inline]
fn lossy_from(src: f64) -> Self {
// do not use f16::from_f64 because of https://github.com/starkat99/half-rs/issues/24
let (neg, exp, mantissa) = src.parts();
// src is ∞ or NaN
if exp == f64::EXP_MAX + 1 {
let mantissa = if mantissa != 0 {
// set highest mantissa bit for NaN to NaN conversion
(mantissa >> (f64::PREC - f16::PREC) | (1 << (f16::PREC - 2))) as u16
} else {
0
};
return f16::from_parts(neg, f16::EXP_MAX + 1, mantissa);
}
// src overflows
if exp > f16::EXP_MAX {
return f16::from_parts(neg, f16::EXP_MAX + 1, 0);
}
// src underflows
if exp < f16::EXP_MIN {
let remove_bits = -exp + f16::EXP_MIN + f64::PREC as i32 - f16::PREC as i32;
// src underflows to zero even if rounding up
if remove_bits > f64::PREC as i32 {
return f16::from_parts(neg, f16::EXP_MIN - 1, 0);
}
// add implicit one
let mantissa = mantissa | (1 << (f64::PREC - 1));
let round_mask = 1 << (remove_bits - 1);
// round up if round bit is true and either
// * odd, that is mantissa & (2 * round_mask) != 0
// * more bits below round, that is mantissa & (round_mask - 1) != 0
// The two later masks can be combined to (3 * round_mask - 1)
let round = mantissa & round_mask != 0 && mantissa & (3 * round_mask - 1) != 0;
// add rounding bit after conversion in case we go back to normal
let mantissa = (mantissa >> remove_bits) as u16;
let bits =
f16::bits_from_parts(neg, f16::EXP_MIN - 1, mantissa) + if round { 1 } else { 0 };
return f16::from_bits(bits);
}
let remove_bits = f64::PREC - f16::PREC;
let round_mask = 1 << (remove_bits - 1);
// round up if round bit is true and either
// * odd, that is mantissa & (2 * round_mask) != 0
// * more bits below round, that is mantissa & (round_mask - 1) != 0
// The two later masks can be combined to (3 * round_mask - 1)
let round = mantissa & round_mask != 0 && mantissa & (3 * round_mask - 1) != 0;
// add rounding bit after conversion in case we overflow
let mantissa = (mantissa >> remove_bits) as u16;
let bits = f16::bits_from_parts(neg, exp, mantissa) + if round { 1 } else { 0 };
f16::from_bits(bits)
f16::from_f64(src)
}
}
#[cfg(feature = "f16")]
impl LossyFrom<f64> for bf16 {
    // Narrowing is delegated entirely to the half crate's bf16::from_f64;
    // no rounding is performed in this crate.
    #[inline]
    fn lossy_from(src: f64) -> Self {
        Self::from_f64(src)
    }
}
lossy! { f64 as f32 }
@ -1095,6 +1042,53 @@ mod tests {
}
}
#[cfg(feature = "f16")]
#[test]
fn to_bf16() {
    use half::bf16;

    // Upper-bit patterns that get combined with every possible low byte
    // for the 32-bit fixed-point types.
    const HIGH_BITS_32: [u32; 7] = [
        0,
        0x0000_0100,
        0x7fff_ff00,
        0x8000_0000,
        0x8100_0000,
        0xffff_fe00,
        0xffff_ff00,
    ];
    // The same kind of upper-bit patterns for the 128-bit fixed-point types.
    const HIGH_BITS_128: [u128; 7] = [
        0,
        0x0000_0000_0000_0000_0000_0000_0000_0100,
        0x7fff_ffff_ffff_ffff_ffff_ffff_ffff_ff00,
        0x8000_0000_0000_0000_0000_0000_0000_0000,
        0x8100_0000_0000_0000_0000_0000_0000_0000,
        0xffff_ffff_ffff_ffff_ffff_ffff_ffff_fe00,
        0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ff00,
    ];

    // Every expected value below is the raw bit pattern divided by 128,
    // converted to bf16 through the half crate.
    for low in 0x00..=0xff {
        // 8-bit types: all bit patterns are covered by the outer loop.
        let unsigned_8 = U1F7::from_bits(low);
        assert_eq!(
            unsigned_8.to_num::<bf16>(),
            bf16::from_f32(f32::from(low) / 128.0)
        );
        let low_signed = low as i8;
        let signed_8 = I1F7::from_bits(low_signed);
        assert_eq!(
            signed_8.to_num::<bf16>(),
            bf16::from_f32(f32::from(low_signed) / 128.0)
        );

        // 32-bit types: the reference value goes through f32.
        for &high in HIGH_BITS_32.iter() {
            let bits = high | u32::from(low);
            let unsigned_32 = U25F7::from_bits(bits);
            assert_eq!(
                unsigned_32.to_num::<bf16>(),
                bf16::from_f32(bits as f32 / 128.0)
            );
            let bits_signed = bits as i32;
            let signed_32 = I25F7::from_bits(bits_signed);
            assert_eq!(
                signed_32.to_num::<bf16>(),
                bf16::from_f32(bits_signed as f32 / 128.0)
            );
        }

        // 128-bit types: the reference value goes through f64.
        for &high in HIGH_BITS_128.iter() {
            let bits = high | u128::from(low);
            let unsigned_128 = U121F7::from_bits(bits);
            assert_eq!(
                unsigned_128.to_num::<bf16>(),
                bf16::from_f64(bits as f64 / 128.0)
            );
            let bits_signed = bits as i128;
            let signed_128 = I121F7::from_bits(bits_signed);
            assert_eq!(
                signed_128.to_num::<bf16>(),
                bf16::from_f64(bits_signed as f64 / 128.0)
            );
        }
    }
}
#[test]
fn to_f32() {
for u in 0x00..=0xff {
@ -1218,14 +1212,14 @@ mod tests {
fn lossy_f16() {
use crate::traits::LossyFrom;
use core::{f32, f64};
use half::{consts as f16_consts, f16};
use half::f16;
assert_eq!(f16::lossy_from(f32::NEG_INFINITY), f16_consts::NEG_INFINITY);
assert_eq!(f16::lossy_from(f32::NEG_INFINITY), f16::NEG_INFINITY);
assert!(f16::lossy_from(f32::NAN).is_nan());
assert_eq!(f16::lossy_from(1e-37f32), f16::from_bits(0));
// -1.625 << 15 is 1 11110 1010000000 is FA80
assert_eq!(f16::lossy_from(-32768f32 * 1.625), f16::from_bits(0xFA80));
assert_eq!(f16::lossy_from(32768f32 * 2.), f16_consts::INFINITY);
assert_eq!(f16::lossy_from(32768f32 * 2.), f16::INFINITY);
// 0x8020 is 0x1.004 << 15 is 0 11110 0000000001
assert_eq!(
f16::lossy_from(f32::from(0x8020u16)),
@ -1249,12 +1243,12 @@ mod tests {
);
assert_eq!(f16::lossy_from((-24f32).exp2() * 0.5), f16::from_bits(0));
assert_eq!(f16::lossy_from(f64::NEG_INFINITY), f16_consts::NEG_INFINITY);
assert_eq!(f16::lossy_from(f64::NEG_INFINITY), f16::NEG_INFINITY);
assert!(f16::lossy_from(f64::NAN).is_nan());
assert_eq!(f16::lossy_from(1e-37f64), f16::from_bits(0));
// -1.625 << 15 is 1 11110 1010000000 is FA80
assert_eq!(f16::lossy_from(-32768f64 * 1.625), f16::from_bits(0xFA80));
assert_eq!(f16::lossy_from(32768f64 * 2.), f16_consts::INFINITY);
assert_eq!(f16::lossy_from(32768f64 * 2.), f16::INFINITY);
// 0x8020 is 0x1.004 << 15 is 0 11110 0000000001
assert_eq!(
f16::lossy_from(f64::from(0x8020u16)),
@ -1278,4 +1272,64 @@ mod tests {
);
assert_eq!(f16::lossy_from((-24f32).exp2() * 0.5), f16::from_bits(0));
}
#[cfg(feature = "f16")]
#[test]
fn lossy_bf16() {
    use crate::traits::LossyFrom;
    use core::{f32, f64};
    use half::bf16;

    // ---- f32 -> bf16 ----
    assert_eq!(bf16::lossy_from(f32::NEG_INFINITY), bf16::NEG_INFINITY);
    assert!(bf16::lossy_from(f32::NAN).is_nan());
    assert_eq!(bf16::lossy_from(f32::MIN_POSITIVE), bf16::MIN_POSITIVE);
    // -1.625 << 127 is 1 11111110 1010000 is FF50
    assert_eq!(
        bf16::lossy_from(127f32.exp2() * -1.625),
        bf16::from_bits(0xFF50)
    );
    // max is rounded up
    assert_eq!(bf16::lossy_from(f32::MAX), bf16::INFINITY);
    // The low 16 bits of the f32 are dropped; the cases below walk through
    // round to nearest with ties to even around an odd (0x4175) and an
    // even (0x4176) bf16 bit pattern.
    // just below half way: round down
    assert_eq!(
        bf16::lossy_from(f32::from_bits(0x4175_7FFF)),
        bf16::from_bits(0x4175)
    );
    // exactly half way from an odd pattern: tie rounds up to even
    assert_eq!(
        bf16::lossy_from(f32::from_bits(0x4175_8000)),
        bf16::from_bits(0x4176)
    );
    // just above half way: round up
    assert_eq!(
        bf16::lossy_from(f32::from_bits(0x4175_8001)),
        bf16::from_bits(0x4176)
    );
    // just below half way: round down
    assert_eq!(
        bf16::lossy_from(f32::from_bits(0x4176_7FFF)),
        bf16::from_bits(0x4176)
    );
    // exactly half way from an even pattern: tie stays at even
    assert_eq!(
        bf16::lossy_from(f32::from_bits(0x4176_8000)),
        bf16::from_bits(0x4176)
    );
    // just above half way: round up
    assert_eq!(
        bf16::lossy_from(f32::from_bits(0x4176_8001)),
        bf16::from_bits(0x4177)
    );

    // ---- f64 -> bf16 ----
    assert_eq!(bf16::lossy_from(f64::NEG_INFINITY), bf16::NEG_INFINITY);
    assert!(bf16::lossy_from(f64::NAN).is_nan());
    assert_eq!(bf16::lossy_from(1e-100f64), bf16::from_bits(0));
    // -1.625 << 127 is 1 11111110 1010000 is FF50
    assert_eq!(
        bf16::lossy_from(127f64.exp2() * -1.625),
        bf16::from_bits(0xFF50)
    );
    assert_eq!(bf16::lossy_from(128f64.exp2()), bf16::INFINITY);
    // 1.0 >> 133 is minimum non-zero subnormal 0 00000000 0000001
    assert_eq!(bf16::lossy_from((-133f64).exp2()), bf16::from_bits(0x0001));
    // slightly more than half the minimum subnormal rounds up to it
    assert_eq!(
        bf16::lossy_from((-133f64).exp2() * 0.5001),
        bf16::from_bits(0x0001)
    );
    // exactly half the minimum subnormal is a tie and rounds to even, that
    // is to zero; check the f64 path here ...
    assert_eq!(bf16::lossy_from((-133f64).exp2() * 0.5), bf16::from_bits(0));
    // ... and keep the same tie going through the f32 path
    assert_eq!(bf16::lossy_from((-133f32).exp2() * 0.5), bf16::from_bits(0));
}
}

View File

@ -16,7 +16,7 @@
use crate::helpers::{FloatKind, IntHelper, ToFixedHelper, ToFloatHelper, Widest};
use core::cmp::Ordering;
#[cfg(feature = "f16")]
use half::f16;
use half::{bf16, f16};
pub trait FloatHelper: Copy {
type Bits: IntHelper;
@ -207,5 +207,7 @@ macro_rules! sealed_float {
#[cfg(feature = "f16")]
sealed_float! { f16(u16, i16, 11) }
#[cfg(feature = "f16")]
sealed_float! { bf16(u16, i16, 8) }
sealed_float! { f32(u32, i32, 24) }
sealed_float! { f64(u64, i64, 53) }

View File

@ -149,7 +149,7 @@ The *fixed* crate has three optional feature:
1. `az`, disabled by default. This implements the cast traits
provided by the [*az* crate].
2. `f16`, disabled by default. This provides conversion to/from
[`f16`]. This features requires the [*half* crate].
[`f16`] and [`bf16`]. This feature requires the [*half* crate].
3. `serde`, disabled by default. This provides serialization support
for the fixed-point types. This feature requires the
[*serde* crate].
@ -215,6 +215,7 @@ additional terms or conditions.
[`U12`]: types/extra/type.U12.html
[`U20F12`]: types/type.U20F12.html
[`UpperHex`]: https://doc.rust-lang.org/nightly/std/fmt/trait.UpperHex.html
[`bf16`]: https://docs.rs/half/^1/half/struct.bf16.html
[`checked_from_num`]: struct.FixedI32.html#method.checked_from_num
[`f16`]: https://docs.rs/half/^1/half/struct.f16.html
[`from_num`]: struct.FixedI32.html#method.from_num

View File

@ -25,9 +25,9 @@ The other number can be:
[`isize`], [`u8`], [`u16`], [`u32`], [`u64`], [`u128`], or
[`usize`].
* A floating-point number of type [`f32`] or [`f64`]. If the [`f16`
feature] is enabled, it can also be of type [`f16`]. For this
conversion, the method rounds to the nearest, with ties rounding
to even.
feature] is enabled, it can also be of type [`f16`] or [`bf16`].
For this conversion, the method rounds to the nearest, with ties
rounding to even.
* Any other number `src` for which [`ToFixed`] is implemented, in
which case this method returns [`src.to_fixed()`][`to_fixed`].
@ -70,6 +70,7 @@ assert_eq!(Fix::from_num(",
```
[`ToFixed`]: traits/trait.ToFixed.html
[`bf16`]: https://docs.rs/half/^1.2/half/struct.bf16.html
[`f16` feature]: index.html#optional-features
[`f16`]: https://docs.rs/half/^1.2/half/struct.f16.html
[`f32`]: https://doc.rust-lang.org/nightly/std/primitive.f32.html
@ -106,9 +107,9 @@ The other number can be:
[`isize`], [`u8`], [`u16`], [`u32`], [`u64`], [`u128`], or
[`usize`]. Any fractional bits are truncated.
* A floating-point number of type [`f32`] or [`f64`]. If the [`f16`
feature] is enabled, it can also be of type [`f16`]. For this
conversion, the method rounds to the nearest, with ties rounding
to even.
feature] is enabled, it can also be of type [`f16`] or [`bf16`].
For this conversion, the method rounds to the nearest, with ties
rounding to even.
* Any other type `Dst` for which [`FromFixed`] is implemented, in
which case this method returns
[`Dst::from_fixed(self)`][`from_fixed`].
@ -156,6 +157,7 @@ assert_eq!(",
```
[`FromFixed`]: traits/trait.FromFixed.html
[`bf16`]: https://docs.rs/half/^1.2/half/struct.bf16.html
[`f16` feature]: index.html#optional-features
[`f16`]: https://docs.rs/half/^1.2/half/struct.f16.html
[`f32`]: https://doc.rust-lang.org/nightly/std/primitive.f32.html
@ -192,9 +194,9 @@ The other number can be:
[`isize`], [`u8`], [`u16`], [`u32`], [`u64`], [`u128`], or
[`usize`].
* A floating-point number of type [`f32`] or [`f64`]. If the [`f16`
feature] is enabled, it can also be of type [`f16`]. For this
conversion, the method rounds to the nearest, with ties rounding
to even.
feature] is enabled, it can also be of type [`f16`] or [`bf16`].
For this conversion, the method rounds to the nearest, with ties
rounding to even.
* Any other number `src` for which [`ToFixed`] is implemented, in
which case this method returns [`src.checked_to_fixed()`][`checked_to_fixed`].
@ -242,6 +244,7 @@ assert!(Fix::checked_from_num(std::f64::NAN).is_none());
[`None`]: https://doc.rust-lang.org/nightly/std/option/enum.Option.html#variant.None
[`ToFixed`]: traits/trait.ToFixed.html
[`bf16`]: https://docs.rs/half/^1.2/half/struct.bf16.html
[`f16` feature]: index.html#optional-features
[`f16`]: https://docs.rs/half/^1.2/half/struct.f16.html
[`f32`]: https://doc.rust-lang.org/nightly/std/primitive.f32.html
@ -277,9 +280,9 @@ The other number can be:
[`isize`], [`u8`], [`u16`], [`u32`], [`u64`], [`u128`], or
[`usize`]. Any fractional bits are truncated.
* A floating-point number of type [`f32`] or [`f64`]. If the [`f16`
feature] is enabled, it can also be of type [`f16`]. For this
conversion, the method rounds to the nearest, with ties rounding
to even.
feature] is enabled, it can also be of type [`f16`] or [`bf16`].
For this conversion, the method rounds to the nearest, with ties
rounding to even.
* Any other type `Dst` for which [`FromFixed`] is implemented, in
which case this method returns
[`Dst::checked_from_fixed(self)`][`checked_from_fixed`].
@ -327,6 +330,7 @@ assert_eq!(one_point_625.checked_to_num::<f32>(), Some(1.625f32));
[`FromFixed`]: traits/trait.FromFixed.html
[`None`]: https://doc.rust-lang.org/nightly/std/option/enum.Option.html#variant.None
[`bf16`]: https://docs.rs/half/^1.2/half/struct.bf16.html
[`checked_from_fixed`]: traits/trait.FromFixed.html#tymethod.checked_from_fixed
[`f16` feature]: index.html#optional-features
[`f16`]: https://docs.rs/half/^1.2/half/struct.f16.html
@ -362,9 +366,9 @@ The other number can be:
[`isize`], [`u8`], [`u16`], [`u32`], [`u64`], [`u128`], or
[`usize`].
* A floating-point number of type [`f32`] or [`f64`]. If the [`f16`
feature] is enabled, it can also be of type [`f16`]. For this
conversion, the method rounds to the nearest, with ties rounding
to even.
feature] is enabled, it can also be of type [`f16`] or [`bf16`].
For this conversion, the method rounds to the nearest, with ties
rounding to even.
* Any other number `src` for which [`ToFixed`] is implemented, in
which case this method returns
[`src.saturating_to_fixed()`][`saturating_to_fixed`].
@ -415,6 +419,7 @@ assert_eq!(Fix::saturating_from_num(std::f64::NEG_INFINITY), Fix::min_value());
[NaN]: https://doc.rust-lang.org/nightly/std/primitive.f64.html#method.is_nan
[`ToFixed`]: traits/trait.ToFixed.html
[`bf16`]: https://docs.rs/half/^1.2/half/struct.bf16.html
[`f16` feature]: index.html#optional-features
[`f16`]: https://docs.rs/half/^1.2/half/struct.f16.html
[`f32`]: https://doc.rust-lang.org/nightly/std/primitive.f32.html
@ -450,9 +455,9 @@ The other number can be:
[`isize`], [`u8`], [`u16`], [`u32`], [`u64`], [`u128`], or
[`usize`]. Any fractional bits are truncated.
* A floating-point number of type [`f32`] or [`f64`]. If the [`f16`
feature] is enabled, it can also be of type [`f16`]. For this
conversion, the method rounds to the nearest, with ties rounding
to even.
feature] is enabled, it can also be of type [`f16`] or [`bf16`].
For this conversion, the method rounds to the nearest, with ties
rounding to even.
* Any other type `Dst` for which [`FromFixed`] is implemented, in
which case this method returns
[`Dst::saturating_from_fixed(self)`][`saturating_from_fixed`].
@ -496,6 +501,7 @@ assert_eq!(one_point_625.saturating_to_num::<f32>(), 1.625f32);
```
[`FromFixed`]: traits/trait.FromFixed.html
[`bf16`]: https://docs.rs/half/^1.2/half/struct.bf16.html
[`f16` feature]: index.html#optional-features
[`f16`]: https://docs.rs/half/^1.2/half/struct.f16.html
[`f32`]: https://doc.rust-lang.org/nightly/std/primitive.f32.html
@ -531,9 +537,9 @@ The other number can be:
[`isize`], [`u8`], [`u16`], [`u32`], [`u64`], [`u128`], or
[`usize`].
* A floating-point number of type [`f32`] or [`f64`]. If the [`f16`
feature] is enabled, it can also be of type [`f16`]. For this
conversion, the method rounds to the nearest, with ties rounding
to even.
feature] is enabled, it can also be of type [`f16`] or [`bf16`].
For this conversion, the method rounds to the nearest, with ties
rounding to even.
* Any other number `src` for which [`ToFixed`] is implemented, in
which case this method returns [`src.wrapping_to_fixed()`][`wrapping_to_fixed`].
@ -574,6 +580,7 @@ assert_eq!(Fix::wrapping_from_num(large), wrapped);
```
[`ToFixed`]: traits/trait.ToFixed.html
[`bf16`]: https://docs.rs/half/^1.2/half/struct.bf16.html
[`f16` feature]: index.html#optional-features
[`f16`]: https://docs.rs/half/^1.2/half/struct.f16.html
[`f32`]: https://doc.rust-lang.org/nightly/std/primitive.f32.html
@ -610,9 +617,9 @@ The other number can be:
[`isize`], [`u8`], [`u16`], [`u32`], [`u64`], [`u128`], or
[`usize`]. Any fractional bits are truncated.
* A floating-point number of type [`f32`] or [`f64`]. If the [`f16`
feature] is enabled, it can also be of type [`f16`]. For this
conversion, the method rounds to the nearest, with ties rounding
to even.
feature] is enabled, it can also be of type [`f16`] or [`bf16`].
For this conversion, the method rounds to the nearest, with ties
rounding to even.
* Any other type `Dst` for which [`FromFixed`] is implemented, in
which case this method returns
[`Dst::wrapping_from_fixed(self)`][`wrapping_from_fixed`].
@ -657,6 +664,7 @@ assert_eq!(one_point_625.wrapping_to_num::<f32>(), 1.625f32);
[`FromFixed`]: traits/trait.FromFixed.html
[`wrapping_from_fixed`]: traits/trait.FromFixed.html#tymethod.wrapping_from_fixed
[`bf16`]: https://docs.rs/half/^1.2/half/struct.bf16.html
[`f16` feature]: index.html#optional-features
[`f16`]: https://docs.rs/half/^1.2/half/struct.f16.html
[`f32`]: https://doc.rust-lang.org/nightly/std/primitive.f32.html
@ -694,7 +702,7 @@ The other number can be:
[`isize`], [`u8`], [`u16`], [`u32`], [`u64`], [`u128`], or
[`usize`].
* A floating-point number of type [`f32`] or [`f64`]. If the [`f16`
feature] is enabled, it can also be of type [`f16`]. For this
feature] is enabled, it can also be of type [`f16`] or [`bf16`]. For this
conversion, the method rounds to the nearest, with ties rounding
to even.
* Any other number `src` for which [`ToFixed`] is implemented, in
@ -740,6 +748,7 @@ assert_eq!(Fix::overflowing_from_num(large), (wrapped, true));
[`ToFixed`]: traits/trait.ToFixed.html
[`bool`]: https://doc.rust-lang.org/nightly/std/primitive.bool.html
[`bf16`]: https://docs.rs/half/^1.2/half/struct.bf16.html
[`f16` feature]: index.html#optional-features
[`f16`]: https://docs.rs/half/^1.2/half/struct.f16.html
[`f32`]: https://doc.rust-lang.org/nightly/std/primitive.f32.html
@ -779,9 +788,9 @@ The other number can be:
[`isize`], [`u8`], [`u16`], [`u32`], [`u64`], [`u128`], or
[`usize`]. Any fractional bits are truncated.
* A floating-point number of type [`f32`] or [`f64`]. If the [`f16`
feature] is enabled, it can also be of type [`f16`]. For this
conversion, the method rounds to the nearest, with ties rounding
to even.
feature] is enabled, it can also be of type [`f16`] or [`bf16`].
For this conversion, the method rounds to the nearest, with ties
rounding to even.
* Any other type `Dst` for which [`FromFixed`] is implemented, in
which case this method returns
[`Dst::overflowing_from_fixed(self)`][`overflowing_from_fixed`].
@ -828,6 +837,7 @@ assert_eq!(one_point_625.overflowing_to_num::<f32>(), (1.625f32, false));
[`FromFixed`]: traits/trait.FromFixed.html
[`bool`]: https://doc.rust-lang.org/nightly/std/primitive.bool.html
[`bf16`]: https://docs.rs/half/^1.2/half/struct.bf16.html
[`f16` feature]: index.html#optional-features
[`f16`]: https://docs.rs/half/^1.2/half/struct.f16.html
[`f32`]: https://doc.rust-lang.org/nightly/std/primitive.f32.html

View File

@ -34,7 +34,7 @@ use core::{
str::FromStr,
};
#[cfg(feature = "f16")]
use half::f16;
use half::{bf16, f16};
#[cfg(feature = "serde")]
use serde::{de::Deserialize, ser::Serialize};
@ -48,7 +48,7 @@ macro_rules! comment_features {
#[cfg(all(feature = "f16", not(feature = "serde")))]
doc_comment! {
$comment;
pub trait FixedOptionalFeatures: PartialOrd<f16> {}
pub trait FixedOptionalFeatures: PartialOrd<f16> + PartialOrd<bf16> {}
}
#[cfg(all(not(feature = "f16"), feature = "serde"))]
doc_comment! {
@ -60,7 +60,7 @@ macro_rules! comment_features {
$comment;
pub trait FixedOptionalFeatures
where
Self: PartialOrd<f16>,
Self: PartialOrd<f16> + PartialOrd<bf16>,
Self: Serialize + for<'de> Deserialize<'de>,
{
}
@ -73,8 +73,9 @@ comment_features! {
depending on the crate's [optional features].
1. If the `f16` feature is enabled,
<code>[PartialOrd][`PartialOrd`]&lt;[f16][`f16`]&gt;</code> is a
supertrait of [`Fixed`].
<code>[PartialOrd][`PartialOrd`]&lt;[f16][`f16`]&gt;</code> and
<code>[PartialOrd][`PartialOrd`]&lt;[bf16][`bf16`]&gt;</code> are
supertraits of [`Fixed`].
2. If the `serde` feature is enabled, [`Serialize`] and
[`Deserialize`] are supertraits of [`Fixed`].
@ -82,6 +83,7 @@ depending on the crates [optional features].
[`Fixed`]: trait.Fixed.html
[`PartialOrd`]: https://doc.rust-lang.org/nightly/std/cmp/trait.PartialOrd.html
[`Serialize`]: https://docs.rs/serde/^1/serde/ser/trait.Serialize.html
[`bf16`]: https://docs.rs/half/^1/half/struct.bf16.html
[`f16`]: https://docs.rs/half/^1/half/struct.f16.html
[optional features]: ../index.html#optional-features
"
@ -1423,6 +1425,8 @@ macro_rules! impl_float {
#[cfg(feature = "f16")]
impl_float! { f16 }
#[cfg(feature = "f16")]
impl_float! { bf16 }
impl_float! { f32 }
impl_float! { f64 }

View File

@ -139,10 +139,10 @@ impl<F: Fixed> Wrapping<F> {
/// * An integer of type [`i8`], [`i16`], [`i32`], [`i64`], [`i128`],
/// [`isize`], [`u8`], [`u16`], [`u32`], [`u64`], [`u128`], or
/// [`usize`].
/// * A floating-point number of type [`f32`] or [`f64`]. If the [`f16`
/// feature] is enabled, it can also be of type [`f16`]. For this
/// conversion, the method rounds to the nearest, with ties rounding
/// to even.
/// * A floating-point number of type [`f32`] or [`f64`]. If the
/// [`f16` feature] is enabled, it can also be of type [`f16`]
/// or [`bf16`]. For this conversion, the method rounds to the
/// nearest, with ties rounding to even.
/// * Any other number `src` for which [`ToFixed`] is implemented, in
/// which case this method returns
/// <code>[Wrapping][`Wrapping`]([src.wrapping_to_fixed()][`wrapping_to_fixed`])</code>.
@ -177,6 +177,7 @@ impl<F: Fixed> Wrapping<F> {
///
/// [`ToFixed`]: traits/trait.ToFixed.html
/// [`Wrapping`]: struct.Wrapping.html
/// [`bf16`]: https://docs.rs/half/^1.2/half/struct.bf16.html
/// [`f16` feature]: index.html#optional-features
/// [`f16`]: https://docs.rs/half/^1.2/half/struct.f16.html
/// [`f32`]: https://doc.rust-lang.org/nightly/std/primitive.f32.html
@ -209,10 +210,10 @@ impl<F: Fixed> Wrapping<F> {
/// * An integer of type [`i8`], [`i16`], [`i32`], [`i64`], [`i128`],
/// [`isize`], [`u8`], [`u16`], [`u32`], [`u64`], [`u128`], or
/// [`usize`]. Any fractional bits are truncated.
/// * A floating-point number of type [`f32`] or [`f64`]. If the [`f16`
/// feature] is enabled, it can also be of type [`f16`]. For this
/// conversion, the method rounds to the nearest, with ties rounding
/// to even.
/// * A floating-point number of type [`f32`] or [`f64`]. If the
/// [`f16` feature] is enabled, it can also be of type [`f16`]
/// or [`bf16`]. For this conversion, the method rounds to the
/// nearest, with ties rounding to even.
/// * Any other type `Dst` for which [`FromFixed`] is implemented, in
/// which case this method returns
/// [`Dst::wrapping_from_fixed(self.0)`][`wrapping_from_fixed`].
@ -237,6 +238,7 @@ impl<F: Fixed> Wrapping<F> {
/// ```
///
/// [`FromFixed`]: traits/trait.FromFixed.html
/// [`bf16`]: https://docs.rs/half/^1.2/half/struct.bf16.html
/// [`f16` feature]: index.html#optional-features
/// [`f16`]: https://docs.rs/half/^1.2/half/struct.f16.html
/// [`f32`]: https://doc.rust-lang.org/nightly/std/primitive.f32.html