Skip to content

Commit

Permalink
Merge pull request #46 from janpfeifer/patch-1
Browse files Browse the repository at this point in the history
Added SmallestNonzero constant
  • Loading branch information
fxamacker committed May 5, 2024
2 parents db25b6a + 4b7f7e1 commit 9c0fe7e
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 1 deletion.
7 changes: 7 additions & 0 deletions float16.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,13 @@ const (
PrecisionOverflow
)

// SmallestNonzero is the smallest nonzero (denormal) float16 value,
// approximately 5.9604645e-08. It plays the same role for float16 that
// [math.SmallestNonzeroFloat32] and [math.SmallestNonzeroFloat64] play
// for float32 and float64.
//
// [math.SmallestNonzeroFloat32] is defined as 1 / 2**(127 - 1 + 23); the
// corresponding float16 formula is 1 / 2**(15 - 1 + 10), which is
// effectively 0x1p-14 * 0x1p-10. The value is spelled as the raw bit pattern
// 0x0001 so that it can be declared as a constant.
const SmallestNonzero Float16 = 0x0001 // 5.9604645e-08

// PrecisionFromfloat32 returns Precision without performing
// the conversion. Conversions from both Infinity and NaN
// values will always report PrecisionExact even if NaN payload
Expand Down
6 changes: 6 additions & 0 deletions float16_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -794,5 +794,11 @@ func checkRoundTrippedPrecision(t *testing.T, u32 uint32, u16 uint16, u32bis uin
t.Errorf("PrecisionFromfloat32 in f32bits=0x%08x (%032b) (%f), out f16bits=0x%04x (%v), back=0x%08x (%f), got %v, wanted PrecisionExact, exp=%d, coef=%d, drpd=%d", u32, u32, f32, u16, f16, u32bis, f32bis, pre, exp32, coef32, dropped32)
}
}
}

// TestSmallestNonzero verifies that float16.SmallestNonzero converts to the
// float32 value 2**-24.
func TestSmallestNonzero(t *testing.T) {
	// Exponent of the smallest denormal: -15 + 1 - 10 = -24.
	const want = float32(0x1p-24)

	got := float16.SmallestNonzero.Float32()
	if got == want {
		return
	}
	t.Errorf("Invalid SmallestNonzero to float32 conversion: Float16=%s, wanted %g", float16.SmallestNonzero, want)
}
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
module github.com/x448/float16

-go 1.11
+go 1.13

0 comments on commit 9c0fe7e

Please sign in to comment.