Merge pull request #46 from janpfeifer/patch-1

Added SmallestNonzero constant
x448 · May 5, 2024 · 9c0fe7e · 9c0fe7e
2 parents db25b6a + 4b7f7e1
commit 9c0fe7e
Show file tree

Hide file tree

Showing 3 changed files with 14 additions and 1 deletion.
diff --git a/float16.go b/float16.go
@@ -40,6 +40,13 @@ const (
 	PrecisionOverflow
 )
 
+// SmallestNonzero is the smallest nonzero denormal value for float16 (0.000000059604645).
+// It's the float16 equivalent of [math.SmallestNonzeroFloat32] and [math.SmallestNonzeroFloat64].
+// For context, [math.SmallestNonzeroFloat32] uses the formula 1 / 2**(127 - 1 + 23) to produce
+// the smallest denormal value for float32 (1.401298464324817070923729583289916131280e-45).
+// The equivalent formula for float16 is 1 / 2**(15 - 1 + 10). We use Float16(0x0001) to compile as const.
+const SmallestNonzero = Float16(0x0001) // 5.9604645e-08 (effectively 0x1p-14 * 0x1p-10)
+
 // PrecisionFromfloat32 returns Precision without performing
 // the conversion.  Conversions from both Infinity and NaN
 // values will always report PrecisionExact even if NaN payload

diff --git a/float16_test.go b/float16_test.go
@@ -794,5 +794,11 @@ func checkRoundTrippedPrecision(t *testing.T, u32 uint32, u16 uint16, u32bis uin
 			t.Errorf("PrecisionFromfloat32 in f32bits=0x%08x (%032b) (%f), out f16bits=0x%04x (%v), back=0x%08x (%f), got %v, wanted PrecisionExact, exp=%d, coef=%d, drpd=%d", u32, u32, f32, u16, f16, u32bis, f32bis, pre, exp32, coef32, dropped32)
 		}
 	}
+}
 
+func TestSmallestNonzero(t *testing.T) {
+	want := float32(0x1p-24) // -15 + 1 - 10
+	if float16.SmallestNonzero.Float32() != want {
+		t.Errorf("Invalid SmallestNonzero to float32 conversion: Float16=%s, wanted %g", float16.SmallestNonzero, want)
+	}
 }
diff --git a/go.mod b/go.mod
@@ -1,3 +1,3 @@
 module github.com/x448/float16
 
-go 1.11
+go 1.13