Fix #10, mantissa overflow (#11)

- **warning: breaking changes!** - Fix #10, mantissa overflow - Fix convert to subnormal numbers ( < 0.000061035...) - Fix printing subnormal numbers - update unit tests with **test_all** - update GitHub/actions to version 4 - remove DATE from examples as it has no added value. - minor edits
RobTillaart · Mar 6, 2024 · 2fdc7a1 · 2fdc7a1
1 parent 9dc24d2
commit 2fdc7a1
Show file tree

Hide file tree

Showing 21 changed files with 372 additions and 95 deletions.
diff --git a/.github/workflows/arduino-lint.yml b/.github/workflows/arduino-lint.yml
@@ -6,7 +6,7 @@ jobs:
   lint:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - uses: arduino/arduino-lint-action@v1
         with:
           library-manager: update

diff --git a/.github/workflows/arduino_test_runner.yml b/.github/workflows/arduino_test_runner.yml
@@ -8,7 +8,7 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - uses: ruby/setup-ruby@v1
         with:
           ruby-version: 2.6

diff --git a/.github/workflows/jsoncheck.yml b/.github/workflows/jsoncheck.yml
@@ -10,7 +10,7 @@ jobs:
   test:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: json-syntax-check
         uses: limitusus/json-syntax-check@v1
         with:

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,12 +6,23 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/)
 and this project adheres to [Semantic Versioning](http://semver.org/).
 
 
+## [0.2.0] - 2024-03-05
+- **warning: breaking changes!**
+- Fix #10, mantissa overflow
+- Fix convert to subnormal numbers ( < 0.000061035...)
+- Fix printing subnormal numbers
+- update unit tests with **test_all**
+- update GitHub/actions to version 4
+- remove DATE from examples as it has no added value.
+- minor edits
+
+----
+
 ## [0.1.8] - 2023-11-02
 - update readme.md
 - add **isNan()** (experimental).
 - minor edits.
 
-
 ## [0.1.7] - 2022-11-07
 - add changelog.md
 - add rp2040 to build-CI

diff --git a/LICENSE b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2015-2023 Rob Tillaart
+Copyright (c) 2015-2024 Rob Tillaart
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

diff --git a/README.md b/README.md
@@ -26,6 +26,21 @@ a floating point number. As it uses only 2 bytes where float and double have typ
 4 and 8 bytes, gains can be made at the price of range and precision.
 
 
+#### Breaking change 0.2.0
+
+Version 0.2.0 has a breaking change as a conversion bug has been found.
+See issue #10 for details.
+For some specific values the mantissa overflowed when a value was
+assigned to a float16. This overflow was not detected / corrected.
+
+During the analysis of this bug it became clear that the sub-normal numbers
+were also not implemented correctly. This is fixed in 0.2.0 too.
+
+There is still an open issue with 0 versus -0 (0x0000 versus 0x8000).
+
+**This makes all pre-0.2.0 versions obsolete.**
+
+
 ## Specifications
 
 
@@ -34,14 +49,16 @@ a floating point number. As it uses only 2 bytes where float and double have typ
 | size      | 2 bytes      | layout s  eeeee  mmmmmmmmmm  (1,5,10)
 | sign      | 1 bit        |
 | exponent  | 5 bit        |
-| mantissa  | 11 bit       | ~ 3 digits
+| mantissa  | 10 bit       | ~ 3 digits
 | minimum   | 5.96046 E−8  |  smallest positive number.
-|           | 1.0009765625 |  1 + 2^−10 = smallest nr larger than 1.
+|           | 1.0009765625 |  1 + 2^−10 = smallest number larger than 1.
 | maximum   | 65504        |
 |           |              |
 
 
-#### example values
+#### Example values
+
+Source: https://en.wikipedia.org/wiki/Half-precision_floating-point_format
 
 ```cpp
 /*
@@ -151,9 +168,18 @@ negation operator.
 #### Should
 
 - unit tests of the above.
+- how to handle 0 == -0  (0x0000 == 0x8000)
+- investigate ARM alternative half-precision
+_ARM processors support (via a floating point control register bit) 
+an "alternative half-precision" format, which does away with the 
+special case for an exponent value of 31 (11111 in binary). It is almost
+identical to the IEEE format, but there is no encoding for infinity or NaNs; 
+instead, an exponent of 31 encodes normalized numbers in the range 65536 to 131008._
+
 
 #### Could
 
+- copy constructor?
 - update documentation.
 - error handling.
   - divide by zero errors.

diff --git a/examples/float16_test0/float16_test0.ino b/examples/float16_test0/float16_test0.ino
@@ -2,9 +2,8 @@
 //    FILE: float16_test0.ino
 //  AUTHOR: Rob Tillaart
 // PURPOSE: test float16
-//    DATE: 2015-03-11
 //     URL: https://github.com/RobTillaart/float16
-//
+
 
 /*
    SIGN  EXP     MANTISSA
@@ -152,4 +151,4 @@ void test_numbers()
 }
 
 
-// -- END OF FILE --
+//  -- END OF FILE --
diff --git a/examples/float16_test1/float16_test1.ino b/examples/float16_test1/float16_test1.ino
@@ -2,9 +2,7 @@
 //    FILE: float16_test1.ino
 //  AUTHOR: Rob Tillaart
 // PURPOSE: test float16
-//    DATE: 2015-03-11
 //     URL: https://github.com/RobTillaart/float16
-//
 
 
 #include "float16.h"
@@ -40,6 +38,6 @@ void loop()
 }
 
 
-// -- END OF FILE --
+//  -- END OF FILE --
 
 
diff --git a/examples/float16_test_all/float16_test_all.ino b/examples/float16_test_all/float16_test_all.ino
@@ -2,14 +2,13 @@
 //    FILE: float16_test_all.ino
 //  AUTHOR: Rob Tillaart
 // PURPOSE: test float16
-//    DATE: 2021-11-27
 //     URL: https://github.com/RobTillaart/float16
-//
-
-// test all values except the NAN
-// test_1 takes ~ 2 minutes on UNO @ 115200baud
 
-// https://github.com/RobTillaart/float16/issues/2
+//  test all values except the NAN
+//  test_1 takes ~ 2 minutes on UNO @ 115200baud
+//  https://github.com/RobTillaart/float16/issues/2
+//
+//  test_3 is related to issue #10
 
 
 #include "float16.h"
@@ -34,6 +33,7 @@ void setup()
 
   test_1();
   test_2();
+  test_3();
 }
 
 
@@ -42,8 +42,41 @@ void loop()
 }
 
 
+//  test for issue #10: assign increasing integers to f16 and print a table row whenever the binary pattern drops below the previous one.
+void test_3()
+{
+  Serial.println(__FUNCTION__);
+  uint16_t y = 0;  //  binary pattern of the current value
+  uint16_t last = 0;  //  binary pattern of the previous value
+
+  start = millis();
+  for (int32_t x = 1; x < 65535; x++)  //  test positive integers.
+  //  for (int32_t x = -1; x > -65535; x--)   //  test negative integers.
+  {
+    last = y;
+    f16 = x;
+    y = f16.getBinary();
+    if (y < last)  //  pattern decreased while x increased; row printed as |  x  |  y (HEX)  |  last (HEX)  |
+    {
+      Serial.print("|  ");
+      Serial.print(x);
+      Serial.print("  |  ");
+      Serial.print(y, HEX);
+      Serial.print("  |  ");
+      Serial.print(last, HEX);
+      Serial.println("  |");
+    }
+  }
+  stop = millis();
+  Serial.println();
+  Serial.print("  TIME: ");
+  Serial.println(stop - start);  //  elapsed time of the loop in milliseconds
+}
+
+
 void test_2()
 {
+  Serial.println(__FUNCTION__);
   start = millis();
   for (uint32_t x = 0x0001; x < 0x7C01; x++)
   {
@@ -64,7 +97,8 @@ void test_2()
 
 void test_1()
 {
-  // POSITIVE NUMBERS
+  Serial.println(__FUNCTION__);
+  //  POSITIVE NUMBERS
   prev = 0;
   errors = 0;
   start = millis();
@@ -76,7 +110,7 @@ void test_1()
     Serial.print('\t');
     float current = f16.toDouble();
     Serial.print(current, 8);
-    if (prev > current)           // numbers should be increasing.
+    if (prev > current)           //  numbers should be increasing.
     {
       Serial.print("\t\tERROR");
       errors++;
@@ -94,7 +128,7 @@ void test_1()
   Serial.println();
 
 
-  // NEGATIVE NUMBERS
+  //  NEGATIVE NUMBERS
   prev = 0;
   errors = 0;
   start = millis();
@@ -106,7 +140,7 @@ void test_1()
     Serial.print('\t');
     float current = f16.toDouble();
     Serial.print(current, 8);
-    if (prev < current)           // negative numbers should be decreasing.
+    if (prev < current)           //  negative numbers should be decreasing.
     {
       Serial.print("\t\tERROR");
       errors++;
@@ -129,4 +163,4 @@ void test_1()
 
 
 
-// -- END OF FILE --
+//  -- END OF FILE --
diff --git a/examples/float16_test_all_2/float16_test_all_2.ino b/examples/float16_test_all_2/float16_test_all_2.ino
@@ -0,0 +1,113 @@
+//
+//    FILE: float16_test_all_2.ino
+//  AUTHOR: alecelular, Rob Tillaart
+// PURPOSE: test float16
+//     URL: https://github.com/RobTillaart/float16
+//     URL: https://github.com/RobTillaart/float16/issues/10
+
+
+#include "float16.h"
+
+//  issue 10 detected an Error -> 32760 / 32767
+
+float16 f16;  //  shared float16 instance used by setup(), test_all() and test_0()
+
+
+void setup()  //  start Serial, print the sketch name and library version, then run test_all() once.
+{
+  delay(500);
+  Serial.begin(115200);
+  while (!Serial) delay(1);  //  wait until Serial evaluates to true
+
+  Serial.println();
+  Serial.println(__FILE__);
+  Serial.print("FLOAT16_LIB_VERSION: ");
+  Serial.println(FLOAT16_LIB_VERSION);
+
+  f16.setDecimals(6);  //  NOTE(review): presumably the number of decimals used when printing f16 - verify in float16.h
+
+  test_all();  //  test_0() is defined in this sketch but is not called here
+
+  Serial.println("\ndone");
+}
+
+
+void loop()  //  empty; the test runs once from setup().
+{
+}
+
+
+void test_all()  //  round trip the bit patterns 0x0000..0x7C00 and 0x8000..0xFC00 through toDouble() and print every mismatch.
+{
+  Serial.println(__FUNCTION__);
+
+  //  test all possible positive patterns (0x0000 .. 0x7C00 inclusive)
+  //  test both the conversion to and from float.
+  for (uint32_t x = 0x0000; x < 0x7C01; x++)
+  {
+    f16.setBinary(x);
+    float16 f17 = f16.toDouble();  //  pattern -> double -> new float16
+
+    //  internal format should be equal. Except for -0 and 0.
+    if (x - f17.getBinary() != 0)
+    {
+      Serial.print(f16.toDouble(), 10);  //  columns: value, original pattern, round trip pattern, difference (last three in HEX)
+      Serial.print("\t");
+      Serial.print(f16.getBinary(), HEX);
+      Serial.print("\t");
+      Serial.print(f17.getBinary(), HEX);
+      Serial.print("\t");
+      Serial.print(x - f17.getBinary(), HEX);
+      Serial.println();
+    }
+  }
+  Serial.println();
+
+
+  //  test all possible negative patterns (0x8000 .. 0xFC00 inclusive), same check and output as above
+  for (uint32_t x = 0x8000; x < 0xFC01; x++)
+  {
+    f16.setBinary(x);
+    float16 f17 = f16.toDouble();  //  pattern -> double -> new float16
+
+    if (x - f17.getBinary() != 0)
+    {
+      Serial.print(f16.toDouble(), 10);
+      Serial.print("\t");
+      Serial.print(f16.getBinary(), HEX);
+      Serial.print("\t");
+      Serial.print(f17.getBinary(), HEX);
+      Serial.print("\t");
+      Serial.print(x - f17.getBinary(), HEX);
+      Serial.println();
+    }
+  }
+  Serial.println();
+
+}
+
+
+void test_0()  //  print the integers 32750..32769 next to their float16 conversion; not called from setup() in this sketch.
+{
+  Serial.println(__FUNCTION__);
+  for (uint32_t x = 32750; x < 32770; x++)  //  range around 32760 / 32767, the values reported in issue #10
+    // for (uint32_t x = 8175; x < 8205; x++)
+  {
+    f16 = x;
+    Serial.print(x);
+    Serial.print("\t");
+    Serial.print(f16);
+    Serial.print("\t");
+    Serial.print(f16.toDouble(), 2);
+    Serial.print("\t");
+    Serial.println();
+    yield();  //  NOTE(review): presumably gives background tasks time on boards that need it - confirm
+  }
+
+  f16.setBinary(0x7800);  //  0x7800 = sign 0, exponent 30, mantissa 0, i.e. 32768 in IEEE 754 half precision
+  Serial.print(f16.toDouble(), 2);
+  Serial.println();
+}
+
+
+//  -- END OF FILE --