add basic math, optimize compare operators (#5)

* add basic math, optimize compare operators * fix negation * fix comparison
RobTillaart · Dec 2, 2021 · a2fdd9b · a2fdd9b
1 parent ea2e710
commit a2fdd9b
Show file tree

Hide file tree

Showing 13 changed files with 299 additions and 184 deletions.
diff --git a/README.md b/README.md
@@ -21,22 +21,23 @@ The library implements the **Printable** interface so one can directly print the
 float16 values in any stream e.g. Serial.
 
 The primary usage of the float16 data type is to efficiently store and transport 
-a floating point number. As it is only 2 bytes where float and double have typical 
-4 and 8, gains can be made at the price of range and precision.
+a floating point number. As it uses only 2 bytes, where float and double typically use
+4 and 8 bytes, gains can be made at the price of range and precision.
 
 
 ## Specifications
 
 
-| attribute | value        |  notes       |
-|:----------|:-------------|:-------------|
-| Size      | 2 bytes      |              |
-| sign      | 1 bit        |              |
-| mantissa  | 11 bit       | ~ 3 digits   |
-| exponent  | 4 bit        |              |
-| minimum   | 1.0009765625 |  1 + 2^−10   |
-| maximum   | 65504        |              |
-|           |              |              |
+| attribute | value        |  notes  |
+|:----------|:-------------|:--------|
+| size      | 2 bytes      | layout s  eeeee  mmmmmmmmmm
+| sign      | 1 bit        |            
+| exponent  | 5 bit        |            
+| mantissa  | 10 bit       | 11 bit precision incl. the implicit leading bit, ~ 3 digits 
+| minimum   | 5.96046 E−8  |  smallest positive (subnormal) number. 
+|           | 1.0009765625 |  1 + 2^−10 = smallest number larger than 1. 
+| maximum   | 65504        |            
+|           |              |            
 
 
 ## Interface
@@ -52,72 +53,73 @@ to elaborate
 
 #### Conversion
 
-- **double toDouble(void)** convert to double (or float)
+- **double toDouble(void)** convert to double (or float).
+- **uint16_t getBinary()** get the 2 byte binary representation.
+- **void setBinary(uint16_t u)** set the 2 byte binary representation.
 - **size_t printTo(Print& p) const** Printable interface.
 - **void setDecimals(uint8_t d)** set the number of decimals used by printTo.
 - **uint8_t getDecimals()** get the number of decimals used by printTo.
 
-Note the setDecimals takes one byte per object which is not efficient for arrays.
+Note that the decimals setting takes one byte per object, which is not efficient for arrays of float16.
 See array example for efficient storage using set/getBinary() functions.
 
 
 #### Compare
 
-to elaborate
-
-
-## Notes
+Standard compare functions. Since 0.1.5 these are quite optimized, 
+so it is fast to compare e.g. 2 measurements.
 
+- **bool operator == (const float16& f)**
+- **bool operator != (const float16& f)**
+- **bool operator >  (const float16& f)**
+- **bool operator >= (const float16& f)**
+- **bool operator <  (const float16& f)**
+- **bool operator <= (const float16& f)**
 
-#### comparison functions
 
-First version of inequality operations are implemented by converting data to double and compare those. 
-The strategy is to get these working first and optionally optimize them later.
+#### Math (basic)
 
+Math is done by converting to double, doing the math, and converting back.
+These operators are added for convenience only. 
+There are no plans to optimize these.
 
-## TODO (future)
+- **float16 operator + (const float16& f)**
+- **float16 operator - (const float16& f)**
+- **float16 operator \* (const float16& f)**
+- **float16 operator / (const float16& f)**
+- **float16& operator += (const float16& f)**
+- **float16& operator -= (const float16& f)**
+- **float16& operator \*= (const float16& f)**
+- **float16& operator /= (const float16& f)**
 
-to get focus on getting things done...
+negation operator.
+- **float16 operator - ()** fast negation.
 
+- **int sign()** returns 1 == positive, 0 == zero,  -1 == negative.
+- **bool isZero()** returns true if zero. slightly faster than **sign()**.
+- **bool isInf()** returns true if value is (-)infinite.
 
-#### 0.1.4
-
-the following should work:
-
-- update documentation
-- positive numbers
-- negative numbers
-- infinity
-- rounding to zero (e.g. 1e-30)
-- array of numbers.
-- unit tests of the above..
 
+## Notes
 
-#### 0.1.5
 
-- update documentation
-- comparison operators
-- unit tests of the above..
+## Future
 
 
 #### 0.1.6
 
-- update documentation
-- get basic math working (+-*/)
-- isNan()
-- isINF()
-- abs()
-- sgn()
-- unit tests of the above..
+- update documentation.
+- unit tests of the above.
+- isNan().
 
 
 #### later
 
-- update documentation
-- get basic math II working += -= *= /=
-- divide by zero errors.
-- f16tof32() + f32tof16()
-- rewrite toDouble with bit magic
-- ...
-
+- update documentation.
+- error handling.
+  - divide by zero errors.
+- look for optimizations.
+- rewrite **f16tof32()** with bit magic.
+- add storage example - with SD card, FRAM or EEPROM
+- add communication example - serial or Ethernet?
 
diff --git a/examples/float16_test0/float16_test0.ino b/examples/float16_test0/float16_test0.ino
@@ -6,27 +6,27 @@
 //    DATE: 2015-03-11
 //     URL: https://github.com/RobTillaart/float16
 //
-// Released to the public domain
-//
+
 
 /*
-0 01111 0000000000 = 1
-0 01111 0000000001 = 1 + 2−10 = 1.0009765625 (next smallest float after 1)
-1 10000 0000000000 = −2
+   SIGN  EXP     MANTISSA
+    0    01111    0000000000 = 1
+    0    01111    0000000001 = 1 + 2^−10 = 1.0009765625 (next smallest float after 1)
+    1    10000    0000000000 = −2
 
-0 11110 1111111111 = 65504  (max half precision)
+    0    11110    1111111111 = 65504  (max half precision)
 
-0 00001 0000000000 = 2−14 ≈ 6.10352 × 10−5 (minimum positive normal)
-0 00000 1111111111 = 2−14 - 2−24 ≈ 6.09756 × 10−5 (maximum subnormal)
-0 00000 0000000001 = 2−24 ≈ 5.96046 × 10−8 (minimum positive subnormal)
+    0    00001    0000000000 = 2^−14 ≈ 6.10352 × 10^−5 (minimum positive normal)
+    0    00000    1111111111 = 2^−14 - 2^−24 ≈ 6.09756 × 10^−5 (maximum subnormal)
+    0    00000    0000000001 = 2^−24 ≈ 5.96046 × 10^−8 (minimum positive subnormal)
 
-0 00000 0000000000 = 0
-1 00000 0000000000 = −0
+    0    00000    0000000000 = 0
+    1    00000    0000000000 = −0
 
-0 11111 0000000000 = infinity
-1 11111 0000000000 = −infinity
+    0    11111    0000000000 = infinity
+    1    11111    0000000000 = −infinity
 
-0 01101 0101010101 = 0.333251953125 ≈ 1/3
+    0    01101    0101010101 = 0.333251953125 ≈ 1/3
 */
 
 #include "float16.h"
@@ -62,8 +62,8 @@ void test_constructors()
   Serial.println("\ntest_constructors:");
   float16 a;
   Serial.println(a.toDouble(), 9);
-  Serial.println(a.getBinary(), HEX);  
-  
+  Serial.println(a.getBinary(), HEX);
+
   float16 b = 6;
   Serial.println(b.toDouble(), 9);
   Serial.println(b.getBinary(), HEX);
@@ -144,7 +144,7 @@ void test_numbers()
   Serial.println("** OVERFLOW **");
   float16 f(1000000.0);
   Serial.println(f.toDouble(), 9);
-  
+
   Serial.println("** UNDERFLOW **");
   float16 g(1 / 1000000.0);
   Serial.println(g.toDouble(), 9);

diff --git a/examples/float16_test1/float16_test1.ino b/examples/float16_test1/float16_test1.ino
@@ -7,6 +7,7 @@
 //     URL: https://github.com/RobTillaart/float16
 //
 
+
 #include "float16.h"
 
 float16 X;
@@ -19,11 +20,11 @@ void setup()
   Serial.println(__FILE__);
   Serial.print("FLOAT16_LIB_VERSION: ");
   Serial.println(FLOAT16_LIB_VERSION);
-  Serial.println("\nStart ");
 
   float f;
 
-  for (uint16_t n = 0; n < 65535; n++)
+  // dump all possible values
+  for (uint16_t n = 0; n < 65535; n++)  
   {
     f = X.f16tof32(n);
     Serial.print(n);

diff --git a/examples/float16_test_all/float16_test_all.ino b/examples/float16_test_all/float16_test_all.ino
@@ -77,7 +77,7 @@ void test_1()
     Serial.print('\t');
     float current = f16.toDouble();
     Serial.print(current, 8);
-    if (prev > current)
+    if (prev > current)           // numbers should be increasing.
     {
       Serial.print("\t\tERROR");
       errors++;
@@ -107,7 +107,7 @@ void test_1()
     Serial.print('\t');
     float current = f16.toDouble();
     Serial.print(current, 8);
-    if (prev < current)
+    if (prev < current)           // negative numbers should be decreasing.
     {
       Serial.print("\t\tERROR");
       errors++;

diff --git a/examples/float16_test_array/float16_test_array.ino b/examples/float16_test_array/float16_test_array.ino
@@ -22,9 +22,10 @@ void setup()
   Serial.println(__FILE__);
   Serial.print("FLOAT16_LIB_VERSION: ");
   Serial.println(FLOAT16_LIB_VERSION);
-  Serial.println("\nStart ");
+  Serial.println();
+
 
-  // simulate temperature with random numbers
+  // simulate temperature sensor with random numbers
   for (uint32_t n = 0; n < 10; n++)
   {
     temperature[n] = (random(1000) - 300) * 0.01;

diff --git a/examples/float16_test_negative/float16_test_negative.ino b/examples/float16_test_negative/float16_test_negative.ino
@@ -17,7 +17,7 @@ void setup()
   Serial.println(__FILE__);
   Serial.print("FLOAT16_LIB_VERSION: ");
   Serial.println(FLOAT16_LIB_VERSION);
-  Serial.println("\nStart ");
+  Serial.println();
 
 
   for( int i = -10; i < 2; i++)

diff --git a/examples/float16_test_performance/float16_test_performance.ino b/examples/float16_test_performance/float16_test_performance.ino
@@ -24,9 +24,9 @@ void setup()
   Serial.println(FLOAT16_LIB_VERSION);
   Serial.println();
 
-  f = random(1000000) * 0.001;
 
-  // CONSTRUCTORS
+  Serial.println("CONSTRUCTORS");
+  f = random(1000000) * 0.001;
   start = micros();
   float16 f16(f);
   stop = micros();
@@ -41,8 +41,10 @@ void setup()
   Serial.print("a = b: \t");
   Serial.println(stop - start);
   delay(10);
+  Serial.println();
+
 
-  // CONVERSION
+  Serial.println("CONVERSION");
   start = micros();
   f = f16.toDouble();
   stop = micros();
@@ -52,7 +54,7 @@ void setup()
   Serial.println();
 
 
-  // COMPARE
+  Serial.println("COMPARE");
   f17 = f16.toDouble() + 1;
 
   start = micros();
@@ -96,6 +98,85 @@ void setup()
   Serial.print("compare >  : \t");
   Serial.println(stop - start);
   delay(10);
+  Serial.println();
+
+
+  Serial.println("MATH I");
+  float16 f18;
+  start = micros();
+  f18 = f16 + f17;
+  stop = micros();
+  Serial.print("math +  : \t");
+  Serial.println(stop - start);
+  delay(10);
+  //  Serial.println(f16);
+  //  Serial.println(f17);
+  //  Serial.println(f18);
+
+  start = micros();
+  f18 = f16 - f17;
+  stop = micros();
+  Serial.print("math -  : \t");
+  Serial.println(stop - start);
+  delay(10);
+
+  start = micros();
+  f18 = f16 * f17;
+  stop = micros();
+  Serial.print("math *  : \t");
+  Serial.println(stop - start);
+  delay(10);
+
+  start = micros();
+  f18 = f16 + f17;
+  stop = micros();
+  Serial.print("math /  : \t");
+  Serial.println(stop - start);
+  delay(10);
+  Serial.println();
+
+  Serial.println("MATH II");
+  start = micros();
+  f18 += f16;
+  stop = micros();
+  Serial.print("math += : \t");
+  Serial.println(stop - start);
+  delay(10);
+
+  start = micros();
+  f18 -= f16;
+  stop = micros();
+  Serial.print("math -= : \t");
+  Serial.println(stop - start);
+  delay(10);
+
+  start = micros();
+  f18 *= f16;
+  stop = micros();
+  Serial.print("math *= : \t");
+  Serial.println(stop - start);
+  delay(10);
+
+  start = micros();
+  f18 /= f16;
+  stop = micros();
+  Serial.print("math /= : \t");
+  Serial.println(stop - start);
+  delay(10);
+  Serial.println();
+
+  Serial.println(f16);
+
+  Serial.println("MATH III - negation");
+  start = micros();
+  f18 = -f16;
+  stop = micros();
+  Serial.print("negation : \t");
+  Serial.println(stop - start);
+  delay(10);
+  Serial.println();
+
+  Serial.println(f18);
 
   Serial.println("\ndone");
 }

diff --git a/examples/float16_test_powers2/float16_test_powers2.ino b/examples/float16_test_powers2/float16_test_powers2.ino
@@ -18,7 +18,6 @@ void setup()
   Serial.println(__FILE__);
   Serial.print("FLOAT16_LIB_VERSION: ");
   Serial.println(FLOAT16_LIB_VERSION);
-  Serial.println("\nStart ");
 
   for (uint32_t n = 1; n < 65536; n *= 2)
   {