From 78bf064ec81a8efaeb26dcc51a3bff4cd8b88ede Mon Sep 17 00:00:00 2001 From: Rob Tillaart Date: Thu, 18 Apr 2024 12:07:12 +0200 Subject: [PATCH] Fix #12 - remove printable to improve footprint. (#13) - Fix #12, breaking change. Thanks to Andyjbm for the measurements. - remove Printable interface as it makes the effective footprint larger! - remove getDecimals() and setDecimals(). - patch examples and unit test for the above. - add example **float16_sizeof_array.ino**. - add **isPosInf()** and **isNegInf()** - add link to **float16ext** class with a larger range than float16. - update readme.md. - update unit-tests. --- CHANGELOG.md | 13 ++ README.md | 151 ++++++++++++------ .../float16_sizeof_array.ino | 44 +++++ .../float16_test_all/float16_test_all.ino | 2 - .../float16_test_all_2/float16_test_all_2.ino | 4 +- .../float16_test_array/float16_test_array.ino | 2 +- examples/float16_test_array/output_0.3.0.txt | 32 ++++ .../float16_test_performance.ino | 6 +- .../performance_0.3.0.txt | 53 ++++++ .../float16_test_powers2.ino | 2 +- .../float16_test_special.ino | 6 +- float16.cpp | 30 +++- float16.h | 21 ++- keywords.txt | 7 +- library.json | 2 +- library.properties | 2 +- test/unit_test_001.cpp | 49 ++++-- 17 files changed, 328 insertions(+), 98 deletions(-) create mode 100644 examples/float16_sizeof_array/float16_sizeof_array.ino create mode 100644 examples/float16_test_array/output_0.3.0.txt create mode 100644 examples/float16_test_performance/performance_0.3.0.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index 374d001..5e94220 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,19 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). +## [0.3.0] - 2024-04-17 +- Fix #12, breaking change. Thanks to Andyjbm for the measurements. +- remove Printable interface as it makes the effective footprint larger! +- remove getDecimals() and setDecimals(). 
+- patch examples and unit test for the above. +- add example **float16_sizeof_array.ino**. +- add **isPosInf()** and **isNegInf()** +- add link to **float16ext** class with a larger range than float16. +- update readme.md. +- update unit-tests. + +---- + ## [0.2.0] - 2024-03-05 - **warning: breaking changes!** - Fix #10, mantissa overflow diff --git a/README.md b/README.md index 9b6350e..bd1b1c8 100644 --- a/README.md +++ b/README.md @@ -16,15 +16,68 @@ Arduino library to implement float16 data type. ## Description This **experimental** library defines the float16 (2 byte) data type, including conversion -function to and from float32 type. It is definitely **work in progress**. - -The library implements the **Printable** interface so one can directly print the -float16 values in any stream e.g. Serial. +function to and from float32 type. The primary usage of the float16 data type is to efficiently store and transport a floating point number. As it uses only 2 bytes where float and double have typical 4 and 8 bytes, gains can be made at the price of range and precision. +Note that float16 only has ~3 significant digits. + +To print a float16, one needs to convert it with toFloat(), toDouble() or toString(decimals). +The latter allows concatenation and further conversion to a char array. + +In pre-0.3.0 versions the Printable interface was implemented, but it has been removed +as it caused excessive memory usage when declaring arrays of float16. 
+ + +#### ARM alternative half-precision + +- https://en.wikipedia.org/wiki/Half-precision_floating-point_format#ARM_alternative_half-precision + +_ARM processors support (via a floating point control register bit) +an "alternative half-precision" format, which does away with the +special case for an exponent value of 31 (11111₂).[10] It is almost +identical to the IEEE format, but there is no encoding for infinity or NaNs; +instead, an exponent of 31 encodes normalized numbers in the range 65536 to 131008._ + +Implemented in https://github.com/RobTillaart/float16ext class. + + +#### Difference with float16 and float16ext + +The float16ext library has an extended range as it supports values from +- 65504 +to +- 131008. + +The float16ext does not support INF, -INF and NAN. These values are mapped upon +the largest positive, the largest negative and the largest positive number. + +The -0 and 0 values will both exist. + + +Although they share a lot of code float16 and float16ext should not be mixed. +In the future these libraries might merge / derive one from the other. + + +#### Breaking change 0.3.0 + +Version 0.3.0 has a breaking change. The **Printable** interface is removed as +it causes larger than expected arrays of float16 (see #12). On ESP8266 every +float16 object was 8 bytes and on AVR it was 5 bytes instead of the expected 2 bytes. + +To support printing the class added two new conversion functions: +```cpp +f16.toFloat(); +f16.toString(decimals); + +Serial.println(f16.toFloat(), 4); +Serial.println(f16.toString(4)); +``` +This keeps printing relatively easy. + +The footprint of the library is now smaller and one can now create compact arrays +of float16 elements using only 2 bytes per element. + #### Breaking change 0.2.0 @@ -34,9 +87,9 @@ For some specific values the mantissa overflowed when the float 16 was assigned a value to. This overflow was not detected / corrected. 
During the analysis of this bug it became clear that the sub-normal numbers -were also implemented correctly. This is fixed too in 0.2.0. +were also not implemented correctly. This is fixed too in 0.2.0. -There is still an issue 0 versus -0 +There is still an issue with 0 versus -0 (sign gets lost in conversion). **This makes all pre-0.2.0 version obsolete.** @@ -44,16 +97,18 @@ There is still an issue 0 versus -0 ## Specifications -| attribute | value | notes | -|:----------|:-------------|:--------| -| size | 2 bytes | layout s eeeee mmmmmmmmmm (1,5,10) -| sign | 1 bit | -| exponent | 5 bit | -| mantissa | 10 bit | ~ 3 digits -| minimum | 5.96046 E−8 | smallest positive number. -| | 1.0009765625 | 1 + 2^−10 = smallest number larger than 1. -| maximum | 65504 | -| | | +| Attribute | Value | Notes | +|:------------|:----------------|:--------| +| size | 2 bytes | layout s eeeee mmmmmmmmmm (1, 5, 10) +| sign | 1 bit | +| exponent | 5 bit | +| mantissa | 10 bit | 3 - 4 digits +| minimum | ±5.96046 E−8 | smallest number. +| | ±1.0009765625 | 1 + 2^−10 = smallest number larger than 1. +| maximum | ±65504 | +| | | + +± = ALT 0177 #### Example values @@ -87,6 +142,10 @@ Source: https://en.wikipedia.org/wiki/Half-precision_floating-point_format #### Related - https://wokwi.com/projects/376313228108456961 (demo of its usage) +- https://github.com/RobTillaart/float16 +- https://github.com/RobTillaart/float16ext +- https://github.com/RobTillaart/fraction +- https://en.wikipedia.org/wiki/Half-precision_floating-point_format ## Interface @@ -97,28 +156,35 @@ Source: https://en.wikipedia.org/wiki/Half-precision_floating-point_format #### Constructors -- **float16(void)** defaults to zero. +- **float16(void)** defaults value to zero. - **float16(double f)** constructor. - **float16(const float16 &f)** copy constructor. #### Conversion -- **double toDouble(void)** convert to double (or float). +- **double toDouble(void)** convert value to double or float (if the same e.g. UNO). 
+- **float toFloat(void)** convert value to float. +- **String toString(unsigned int decimals = 2)** convert value to a String with decimals. +Please note that the accuracy is only 3-4 digits for the whole number so use decimals +with care. + + +#### Export and store + +To serialize the internal format e.g. to disk, two helper functions are available. + - **uint16_t getBinary()** get the 2 byte binary representation. - **void setBinary(uint16_t u)** set the 2 bytes binary representation. -- **size_t printTo(Print& p) const** Printable interface. -- **void setDecimals(uint8_t d)** idem, used for printTo. -- **uint8_t getDecimals()** idem. - -Note the setDecimals takes one byte per object which is not efficient for arrays of float16. -See array example for efficient storage using set/getBinary() functions. #### Compare -Standard compare functions. Since 0.1.5 these are quite optimized, -so it is fast to compare e.g. 2 measurements. +The library implements the standard compare functions. +These are optimized, so it is fast to compare 2 float16 values. + +Note: comparison with a float or double always includes a conversion. +You can improve performance by converting e.g. a threshold only once before comparison. - **bool operator == (const float16& f)** - **bool operator != (const float16& f)** @@ -143,20 +209,16 @@ Not planned to optimize these. - **float16& operator \*= (const float16& f)** - **float16& operator /= (const float16& f)** -negation operator. +Negation operator. - **float16 operator - ()** fast negation. +Math helpers. - **int sign()** returns 1 == positive, 0 == zero, -1 == negative. - **bool isZero()** returns true if zero. slightly faster than **sign()**. -- **bool isInf()** returns true if value is (-)infinite. - - -#### Experimental 0.1.8 - -- **bool isNaN()** returns true if value is not a number. - - -## Notes +- **bool isNaN()** returns true if value is not a number. +- **bool isInf()** returns true if value is ± infinite. 
+- **bool isPosInf()** returns true if value is + infinite. +- **bool isNegInf()** returns true if value is - infinite. ## Future @@ -167,26 +229,19 @@ negation operator. #### Should -- unit tests of the above. - how to handle 0 == -0 (0x0000 == 0x8000) -- investigate ARM alternative half-precision -_ARM processors support (via a floating point control register bit) -an "alternative half-precision" format, which does away with the -special case for an exponent value of 31 (111112).[10] It is almost -identical to the IEEE format, but there is no encoding for infinity or NaNs; -instead, an exponent of 31 encodes normalized numbers in the range 65536 to 131008._ - #### Could -- copy constructor? -- update documentation. +- unit tests. - error handling. - divide by zero errors. - look for optimizations. - rewrite **f16tof32()** with bit magic. -- add storage example - with SD card, FRAM or EEPROM -- add communication example - serial or Ethernet? +- add examples + - persistent storage e.g. SD card, FRAM or EEPROM. + - communication e.g. Serial or Ethernet (XML, JSON)? + - sorting an array of float16? 
#### Wont diff --git a/examples/float16_sizeof_array/float16_sizeof_array.ino b/examples/float16_sizeof_array/float16_sizeof_array.ino new file mode 100644 index 0000000..df77759 --- /dev/null +++ b/examples/float16_sizeof_array/float16_sizeof_array.ino @@ -0,0 +1,44 @@ +// +// FILE: float16_sizeof_array.ino +// AUTHOR: Rob Tillaart +// PURPOSE: test float16 size +// URL: https://github.com/RobTillaart/float16 +// See #12 + +#include "Arduino.h" +#include "float16.h" + + +float16 test16[100]; +float test32[100]; + +void setup() +{ + Serial.begin(115200); + + Serial.println("FLOAT16"); + Serial.println(sizeof(test16) / sizeof(test16[0])); + Serial.println(sizeof(test16)); + Serial.println(sizeof(test16[0])); + Serial.println(); + + Serial.println("FLOAT32"); + Serial.println(sizeof(test32) / sizeof(test32[0])); + Serial.println(sizeof(test32)); + Serial.println(sizeof(test32[0])); + Serial.println(); + + // set some values to make sure the compiler doesn't optimise out the arrays. + test16[5] = 32; + test32[4] = 32; + +// Serial.println(test16[5].toDouble(), 3); +// Serial.println(test16[5].toFloat(), 3); +// Serial.println(test16[5].toString()); +// Serial.println(test16[5].toString(1)); +// Serial.println(test16[5].toString(3)); +}; + +void loop() +{ +}; diff --git a/examples/float16_test_all/float16_test_all.ino b/examples/float16_test_all/float16_test_all.ino index 4ab2b54..a9c0281 100644 --- a/examples/float16_test_all/float16_test_all.ino +++ b/examples/float16_test_all/float16_test_all.ino @@ -29,8 +29,6 @@ void setup() Serial.println(FLOAT16_LIB_VERSION); Serial.println("\nStart "); - f16.setDecimals(6); - test_1(); test_2(); test_3(); diff --git a/examples/float16_test_all_2/float16_test_all_2.ino b/examples/float16_test_all_2/float16_test_all_2.ino index b40c13d..73b0787 100644 --- a/examples/float16_test_all_2/float16_test_all_2.ino +++ b/examples/float16_test_all_2/float16_test_all_2.ino @@ -24,8 +24,6 @@ void setup() Serial.print("FLOAT16_LIB_VERSION: 
"); Serial.println(FLOAT16_LIB_VERSION); - f16.setDecimals(6); - test_all(); Serial.println("\ndone"); @@ -96,7 +94,7 @@ void test_0() f16 = x; Serial.print(x); Serial.print("\t"); - Serial.print(f16); + Serial.print(f16.toString(2)); Serial.print("\t"); Serial.print(f16.toDouble(), 2); Serial.print("\t"); diff --git a/examples/float16_test_array/float16_test_array.ino b/examples/float16_test_array/float16_test_array.ino index 11b9a7d..ffbcd23 100644 --- a/examples/float16_test_array/float16_test_array.ino +++ b/examples/float16_test_array/float16_test_array.ino @@ -5,7 +5,7 @@ // URL: https://github.com/RobTillaart/float16 -// show different storage needs +// show storage needs (fixed in 0.3.0) #include "float16.h" diff --git a/examples/float16_test_array/output_0.3.0.txt b/examples/float16_test_array/output_0.3.0.txt new file mode 100644 index 0000000..c360afd --- /dev/null +++ b/examples/float16_test_array/output_0.3.0.txt @@ -0,0 +1,32 @@ + +float16_test_array.ino +FLOAT16_LIB_VERSION: 0.3.0 + +0 5.07 +1 -0.51 +2 -2.27 +3 3.58 +4 6.30 +5 -0.28 +6 2.44 +7 5.78 +8 6.23 +9 4.09 + 0.30 + +0 5.07 +1 -0.51 +2 -2.27 +3 3.58 +4 6.30 +5 -0.28 +6 2.44 +7 5.78 +8 6.23 +9 4.09 + 0.30 + +SIZE: 20 +SIZE: 20 + +done diff --git a/examples/float16_test_performance/float16_test_performance.ino b/examples/float16_test_performance/float16_test_performance.ino index 2719221..5b4dacc 100644 --- a/examples/float16_test_performance/float16_test_performance.ino +++ b/examples/float16_test_performance/float16_test_performance.ino @@ -1,7 +1,7 @@ // // FILE: float16_test_performance.ino // AUTHOR: Rob Tillaart -// PURPOSE: test float16 +// PURPOSE: test float16 performance // URL: https://github.com/RobTillaart/float16 @@ -162,7 +162,7 @@ void setup() delay(10); Serial.println(); - Serial.println(f16); + Serial.println(f16.toString(4)); Serial.println("MATH III - negation"); start = micros(); @@ -173,7 +173,7 @@ void setup() delay(10); Serial.println(); - Serial.println(f18); + 
Serial.println(f18.toString(4)); Serial.println("\ndone"); } diff --git a/examples/float16_test_performance/performance_0.3.0.txt b/examples/float16_test_performance/performance_0.3.0.txt new file mode 100644 index 0000000..f600bca --- /dev/null +++ b/examples/float16_test_performance/performance_0.3.0.txt @@ -0,0 +1,53 @@ + +// +// test: UNO +// IDE: 1.8.19 +// + +FLOAT16_LIB_VERSION: 0.3.0 + +CONSTRUCTORS +Constructor: 28 +a = b: 4 + +CONVERSION +toDouble(): 360 + +COMPARE +compare == : 12 +compare != : 8 +compare < : 4 +compare <= : 4 +compare >= : 8 +compare > : 4 + +MATH I +math + : 756 +math - : 692 +math * : 692 +math / : 688 + +MATH II +math += : 756 +math -= : 692 +math *= : 692 +math /= : 704 + +17.8125 +MATH III - negation +negation : 4 + +-17.8125 + +done + + +------------------------------------------ + +// +// test: RP2040 +// IDE: 1.8.13 +// + + +------------------------------------------- \ No newline at end of file diff --git a/examples/float16_test_powers2/float16_test_powers2.ino b/examples/float16_test_powers2/float16_test_powers2.ino index c35925a..666f12e 100644 --- a/examples/float16_test_powers2/float16_test_powers2.ino +++ b/examples/float16_test_powers2/float16_test_powers2.ino @@ -21,7 +21,7 @@ void setup() float16 f16(n); Serial.print(n); Serial.print('\t'); - Serial.print(f16); + Serial.print(f16.toString()); Serial.print('\t'); Serial.print(abs(f16.toDouble() / n), 8); Serial.println(); diff --git a/examples/float16_test_special/float16_test_special.ino b/examples/float16_test_special/float16_test_special.ino index 4344a2e..e362807 100644 --- a/examples/float16_test_special/float16_test_special.ino +++ b/examples/float16_test_special/float16_test_special.ino @@ -31,14 +31,12 @@ void setup() Serial.print("FLOAT16_LIB_VERSION: "); Serial.println(FLOAT16_LIB_VERSION); - f16.setDecimals(6); - for (int i = 0; i < 32; i++) { f16.setBinary(value[i]); Serial.print(value[i], HEX); Serial.print("\t"); - Serial.print(f16); + 
Serial.print(f16.toString()); Serial.print("\t"); Serial.print(f16.toDouble(), 6); Serial.print("\t"); @@ -52,7 +50,7 @@ void setup() f16 = x; Serial.print(f16.getBinary(), HEX); Serial.print("\t"); - Serial.print(f16); + Serial.print(f16.toString()); Serial.print("\t"); Serial.print(f16.toDouble(), 6); Serial.print("\t"); diff --git a/float16.cpp b/float16.cpp index b5f1473..65da893 100644 --- a/float16.cpp +++ b/float16.cpp @@ -1,7 +1,7 @@ // // FILE: float16.cpp // AUTHOR: Rob Tillaart -// VERSION: 0.2.0 +// VERSION: 0.3.0 // PURPOSE: library for Float16s for Arduino // URL: http://en.wikipedia.org/wiki/Half-precision_floating-point_format @@ -15,18 +15,26 @@ float16::float16(double f) _value = f32tof16(f); } -// PRINTING -size_t float16::printTo(Print& p) const + +////////////////////////////////////////////////////////// +// +// CONVERTING & PRINTING +// +double float16::toDouble() const { - double d = this->f16tof32(_value); - return p.print(d, _decimals); + return f16tof32(_value); } -double float16::toDouble() const +float float16::toFloat() const { return f16tof32(_value); } +String float16::toString(unsigned int decimals) const +{ + return String((double)f16tof32(_value), decimals); +} + ////////////////////////////////////////////////////////// // @@ -164,6 +172,16 @@ bool float16::isInf() return ((_value == 0x7C00) || (_value == 0xFC00)); } +bool float16::isPosInf() +{ + return (_value == 0x7C00); +} + +bool float16::isNegInf() +{ + return (_value == 0xFC00); +} + ////////////////////////////////////////////////////////// // diff --git a/float16.h b/float16.h index ae87a53..a2281d8 100644 --- a/float16.h +++ b/float16.h @@ -2,7 +2,7 @@ // // FILE: float16.h // AUTHOR: Rob Tillaart -// VERSION: 0.2.0 +// VERSION: 0.3.0 // PURPOSE: Arduino library to implement float16 data type. // half-precision floating point format, // used for efficient storage and transport. 
@@ -11,10 +11,10 @@ #include "Arduino.h" -#define FLOAT16_LIB_VERSION (F("0.2.0")) +#define FLOAT16_LIB_VERSION (F("0.3.0")) -class float16: public Printable +class float16 { public: // Constructors @@ -22,21 +22,18 @@ class float16: public Printable float16(double f); float16(const float16 &f) { _value = f._value; }; - // Conversion + // Conversion and printing double toDouble(void) const; + float toFloat() const; + String toString(unsigned int decimals = 2) const; // keep esp32 happy. + // access the 2 byte representation. uint16_t getBinary() { return _value; }; void setBinary(uint16_t u) { _value = u; }; - // Printable - size_t printTo(Print& p) const; - void setDecimals(uint8_t d) { _decimals = d; }; - uint8_t getDecimals() { return _decimals; }; - // equalities bool operator == (const float16& f); bool operator != (const float16& f); - bool operator > (const float16& f); bool operator >= (const float16& f); bool operator < (const float16& f); @@ -61,7 +58,8 @@ class float16: public Printable bool isZero(); bool isNaN(); bool isInf(); - + bool isPosInf(); + bool isNegInf(); // CORE CONVERSION // should be private, needed for testing. 
@@ -70,7 +68,6 @@ class float16: public Printable private: - uint8_t _decimals = 4; uint16_t _value; }; diff --git a/keywords.txt b/keywords.txt index 0d7579f..11019c8 100644 --- a/keywords.txt +++ b/keywords.txt @@ -7,16 +7,19 @@ float16 KEYWORD1 # Methods and Functions (KEYWORD2) toDouble KEYWORD2 +toFloat KEYWORD2 +toString KEYWORD2 getBinary KEYWORD2 setBinary KEYWORD2 -setDecimals KEYWORD2 -getDecimals KEYWORD2 sign KEYWORD2 isZero KEYWORD2 isNan KEYWORD2 isInf KEYWORD2 +isPosInf KEYWORD2 +isNegInf KEYWORD2 + # Constants (LITERAL1) FLOAT16_LIB_VERSION LITERAL1 diff --git a/library.json b/library.json index 576e9fd..0dda0e8 100644 --- a/library.json +++ b/library.json @@ -15,7 +15,7 @@ "type": "git", "url": "https://github.com/RobTillaart/float16.git" }, - "version": "0.2.0", + "version": "0.3.0", "license": "MIT", "frameworks": "*", "platforms": "*", diff --git a/library.properties b/library.properties index 1d4615f..0c9bd66 100644 --- a/library.properties +++ b/library.properties @@ -1,5 +1,5 @@ name=float16 -version=0.2.0 +version=0.3.0 author=Rob Tillaart maintainer=Rob Tillaart sentence=Arduino library to implement float16 data type. 
diff --git a/test/unit_test_001.cpp b/test/unit_test_001.cpp index 1387bb4..4484c89 100644 --- a/test/unit_test_001.cpp +++ b/test/unit_test_001.cpp @@ -85,6 +85,16 @@ unittest(test_constructor) } +unittest(test_sizeof) +{ + float16 value(1); + float16 arr[10]; + + assertEqual(2, sizeof(value)); + assertEqual(20, sizeof(arr)); +} + + unittest(test_compare_equal) { float16 a(1); @@ -142,6 +152,21 @@ unittest(test_negation) } +unittest(test_infinity) +{ + float16 f16; + f16.setBinary(0x7C00); + assertTrue(f16.isInf()); + assertTrue(f16.isPosInf()); + assertFalse(f16.isNegInf()); + + f16.setBinary(0xFC00); + assertTrue(f16.isInf()); + assertFalse(f16.isPosInf()); + assertTrue(f16.isNegInf()); +} + + unittest(test_conversion) { for (int i = 0; i < 20; i++) @@ -153,19 +178,12 @@ unittest(test_conversion) } -unittest(test_printable) +unittest(test_toString) { - float16 f16(123.456); - // test default value. - assertEqual(4, f16.getDecimals()); - for (int i = 0; i < 6; i++) - { - f16.setDecimals(i); - assertEqual(i, f16.getDecimals()); - } - - // TODO - // printable? how to test? + float16 f16(-123.456); + fprintf(stderr, "note the limited accuracy (~4 digits).\n"); + assertNotEqual("-123.456", f16.toString(3)); + assertEqual("-123.4", f16.toString(1)); } @@ -184,21 +202,24 @@ unittest(test_all_values) b = a.toDouble(); if (x != b.getBinary()) // assert would give 65K lines output! { - fprintf(stderr, "fail at %d\n", x); + fprintf(stderr, "fail at %d != %d\n", x, b.getBinary()); } } + fprintf(stderr, "test all negative patterns\n"); + fprintf(stderr, "only fails -0\n"); for (uint32_t x = 0x8000; x < 0xFC01; x++) { a.setBinary(x); b = a.toDouble(); if (x != b.getBinary()) { - fprintf(stderr, "fail at %d\n", x); + fprintf(stderr, "fail at %d != %d\n", x, b.getBinary()); } } } + unittest_main()