From 9dc24d251fc3ebd0bb92cf535c6ba3c85f2aa35e Mon Sep 17 00:00:00 2001 From: Rob Tillaart Date: Thu, 2 Nov 2023 15:05:55 +0100 Subject: [PATCH] update readme.md (#9) - update readme.md - add **isNaN()** (experimental). - minor edits. --- CHANGELOG.md | 7 +- README.md | 44 +++- .../performance_0.1.7.txt | 88 +++++++ float16.cpp | 243 +++++++++--------- float16.h | 121 +++++---- keywords.txt | 4 + library.json | 4 +- library.properties | 2 +- 8 files changed, 315 insertions(+), 198 deletions(-) create mode 100644 examples/float16_test_performance/performance_0.1.7.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index b2a408b..97fd2b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,13 +6,18 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). +## [0.1.8] - 2023-11-02 +- update readme.md +- add **isNaN()** (experimental). +- minor edits. + + ## [0.1.7] - 2022-11-07 - add changelog.md - add rp2040 to build-CI - update readme.md - update keywords.txt - ## [0.1.6] - 2021-12-18 - update library.json - update license diff --git a/README.md b/README.md index 4796440..8037a68 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,12 @@ - [![Arduino CI](https://github.com/RobTillaart/float16/workflows/Arduino%20CI/badge.svg)](https://github.com/marketplace/actions/arduino_ci) [![Arduino-lint](https://github.com/RobTillaart/float16/actions/workflows/arduino-lint.yml/badge.svg)](https://github.com/RobTillaart/float16/actions/workflows/arduino-lint.yml) [![JSON check](https://github.com/RobTillaart/float16/actions/workflows/jsoncheck.yml/badge.svg)](https://github.com/RobTillaart/float16/actions/workflows/jsoncheck.yml) -[![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/RobTillaart/float16/blob/master/LICENSE) -[![GitHub release](https://img.shields.io/github/release/RobTillaart/float16.svg?maxAge=3600)](https://github.com/RobTillaart/Complex/releases)
+[![GitHub issues](https://img.shields.io/github/issues/RobTillaart/float16.svg)](https://github.com/RobTillaart/float16/issues) +[![License: MIT](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/RobTillaart/float16/blob/master/LICENSE) +[![GitHub release](https://img.shields.io/github/release/RobTillaart/float16.svg?maxAge=3600)](https://github.com/RobTillaart/float16/releases) +[![PlatformIO Registry](https://badges.registry.platformio.org/packages/robtillaart/library/float16.svg)](https://registry.platformio.org/libraries/robtillaart/float16) # float16 @@ -30,7 +31,7 @@ a floating point number. As it uses only 2 bytes where float and double have typ | attribute | value | notes | |:----------|:-------------|:--------| -| size | 2 bytes | layout s eeeee mmmmmmmmmm +| size | 2 bytes | layout s eeeee mmmmmmmmmm (1,5,10) | sign | 1 bit | | exponent | 5 bit | | mantissa | 11 bit | ~ 3 digits @@ -66,9 +67,16 @@ a floating point number. As it uses only 2 bytes where float and double have typ ``` +#### Related + +- https://wokwi.com/projects/376313228108456961 (demo of its usage) + + ## Interface -to elaborate +```cpp +#include "float16.h" +``` #### Constructors @@ -126,20 +134,25 @@ negation operator. - **bool isInf()** returns true if value is (-)infinite. +#### Experimental 0.1.8 + +- **bool isNaN()** returns true if value is not a number. + + ## Notes ## Future - -#### 0.1.x +#### Must - update documentation. -- unit tests of the above. -- isNan(). +#### Should + +- unit tests of the above. -#### later +#### Could - update documentation. - error handling. @@ -149,3 +162,14 @@ negation operator. - add storage example - with SD card, FRAM or EEPROM - add communication example - serial or Ethernet? +#### Wont + + +## Support + +If you appreciate my libraries, you can support the development and maintenance. +Improve the quality of the libraries by providing issues and Pull Requests, or +donate through PayPal or GitHub sponsors. 
+ +Thank you, + diff --git a/examples/float16_test_performance/performance_0.1.7.txt b/examples/float16_test_performance/performance_0.1.7.txt new file mode 100644 index 0000000..4e1fcaf --- /dev/null +++ b/examples/float16_test_performance/performance_0.1.7.txt @@ -0,0 +1,88 @@ + +// +// test: UNO +// IDE: 1.8.13 +// + +FLOAT16_LIB_VERSION: 0.1.7 + +CONSTRUCTORS +Constructor: 28 +a = b: 4 + +CONVERSION +toDouble(): 428 + +COMPARE +compare == : 4 +compare != : 4 +compare < : 4 +compare <= : 4 +compare >= : 4 +compare > : 8 + +MATH I +math + : 864 +math - : 812 +math * : 812 +math / : 812 + +MATH II +math += : 872 +math -= : 820 +math *= : 804 +math /= : 804 + +17.8125 +MATH III - negation +negation : 4 + +-17.8125 + +done + +------------------------------------------ + +// +// test: RP2040 +// IDE: 1.8.13 +// + +FLOAT16_LIB_VERSION: 0.1.7 + +CONSTRUCTORS +Constructor: 19 +a = b: 2 + +CONVERSION +toDouble(): 96 + +COMPARE +compare == : 4 +compare != : 3 +compare < : 7 +compare <= : 6 +compare >= : 6 +compare > : 6 + +MATH I +math + : 67 +math - : 60 +math * : 58 +math / : 54 + +MATH II +math += : 62 +math -= : 61 +math *= : 61 +math /= : 45 + +767.0000 +MATH III - negation +negation : 7 + +-767.0000 + +done + +------------------------------------------- \ No newline at end of file diff --git a/float16.cpp b/float16.cpp index 327db77..4dd8da5 100644 --- a/float16.cpp +++ b/float16.cpp @@ -1,30 +1,26 @@ // // FILE: float16.cpp // AUTHOR: Rob Tillaart -// VERSION: 0.1.7 +// VERSION: 0.1.8 // PURPOSE: library for Float16s for Arduino // URL: http://en.wikipedia.org/wiki/Half-precision_floating-point_format -// -// HISTORY: see changelog.md #include "float16.h" -// #define DEBUG - -// CONSTRUCTOR +// CONSTRUCTOR float16::float16(double f) { _value = f32tof16(f); } -// PRINTING +// PRINTING size_t float16::printTo(Print& p) const { double d = this->f16tof32(_value); return p.print(d, _decimals); -}; +} double float16::toDouble() const { @@ -34,54 +30,54 @@ double 
float16::toDouble() const ////////////////////////////////////////////////////////// // -// EQUALITIES +// EQUALITIES // bool float16::operator == (const float16 &f) { - return (_value == f._value); + return (_value == f._value); } bool float16::operator != (const float16 &f) { - return (_value != f._value); + return (_value != f._value); } bool float16::operator > (const float16 &f) { - if ((_value & 0x8000) && ( f._value & 0x8000)) return _value < f._value; - if (_value & 0x8000) return false; - if (f._value & 0x8000) return true; - return _value > f._value; + if ((_value & 0x8000) && ( f._value & 0x8000)) return _value < f._value; + if (_value & 0x8000) return false; + if (f._value & 0x8000) return true; + return _value > f._value; } bool float16::operator >= (const float16 &f) { - if ((_value & 0x8000) && (f._value & 0x8000)) return _value <= f._value; - if (_value & 0x8000) return false; - if (f._value & 0x8000) return true; - return _value >= f._value; + if ((_value & 0x8000) && (f._value & 0x8000)) return _value <= f._value; + if (_value & 0x8000) return false; + if (f._value & 0x8000) return true; + return _value >= f._value; } bool float16::operator < (const float16 &f) { - if ((_value & 0x8000) && (f._value & 0x8000)) return _value > f._value; - if (_value & 0x8000) return true; - if (f._value & 0x8000) return false; - return _value < f._value; + if ((_value & 0x8000) && (f._value & 0x8000)) return _value > f._value; + if (_value & 0x8000) return true; + if (f._value & 0x8000) return false; + return _value < f._value; } bool float16::operator <= (const float16 &f) { - if ((_value & 0x8000) && (f._value & 0x8000)) return _value >= f._value; - if (_value & 0x8000) return true; - if (f._value & 0x8000) return false; - return _value <= f._value; + if ((_value & 0x8000) && (f._value & 0x8000)) return _value >= f._value; + if (_value & 0x8000) return true; + if (f._value & 0x8000) return false; + return _value <= f._value; } 
////////////////////////////////////////////////////////// // -// NEGATION +// NEGATION // float16 float16::operator - () { @@ -93,7 +89,7 @@ float16 float16::operator - () ////////////////////////////////////////////////////////// // -// MATH +// MATH // float16 float16::operator + (const float16 &f) { @@ -142,9 +138,8 @@ float16& float16::operator /= (const float16 &f) ////////////////////////////////////////////////////////// // -// MATH HELPER FUNCTIONS +// MATH HELPER FUNCTIONS // - int float16::sign() { if (_value & 0x8000) return -1; @@ -157,10 +152,12 @@ bool float16::isZero() return ((_value & 0x7FFF) == 0x0000); } -// bool float16::isNaN() -// { - // return ((_value & 0x7FFF) == 0x0000); -// } +bool float16::isNaN() +{ + if ((_value & 0x7C00) != 0x7C00) return false; + if ((_value & 0x03FF) == 0x0000) return false; + return true; +} bool float16::isInf() { @@ -170,104 +167,104 @@ bool float16::isInf() ////////////////////////////////////////////////////////// // -// CORE CONVERSION +// CORE CONVERSION // float float16::f16tof32(uint16_t _value) const { - uint16_t sgn, man; - int exp; - double f; - - sgn = (_value & 0x8000) > 0; - exp = (_value & 0x7C00) >> 10; - man = (_value & 0x03FF); - - // ZERO - if ((_value & 0x7FFF) == 0) - { - return sgn ? -0 : 0; - } - // NAN & INF - if (exp == 0x001F) - { - if (man == 0) return sgn ? -INFINITY : INFINITY; - else return NAN; - } - - // SUBNORMAL/NORMAL - if (exp == 0) f = 0; - else f = 1; - - // PROCESS MANTISSE - for (int i = 9; i >= 0; i--) - { - f *= 2; - if (man & (1 << i)) f = f + 1; - } - f = f * pow(2.0, exp - 25); - if (exp == 0) - { - f = f * pow(2.0, -13); // 5.96046447754e-8; - } - return sgn ? -f : f; + uint16_t sgn, man; + int exp; + double f; + + sgn = (_value & 0x8000) > 0; + exp = (_value & 0x7C00) >> 10; + man = (_value & 0x03FF); + + // ZERO + if ((_value & 0x7FFF) == 0) + { + return sgn ? -0 : 0; + } + // NAN & INF + if (exp == 0x001F) + { + if (man == 0) return sgn ? 
-INFINITY : INFINITY; + else return NAN; + } + + // SUBNORMAL/NORMAL + if (exp == 0) f = 0; + else f = 1; + + // PROCESS MANTISSE + for (int i = 9; i >= 0; i--) + { + f *= 2; + if (man & (1 << i)) f = f + 1; + } + f = f * pow(2.0, exp - 25); + if (exp == 0) + { + f = f * pow(2.0, -13); // 5.96046447754e-8; + } + return sgn ? -f : f; } uint16_t float16::f32tof16(float f) const { - uint32_t t = *(uint32_t *) &f; - // man bits = 10; but we keep 11 for rounding - uint16_t man = (t & 0x007FFFFF) >> 12; - int16_t exp = (t & 0x7F800000) >> 23; - bool sgn = (t & 0x80000000); - - // handle 0 - if ((t & 0x7FFFFFFF) == 0) - { - return sgn ? 0x8000 : 0x0000; - } - // denormalized float32 does not fit in float16 - if (exp == 0x00) - { - return sgn ? 0x8000 : 0x0000; - } - // handle infinity & NAN - if (exp == 0x00FF) - { - if (man) return 0xFE00; // NAN - return sgn ? 0xFC00 : 0x7C00; // -INF : INF - } - - // normal numbers - exp = exp - 127 + 15; - // overflow does not fit => INF - if (exp > 30) - { - return sgn ? 0xFC00 : 0x7C00; // -INF : INF - } - // subnormal numbers - if (exp < -38) - { - return sgn ? 0x8000 : 0x0000; // -0 or 0 ? just 0 ? - } - if (exp <= 0) // subnormal - { - man >>= (exp + 14); - // rounding - man++; - man >>= 1; - if (sgn) return 0x8000 | man; - return man; - } - - // normal - // TODO rounding - exp <<= 10; + uint32_t t = *(uint32_t *) &f; + // man bits = 10; but we keep 11 for rounding + uint16_t man = (t & 0x007FFFFF) >> 12; + int16_t exp = (t & 0x7F800000) >> 23; + bool sgn = (t & 0x80000000); + + // handle 0 + if ((t & 0x7FFFFFFF) == 0) + { + return sgn ? 0x8000 : 0x0000; + } + // denormalized float32 does not fit in float16 + if (exp == 0x00) + { + return sgn ? 0x8000 : 0x0000; + } + // handle infinity & NAN + if (exp == 0x00FF) + { + if (man) return 0xFE00; // NAN + return sgn ? 0xFC00 : 0x7C00; // -INF : INF + } + + // normal numbers + exp = exp - 127 + 15; + // overflow does not fit => INF + if (exp > 30) + { + return sgn ? 
0xFC00 : 0x7C00; // -INF : INF + } + // subnormal numbers + if (exp < -38) + { + return sgn ? 0x8000 : 0x0000; // -0 or 0 ? just 0 ? + } + if (exp <= 0) // subnormal + { + man >>= (exp + 14); + // rounding man++; man >>= 1; - if (sgn) return 0x8000 | exp | man; - return exp | man; + if (sgn) return 0x8000 | man; + return man; + } + + // normal + // TODO rounding + exp <<= 10; + man++; + man >>= 1; + if (sgn) return 0x8000 | exp | man; + return exp | man; } -// -- END OF FILE -- +// -- END OF FILE -- diff --git a/float16.h b/float16.h index 1bb3d89..fd30b52 100644 --- a/float16.h +++ b/float16.h @@ -2,80 +2,79 @@ // // FILE: float16.h // AUTHOR: Rob Tillaart -// VERSION: 0.1.7 +// VERSION: 0.1.8 // PURPOSE: Arduino library to implement float16 data type. -// half-precision floating point format, +// half-precision floating point format, // used for efficient storage and transport. // URL: https://github.com/RobTillaart/float16 -// #include "Arduino.h" -#define FLOAT16_LIB_VERSION (F("0.1.7")) +#define FLOAT16_LIB_VERSION (F("0.1.8")) class float16: public Printable { - public: - // Constructors - float16(void) { _value = 0x0000; }; - float16(double f); - float16(const float16 &f) { _value = f._value; }; - - // Conversion - double toDouble(void) const; - // access the 2 byte representation. 
- uint16_t getBinary() { return _value; }; - void setBinary(uint16_t u) { _value = u; }; - - // Printable - size_t printTo(Print& p) const; - void setDecimals(uint8_t d) { _decimals = d; }; - uint8_t getDecimals() { return _decimals; }; - - // equalities - bool operator == (const float16& f); - bool operator != (const float16& f); - - bool operator > (const float16& f); - bool operator >= (const float16& f); - bool operator < (const float16& f); - bool operator <= (const float16& f); - - // negation - float16 operator - (); - - // basic math - float16 operator + (const float16& f); - float16 operator - (const float16& f); - float16 operator * (const float16& f); - float16 operator / (const float16& f); - - float16& operator += (const float16& f); - float16& operator -= (const float16& f); - float16& operator *= (const float16& f); - float16& operator /= (const float16& f); - - // math helper functions - int sign(); // 1 = positive 0 = zero -1 = negative. - bool isZero(); -// bool isNaN(); - bool isInf(); - - - // CORE CONVERSION - // should be private but for testing... - float f16tof32(uint16_t) const; - uint16_t f32tof16(float) const; - - - private: - uint8_t _decimals = 4; - uint16_t _value; +public: + // Constructors + float16(void) { _value = 0x0000; }; + float16(double f); + float16(const float16 &f) { _value = f._value; }; + + // Conversion + double toDouble(void) const; + // access the 2 byte representation. 
+ uint16_t getBinary() { return _value; }; + void setBinary(uint16_t u) { _value = u; }; + + // Printable + size_t printTo(Print& p) const; + void setDecimals(uint8_t d) { _decimals = d; }; + uint8_t getDecimals() { return _decimals; }; + + // equalities + bool operator == (const float16& f); + bool operator != (const float16& f); + + bool operator > (const float16& f); + bool operator >= (const float16& f); + bool operator < (const float16& f); + bool operator <= (const float16& f); + + // negation + float16 operator - (); + + // basic math + float16 operator + (const float16& f); + float16 operator - (const float16& f); + float16 operator * (const float16& f); + float16 operator / (const float16& f); + + float16& operator += (const float16& f); + float16& operator -= (const float16& f); + float16& operator *= (const float16& f); + float16& operator /= (const float16& f); + + // math helper functions + int sign(); // 1 = positive 0 = zero -1 = negative. + bool isZero(); + bool isNaN(); + bool isInf(); + + + // CORE CONVERSION + // should be private but for testing... 
+ float f16tof32(uint16_t) const; + uint16_t f32tof16(float) const; + + +private: + uint8_t _decimals = 4; + uint16_t _value; }; -// -- END OF FILE -- +// -- END OF FILE -- diff --git a/keywords.txt b/keywords.txt index dc1662f..0d7579f 100644 --- a/keywords.txt +++ b/keywords.txt @@ -13,6 +13,10 @@ setBinary KEYWORD2 setDecimals KEYWORD2 getDecimals KEYWORD2 +sign KEYWORD2 +isZero KEYWORD2 +isNaN KEYWORD2 +isInf KEYWORD2 # Constants (LITERAL1) FLOAT16_LIB_VERSION LITERAL1 diff --git a/library.json b/library.json index e1317eb..1e17842 100644 --- a/library.json +++ b/library.json @@ -15,9 +15,9 @@ "type": "git", "url": "https://github.com/RobTillaart/float16.git" }, - "version": "0.1.7", + "version": "0.1.8", "license": "MIT", - "frameworks": "arduino", + "frameworks": "*", "platforms": "*", "headers": "float16.h" } diff --git a/library.properties b/library.properties index c0e5cb5..ff3eee0 100644 --- a/library.properties +++ b/library.properties @@ -1,5 +1,5 @@ name=float16 -version=0.1.7 +version=0.1.8 author=Rob Tillaart maintainer=Rob Tillaart sentence=Arduino library to implement float16 data type.