From c10a901eb4dd29ad3c872472f1e24527c33326c0 Mon Sep 17 00:00:00 2001 From: Rob Tillaart Date: Fri, 1 Nov 2024 14:06:13 +0100 Subject: [PATCH] 0.4.1 FastShiftOut --- libraries/FastShiftOut/CHANGELOG.md | 3 + .../FastShiftOut_scope_test.ino | 4 +- .../FastShiftOut_test/performance_0.4.0.txt | 38 ++++++++ .../FastShiftOut_test/performance_0.4.1.txt | 85 ++++++++++++++++++ libraries/FastShiftOut/FastShiftOut.cpp | 89 +++++++++++-------- libraries/FastShiftOut/FastShiftOut.h | 6 +- libraries/FastShiftOut/README.md | 27 +++--- libraries/FastShiftOut/library.json | 2 +- libraries/FastShiftOut/library.properties | 2 +- 9 files changed, 200 insertions(+), 56 deletions(-) create mode 100644 libraries/FastShiftOut/Examples/FastShiftOut_test/performance_0.4.1.txt diff --git a/libraries/FastShiftOut/CHANGELOG.md b/libraries/FastShiftOut/CHANGELOG.md index acff32b2..d5a588e5 100644 --- a/libraries/FastShiftOut/CHANGELOG.md +++ b/libraries/FastShiftOut/CHANGELOG.md @@ -6,6 +6,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). 
+## [0.4.1] - 2024-10-31 +- fix #17, add more optimizations, kudos to nt314p + ## [0.4.0] - 2024-09-03 - fix #15, loop unroll option, improving performance, kudos to nt314p - fixed bug in test program (see #15) diff --git a/libraries/FastShiftOut/Examples/FastShiftOut_scope_test/FastShiftOut_scope_test.ino b/libraries/FastShiftOut/Examples/FastShiftOut_scope_test/FastShiftOut_scope_test.ino index fa9a6893..511524a7 100644 --- a/libraries/FastShiftOut/Examples/FastShiftOut_scope_test/FastShiftOut_scope_test.ino +++ b/libraries/FastShiftOut/Examples/FastShiftOut_scope_test/FastShiftOut_scope_test.ino @@ -62,8 +62,8 @@ void loop() // shiftOut(12, 13, MSBFIRST, 0x55); FSO.write(0x55); - delayMicroseconds(100); + delayMicroseconds(50); } -// -- END OF FILE -- +// -- END OF FILE -- diff --git a/libraries/FastShiftOut/Examples/FastShiftOut_test/performance_0.4.0.txt b/libraries/FastShiftOut/Examples/FastShiftOut_test/performance_0.4.0.txt index 181d418a..b43ea8d8 100644 --- a/libraries/FastShiftOut/Examples/FastShiftOut_test/performance_0.4.0.txt +++ b/libraries/FastShiftOut/Examples/FastShiftOut_test/performance_0.4.0.txt @@ -43,3 +43,41 @@ println(3.14159265, 4): 629.96 done ... 
+ +no loop unroll version + +Performance - time in us + write: 15.34 + write: 29.43 + Delta: 14.10 + +writeLSBFIRST: 14.34 +writeLSBFIRST: 28.42 + Delta: 14.09 + +writeMSBFIRST: 14.34 +writeMSBFIRST: 28.42 + Delta: 14.08 + +Standard shiftOut1: 89.85 +Standard shiftOut2: 179.44 + Delta: 89.60 + + write16: 29.31 + write16: 58.35 + Delta: 29.04 + + write24: 43.38 + write24: 86.51 + Delta: 43.13 + + write32: 57.47 + write32: 114.68 + Delta: 57.22 + + +Test print interface +println("Hello world"): 222.68 +println(1357): 262.60 +println(3.14159265, 4): 650.68 + diff --git a/libraries/FastShiftOut/Examples/FastShiftOut_test/performance_0.4.1.txt b/libraries/FastShiftOut/Examples/FastShiftOut_test/performance_0.4.1.txt new file mode 100644 index 00000000..7bd21c8d --- /dev/null +++ b/libraries/FastShiftOut/Examples/FastShiftOut_test/performance_0.4.1.txt @@ -0,0 +1,85 @@ +IDE: 1.8.19 +Board: UNO + +loop unrolled version + +FASTSHIFTOUT_LIB_VERSION: 0.4.1 + +Performance - time in us + write: 10.37 + write: 19.49 + Delta: 9.12 + +writeLSBFIRST: 9.37 +writeLSBFIRST: 18.49 + Delta: 9.12 + +writeMSBFIRST: 9.37 +writeMSBFIRST: 18.49 + Delta: 9.12 + +Standard shiftOut1: 89.85 +Standard shiftOut2: 179.45 + Delta: 89.60 + + write16: 19.37 + write16: 38.48 + Delta: 19.11 + + write24: 28.48 + write24: 56.72 + Delta: 28.23 + + write32: 37.60 + write32: 74.95 + Delta: 37.34 + + +Test print interface +println("Hello world"): 158.12 +println(1357): 232.80 +println(3.14159265, 4): 610.92 + + +done ... 
+ + +no loop unroll version + +Performance - time in us + write: 14.08 + write: 26.91 + Delta: 12.83 + +writeLSBFIRST: 13.08 +writeLSBFIRST: 25.90 + Delta: 12.82 + +writeMSBFIRST: 13.08 +writeMSBFIRST: 25.90 + Delta: 12.82 + +Standard shiftOut1: 89.85 +Standard shiftOut2: 179.44 + Delta: 89.59 + + write16: 26.78 + write16: 53.32 + Delta: 26.54 + + write24: 39.62 + write24: 78.98 + Delta: 39.36 + + write32: 52.44 + write32: 104.62 + Delta: 52.18 + + +Test print interface +println("Hello world"): 206.32 +println(1357): 255.04 +println(3.14159265, 4): 640.52 + + +done ... diff --git a/libraries/FastShiftOut/FastShiftOut.cpp b/libraries/FastShiftOut/FastShiftOut.cpp index d8d61f3f..c97cc33e 100644 --- a/libraries/FastShiftOut/FastShiftOut.cpp +++ b/libraries/FastShiftOut/FastShiftOut.cpp @@ -1,7 +1,7 @@ // // FILE: FastShiftOut.cpp // AUTHOR: Rob Tillaart -// VERSION: 0.4.0 +// VERSION: 0.4.1 // PURPOSE: ShiftOut that implements the Print interface // DATE: 2013-08-22 // URL: https://github.com/RobTillaart/FastShiftOut @@ -170,8 +170,12 @@ size_t FastShiftOut::writeLSBFIRST(uint8_t data) uint8_t oldSREG = SREG; noInterrupts(); - if ((value & 0x01) == 0) *localDataOutRegister &= outmask2; - else *localDataOutRegister |= outmask1; + // See discussion #17 + uint8_t d0 = *localDataOutRegister & outmask2; // cache 0 + uint8_t d1 = d0 | outmask1; // cache 1 + + if ((value & 0x01) == 0) *localDataOutRegister = d0; + else *localDataOutRegister = d1; // *_clockRegister |= cbmask1; // *_clockRegister &= cbmask2; // following code is allowed as interrupts are disabled. 
@@ -180,44 +184,44 @@ size_t FastShiftOut::writeLSBFIRST(uint8_t data) *localClockRegister = r | cbmask1; // set one bit *localClockRegister = r; // reset bit - if ((value & 0x02) == 0) *localDataOutRegister &= outmask2; - else *localDataOutRegister |= outmask1; + if ((value & 0x02) == 0) *localDataOutRegister = d0; + else *localDataOutRegister = d1; r = *localClockRegister; *localClockRegister = r | cbmask1; // set one bit *localClockRegister = r; // reset it - if ((value & 0x04) == 0) *localDataOutRegister &= outmask2; - else *localDataOutRegister |= outmask1; + if ((value & 0x04) == 0) *localDataOutRegister = d0; + else *localDataOutRegister = d1; r = *localClockRegister; *localClockRegister = r | cbmask1; // set one bit *localClockRegister = r; // reset it - if ((value & 0x08) == 0) *localDataOutRegister &= outmask2; - else *localDataOutRegister |= outmask1; + if ((value & 0x08) == 0) *localDataOutRegister = d0; + else *localDataOutRegister = d1; r = *localClockRegister; *localClockRegister = r | cbmask1; // set one bit *localClockRegister = r; // reset it - if ((value & 0x10) == 0) *localDataOutRegister &= outmask2; - else *localDataOutRegister |= outmask1; + if ((value & 0x10) == 0) *localDataOutRegister = d0; + else *localDataOutRegister = d1; r = *localClockRegister; *localClockRegister = r | cbmask1; // set one bit *localClockRegister = r; // reset it - if ((value & 0x20) == 0) *localDataOutRegister &= outmask2; - else *localDataOutRegister |= outmask1; + if ((value & 0x20) == 0) *localDataOutRegister = d0; + else *localDataOutRegister = d1; r = *localClockRegister; *localClockRegister = r | cbmask1; // set one bit *localClockRegister = r; // reset it - if ((value & 0x40) == 0) *localDataOutRegister &= outmask2; - else *localDataOutRegister |= outmask1; + if ((value & 0x40) == 0) *localDataOutRegister = d0; + else *localDataOutRegister = d1; r = *localClockRegister; *localClockRegister = r | cbmask1; // set one bit *localClockRegister = r; // reset it - if 
((value & 0x80) == 0) *localDataOutRegister &= outmask2; - else *localDataOutRegister |= outmask1; + if ((value & 0x80) == 0) *localDataOutRegister = d0; + else *localDataOutRegister = d1; r = *localClockRegister; *localClockRegister = r | cbmask1; // set one bit *localClockRegister = r; // reset it @@ -238,11 +242,14 @@ size_t FastShiftOut::writeLSBFIRST(uint8_t data) uint8_t oldSREG = SREG; noInterrupts(); + // See discussion #17 + uint8_t d0 = *localDataOutRegister & outmask2; // cache 0 + uint8_t d1 = d0 | outmask1; // cache 1 for (uint8_t m = 1; m > 0; m <<= 1) { // process one bit - if ((value & m) == 0) *localDataOutRegister &= outmask2; - else *localDataOutRegister |= outmask1; + if ((value & m) == 0) *localDataOutRegister = d0; + else *localDataOutRegister = d1; uint8_t r = *localClockRegister; *localClockRegister = r | cbmask1; // set one bit *localClockRegister = r; // reset it @@ -284,8 +291,12 @@ size_t FastShiftOut::writeMSBFIRST(uint8_t data) uint8_t oldSREG = SREG; noInterrupts(); - if ((value & 0x80) == 0) *localDataOutRegister &= outmask2; - else *localDataOutRegister |= outmask1; + // See discussion #17 + uint8_t d0 = *localDataOutRegister & outmask2; // cache 0 + uint8_t d1 = d0 | outmask1; // cache 1 + + if ((value & 0x80) == 0) *localDataOutRegister = d0; + else *localDataOutRegister = d1; // *localClockRegister |= cbmask1; // *localClockRegister &= cbmask2; // following code is allowed as interrupts are disabled. 
@@ -294,44 +305,44 @@ size_t FastShiftOut::writeMSBFIRST(uint8_t data) *localClockRegister = r | cbmask1; // set one bit *localClockRegister = r; // reset it - if ((value & 0x40) == 0) *localDataOutRegister &= outmask2; - else *localDataOutRegister |= outmask1; + if ((value & 0x40) == 0) *localDataOutRegister = d0; + else *localDataOutRegister = d1; r = *localClockRegister; *localClockRegister = r | cbmask1; // set one bit *localClockRegister = r; // reset it - if ((value & 0x20) == 0) *localDataOutRegister &= outmask2; - else *localDataOutRegister |= outmask1; + if ((value & 0x20) == 0) *localDataOutRegister = d0; + else *localDataOutRegister = d1; r = *localClockRegister; *localClockRegister = r | cbmask1; // set one bit *localClockRegister = r; // reset it - if ((value & 0x10) == 0) *localDataOutRegister &= outmask2; - else *localDataOutRegister |= outmask1; + if ((value & 0x10) == 0) *localDataOutRegister = d0; + else *localDataOutRegister = d1; r = *localClockRegister; *localClockRegister = r | cbmask1; // set one bit *localClockRegister = r; // reset it - if ((value & 0x08) == 0) *localDataOutRegister &= outmask2; - else *localDataOutRegister |= outmask1; + if ((value & 0x08) == 0) *localDataOutRegister = d0; + else *localDataOutRegister = d1; r = *localClockRegister; *localClockRegister = r | cbmask1; // set one bit *localClockRegister = r; // reset it - if ((value & 0x04) == 0) *localDataOutRegister &= outmask2; - else *localDataOutRegister |= outmask1; + if ((value & 0x04) == 0) *localDataOutRegister = d0; + else *localDataOutRegister = d1; r = *localClockRegister; *localClockRegister = r | cbmask1; // set one bit *localClockRegister = r; // reset it - if ((value & 0x02) == 0) *localDataOutRegister &= outmask2; - else *localDataOutRegister |= outmask1; + if ((value & 0x02) == 0) *localDataOutRegister = d0; + else *localDataOutRegister = d1; r = *localClockRegister; *localClockRegister = r | cbmask1; // set one bit *localClockRegister = r; // reset it - if 
((value & 0x01) == 0) *localDataOutRegister &= outmask2; - else *localDataOutRegister |= outmask1; + if ((value & 0x01) == 0) *localDataOutRegister = d0; + else *localDataOutRegister = d1; r = *localClockRegister; *localClockRegister = r | cbmask1; // set one bit *localClockRegister = r; // reset it @@ -352,16 +363,20 @@ size_t FastShiftOut::writeMSBFIRST(uint8_t data) uint8_t oldSREG = SREG; noInterrupts(); + // See discussion #17 + uint8_t d0 = *localDataOutRegister & outmask2; // cache 0 + uint8_t d1 = d0 | outmask1; // cache 1 for (uint8_t m = 0x80; m > 0; m >>= 1) { // process one bit - if ((value & m) == 0) *localDataOutRegister &= outmask2; - else *localDataOutRegister |= outmask1; + if ((value & m) == 0) *localDataOutRegister = d0; + else *localDataOutRegister = d1; uint8_t r = *localClockRegister; *localClockRegister = r | cbmask1; // set one bit *localClockRegister = r; // reset it } + // restore interrupt state SREG = oldSREG; diff --git a/libraries/FastShiftOut/FastShiftOut.h b/libraries/FastShiftOut/FastShiftOut.h index 58816609..5d2e3b89 100644 --- a/libraries/FastShiftOut/FastShiftOut.h +++ b/libraries/FastShiftOut/FastShiftOut.h @@ -2,7 +2,7 @@ // // FILE: FastShiftOut.h // AUTHOR: Rob Tillaart -// VERSION: 0.4.0 +// VERSION: 0.4.1 // PURPOSE: shiftOut class that implements the Print interface // DATE: 2013-08-22 // URL: https://github.com/RobTillaart/FastShiftOut @@ -11,10 +11,10 @@ #include "Arduino.h" #include "Print.h" -#define FASTSHIFTOUT_LIB_VERSION (F("0.4.0")) +#define FASTSHIFTOUT_LIB_VERSION (F("0.4.1")) // uncomment next line to get SPEED OPTIMIZED CODE -#define FASTSHIFTOUT_AVR_LOOP_UNROLLED 1 +// #define FASTSHIFTOUT_AVR_LOOP_UNROLLED 1 class FastShiftOut : public Print diff --git a/libraries/FastShiftOut/README.md b/libraries/FastShiftOut/README.md index b22decbe..f7bf32e2 100644 --- a/libraries/FastShiftOut/README.md +++ b/libraries/FastShiftOut/README.md @@ -58,23 +58,25 @@ Numbers may vary depending on bit-order flag. 
Indicative time in microseconds, Arduino UNO, IDE 1.8.19, measured over 1000 calls. (delta between 2 calls and 1 call to eliminate overhead) -| function | 0.2.4 | 0.3.1 | 0.3.3 | 0.4.0 | 0.4.0L | -|:-------------------------|--------:|---------:|---------:|---------:|---------:| -| write() | 21.66 | 22.48 | 22.27 | 14.10 | 11.51 | -| writeLSBFIRST() | 22.94 | 23.37 | 22.25 | 14.09 | 11.50 | -| writeMSBFIRST() | 20.30 | 21.86 | 22.26 | 14.08 | 11.50 | -| reference shiftOut() | 89.74 | 89.74 | 89.59 | 89.60 | 89.60 | -| write16() | na | na | 45.39 | 29.06 | 23.89 | -| write24() | na | na | 67.66 | 43.12 | 35.40 | -| write32() | na | na | 89.91 | 57.22 | 46.90 | -| println("Hello world") | na | 328.92 | 328.92 | 222.68 | 189.20 | -| println(1357) | na | 313.56 | 311.60 | 262.60 | 247.12 | -| println(3.14159265, 4) | na | 717.36 | 716.04 | 650.68 | 629.96 | +| function | 0.2.4 | 0.3.1 | 0.3.3 | 0.4.0 | 0.4.0L | 0.4.1 | 0.4.1L | +|:-------------------------|--------:|---------:|---------:|---------:|---------:|---------:|---------:| +| write() | 21.66 | 22.48 | 22.27 | 14.10 | 11.51 | 12.83 | 9.12 | +| writeLSBFIRST() | 22.94 | 23.37 | 22.25 | 14.09 | 11.50 | 12.82 | 9.12 | +| writeMSBFIRST() | 20.30 | 21.86 | 22.26 | 14.08 | 11.50 | 12.82 | 9.12 | +| reference shiftOut() | 89.74 | 89.74 | 89.59 | 89.60 | 89.60 | 89.59 | 89.60 | +| write16() | na | na | 45.39 | 29.06 | 23.89 | 26.54 | 19.11 | +| write24() | na | na | 67.66 | 43.12 | 35.40 | 39.36 | 28.23 | +| write32() | na | na | 89.91 | 57.22 | 46.90 | 52.18 | 37.34 | +| println("Hello world") | na | 328.92 | 328.92 | 222.68 | 189.20 | 206.32 | 158.12 | +| println(1357) | na | 313.56 | 311.60 | 262.60 | 247.12 | 255.04 | 232.80 | +| println(3.14159265, 4) | na | 717.36 | 716.04 | 650.68 | 629.96 | 640.52 | 610.92 | - Note: 0.3.3 has improved the measurement, not the code sec. - Note: 0.3.3 numbers fixed when implementing 0.4.0. (error in test sketch). - Note: 0.4.0 measured with loop unroll flag disabled. 
- Note: 0.4.0L measured with loop unrolled flag enabled. +- Note: 0.4.1 / 0.4.1L idem (loop unroll flag disabled / enabled). +- Note: Loop unrolled is (8046 - 7818 = 228) bytes larger in size. ### Related @@ -84,6 +86,7 @@ Indicative time in microseconds, Arduino UNO, IDE 1.8.19, measured over 1000 cal - https://github.com/RobTillaart/FastShiftOut - https://github.com/RobTillaart/ShiftInSlow - https://github.com/RobTillaart/ShiftOutSlow +- https://github.com/RobTillaart/SWSPI (experimental) ## Interface diff --git a/libraries/FastShiftOut/library.json b/libraries/FastShiftOut/library.json index 4d3775e5..5b31cf34 100644 --- a/libraries/FastShiftOut/library.json +++ b/libraries/FastShiftOut/library.json @@ -15,7 +15,7 @@ "type": "git", "url": "https://github.com/RobTillaart/FastShiftOut.git" }, - "version": "0.4.0", + "version": "0.4.1", "license": "MIT", "frameworks": "*", "platforms": "*", diff --git a/libraries/FastShiftOut/library.properties b/libraries/FastShiftOut/library.properties index 92b0073e..42748496 100644 --- a/libraries/FastShiftOut/library.properties +++ b/libraries/FastShiftOut/library.properties @@ -1,5 +1,5 @@ name=FastShiftOut -version=0.4.0 +version=0.4.1 author=Rob Tillaart maintainer=Rob Tillaart sentence=Arduino library for (AVR) optimized shiftOut - e.g. 74HC595