Skip to content

Commit

Permalink
0.4.1 FastShiftOut
Browse files Browse the repository at this point in the history
  • Loading branch information
RobTillaart committed Nov 1, 2024
1 parent 75cf304 commit c10a901
Show file tree
Hide file tree
Showing 9 changed files with 200 additions and 56 deletions.
3 changes: 3 additions & 0 deletions libraries/FastShiftOut/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/).


## [0.4.1] - 2024-10-31
- fix #17, add more optimizations (cache the data register values for a 0 and a 1 bit), kudos to nt314p

## [0.4.0] - 2024-09-03
- fix #15, loop unroll option, improving performance, kudos to nt314p
- fixed bug in test program (see #15)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ void loop()
// shiftOut(12, 13, MSBFIRST, 0x55);

FSO.write(0x55);
delayMicroseconds(100);
delayMicroseconds(50);
}


// -- END OF FILE --
// -- END OF FILE --
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,41 @@ println(3.14159265, 4): 629.96

done ...


no loop unroll version

Performance - time in us
write: 15.34
write: 29.43
Delta: 14.10

writeLSBFIRST: 14.34
writeLSBFIRST: 28.42
Delta: 14.09

writeMSBFIRST: 14.34
writeMSBFIRST: 28.42
Delta: 14.08

Standard shiftOut1: 89.85
Standard shiftOut2: 179.44
Delta: 89.60

write16: 29.31
write16: 58.35
Delta: 29.04

write24: 43.38
write24: 86.51
Delta: 43.13

write32: 57.47
write32: 114.68
Delta: 57.22


Test print interface
println("Hello world"): 222.68
println(1357): 262.60
println(3.14159265, 4): 650.68

Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
IDE: 1.8.19
Board: UNO

loop unrolled version

FASTSHIFTOUT_LIB_VERSION: 0.4.1

Performance - time in us
write: 10.37
write: 19.49
Delta: 9.12

writeLSBFIRST: 9.37
writeLSBFIRST: 18.49
Delta: 9.12

writeMSBFIRST: 9.37
writeMSBFIRST: 18.49
Delta: 9.12

Standard shiftOut1: 89.85
Standard shiftOut2: 179.45
Delta: 89.60

write16: 19.37
write16: 38.48
Delta: 19.11

write24: 28.48
write24: 56.72
Delta: 28.23

write32: 37.60
write32: 74.95
Delta: 37.34


Test print interface
println("Hello world"): 158.12
println(1357): 232.80
println(3.14159265, 4): 610.92


done ...


no loop unroll version

Performance - time in us
write: 14.08
write: 26.91
Delta: 12.83

writeLSBFIRST: 13.08
writeLSBFIRST: 25.90
Delta: 12.82

writeMSBFIRST: 13.08
writeMSBFIRST: 25.90
Delta: 12.82

Standard shiftOut1: 89.85
Standard shiftOut2: 179.44
Delta: 89.59

write16: 26.78
write16: 53.32
Delta: 26.54

write24: 39.62
write24: 78.98
Delta: 39.36

write32: 52.44
write32: 104.62
Delta: 52.18


Test print interface
println("Hello world"): 206.32
println(1357): 255.04
println(3.14159265, 4): 640.52


done ...
89 changes: 52 additions & 37 deletions libraries/FastShiftOut/FastShiftOut.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//
// FILE: FastShiftOut.cpp
// AUTHOR: Rob Tillaart
// VERSION: 0.4.0
// VERSION: 0.4.1
// PURPOSE: ShiftOut that implements the Print interface
// DATE: 2013-08-22
// URL: https://github.com/RobTillaart/FastShiftOut
Expand Down Expand Up @@ -170,8 +170,12 @@ size_t FastShiftOut::writeLSBFIRST(uint8_t data)
uint8_t oldSREG = SREG;
noInterrupts();

if ((value & 0x01) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
// See discussion #17: precompute both data register values so setting a bit is a plain write, not a read-modify-write
uint8_t d0 = *localDataOutRegister & outmask2; // cache 0
uint8_t d1 = d0 | outmask1; // cache 1

if ((value & 0x01) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
// *_clockRegister |= cbmask1;
// *_clockRegister &= cbmask2;
// following code is allowed as interrupts are disabled.
Expand All @@ -180,44 +184,44 @@ size_t FastShiftOut::writeLSBFIRST(uint8_t data)
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset bit

if ((value & 0x02) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x02) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it

if ((value & 0x04) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x04) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it

if ((value & 0x08) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x08) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it

if ((value & 0x10) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x10) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it

if ((value & 0x20) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x20) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it

if ((value & 0x40) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x40) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it

if ((value & 0x80) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x80) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it
Expand All @@ -238,11 +242,14 @@ size_t FastShiftOut::writeLSBFIRST(uint8_t data)
uint8_t oldSREG = SREG;
noInterrupts();

// See discussion #17: precompute both data register values so setting a bit is a plain write, not a read-modify-write
uint8_t d0 = *localDataOutRegister & outmask2; // cache 0
uint8_t d1 = d0 | outmask1; // cache 1
for (uint8_t m = 1; m > 0; m <<= 1)
{
// process one bit
if ((value & m) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & m) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
uint8_t r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it
Expand Down Expand Up @@ -284,8 +291,12 @@ size_t FastShiftOut::writeMSBFIRST(uint8_t data)
uint8_t oldSREG = SREG;
noInterrupts();

if ((value & 0x80) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
// See discussion #17: precompute both data register values so setting a bit is a plain write, not a read-modify-write
uint8_t d0 = *localDataOutRegister & outmask2; // cache 0
uint8_t d1 = d0 | outmask1; // cache 1

if ((value & 0x80) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
// *localClockRegister |= cbmask1;
// *localClockRegister &= cbmask2;
// following code is allowed as interrupts are disabled.
Expand All @@ -294,44 +305,44 @@ size_t FastShiftOut::writeMSBFIRST(uint8_t data)
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it

if ((value & 0x40) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x40) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it

if ((value & 0x20) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x20) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it

if ((value & 0x10) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x10) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it

if ((value & 0x08) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x08) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it

if ((value & 0x04) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x04) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it

if ((value & 0x02) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x02) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it

if ((value & 0x01) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & 0x01) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it
Expand All @@ -352,16 +363,20 @@ size_t FastShiftOut::writeMSBFIRST(uint8_t data)
uint8_t oldSREG = SREG;
noInterrupts();

// See discussion #17: precompute both data register values so setting a bit is a plain write, not a read-modify-write
uint8_t d0 = *localDataOutRegister & outmask2; // cache 0
uint8_t d1 = d0 | outmask1; // cache 1
for (uint8_t m = 0x80; m > 0; m >>= 1)
{
// process one bit
if ((value & m) == 0) *localDataOutRegister &= outmask2;
else *localDataOutRegister |= outmask1;
if ((value & m) == 0) *localDataOutRegister = d0;
else *localDataOutRegister = d1;
uint8_t r = *localClockRegister;
*localClockRegister = r | cbmask1; // set one bit
*localClockRegister = r; // reset it
}


// restore interrupt state
SREG = oldSREG;

Expand Down
6 changes: 3 additions & 3 deletions libraries/FastShiftOut/FastShiftOut.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
//
// FILE: FastShiftOut.h
// AUTHOR: Rob Tillaart
// VERSION: 0.4.0
// VERSION: 0.4.1
// PURPOSE: shiftOut class that implements the Print interface
// DATE: 2013-08-22
// URL: https://github.com/RobTillaart/FastShiftOut
Expand All @@ -11,10 +11,10 @@
#include "Arduino.h"
#include "Print.h"

#define FASTSHIFTOUT_LIB_VERSION (F("0.4.0"))
#define FASTSHIFTOUT_LIB_VERSION (F("0.4.1"))

// uncomment next line to get SPEED OPTIMIZED CODE
#define FASTSHIFTOUT_AVR_LOOP_UNROLLED 1
// #define FASTSHIFTOUT_AVR_LOOP_UNROLLED 1


class FastShiftOut : public Print
Expand Down
Loading

0 comments on commit c10a901

Please sign in to comment.