Skip to content

Commit de3c18e

Browse files
authored
Capture broadcasted messages for re-broadcasting even at panic (#627)
Fix a bug where if `Host.RequestBroadcast` panics the requested message is never added to the re-broadcast state. Fix an inconsistent behaviour where error at requested broadcasts via re-broadcast are returned instead of being silently logged. Enhance emulator to allow pluggable signign logic with two additional signing implementation: erroneous and panic. Implement tests that assert expected behaviour at signing error or panic. Fixes #236
1 parent 20c0b55 commit de3c18e

File tree

8 files changed

+205
-47
lines changed

8 files changed

+205
-47
lines changed

emulator/driver.go

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package emulator
33
import (
44
"bytes"
55
"context"
6+
"errors"
67
"testing"
78

89
"github.com/filecoin-project/go-f3/gpbft"
@@ -22,9 +23,9 @@ type Driver struct {
2223
}
2324

2425
// NewDriver instantiates a new Driver with the given GPBFT options. See
25-
// Driver.StartInstance.
26+
// Driver.RequireStartInstance.
2627
func NewDriver(t *testing.T, o ...gpbft.Option) *Driver {
27-
h := newHost(t)
28+
h := newHost(t, AdhocSigning())
2829
participant, err := gpbft.NewParticipant(h, o...)
2930
require.NoError(t, err)
3031
return &Driver{
@@ -34,23 +35,32 @@ func NewDriver(t *testing.T, o ...gpbft.Option) *Driver {
3435
}
3536
}
3637

38+
func (d *Driver) SetSigning(signing Signing) { d.host.Signing = signing }
39+
3740
// StartInstance sets the current instances and starts emulation for it by signalling the
3841
// start of instance to the emulated honest gpbft.Participant.
3942
//
4043
// See NewInstance.
41-
func (d *Driver) StartInstance(id uint64) {
42-
d.require.NoError(d.subject.StartInstanceAt(id, d.host.Time()))
44+
func (d *Driver) StartInstance(id uint64) error {
45+
if err := d.subject.StartInstanceAt(id, d.host.Time()); err != nil {
46+
return err
47+
}
4348
// Trigger alarm once based on the implicit assumption that go-f3 uses alarm to
4449
// kickstart an instance internally.
45-
d.RequireDeliverAlarm()
50+
if triggered, err := d.DeliverAlarm(); err != nil {
51+
return err
52+
} else if !triggered {
53+
return errors.New("alarm did not trigger at start")
54+
}
55+
return nil
4656
}
4757

4858
// AddInstance adds an instance to the list of instances known by the driver.
4959
func (d *Driver) AddInstance(instance *Instance) {
5060
d.require.NoError(d.host.addInstance(instance))
5161
}
5262

53-
func (d *Driver) deliverAlarm() (bool, error) {
63+
func (d *Driver) DeliverAlarm() (bool, error) {
5464
if d.host.maybeReceiveAlarm() {
5565
return true, d.subject.ReceiveAlarm()
5666
}
@@ -72,8 +82,8 @@ func (d *Driver) prepareMessage(partialMessage *gpbft.GMessage) *gpbft.GMessage
7282

7383
mb := instance.NewMessageBuilder(partialMessage.Vote, partialMessage.Justification, withValidTicket)
7484
mb.NetworkName = d.host.NetworkName()
75-
mb.SigningMarshaler = d.host.adhocSigning
76-
msg, err := mb.Build(context.Background(), d.host.adhocSigning, partialMessage.Sender)
85+
mb.SigningMarshaler = d.host
86+
msg, err := mb.Build(context.Background(), d.host, partialMessage.Sender)
7787
d.require.NoError(err)
7888
d.require.NotNil(msg)
7989
if !withValidTicket {

emulator/driver_assertions.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,14 @@ func (d *Driver) RequireErrOnDeliverMessage(message *gpbft.GMessage, err error,
1919
}
2020
}
2121

22+
// RequireStartInstance asserts that instance with the given ID is started. See
23+
// StartInstance.
24+
func (d *Driver) RequireStartInstance(id uint64) {
25+
d.require.NoError(d.StartInstance(id))
26+
}
27+
2228
func (d *Driver) RequireDeliverAlarm() {
23-
delivered, err := d.deliverAlarm()
29+
delivered, err := d.DeliverAlarm()
2430
d.require.NoError(err)
2531
d.require.True(delivered)
2632
}

emulator/host.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ const networkName = "emulator-net"
1515
var _ gpbft.Host = (*driverHost)(nil)
1616

1717
type driverHost struct {
18-
adhocSigning
18+
Signing
1919

2020
t *testing.T
2121
id gpbft.ActorID
@@ -25,10 +25,11 @@ type driverHost struct {
2525
chain map[uint64]*Instance
2626
}
2727

28-
func newHost(t *testing.T) *driverHost {
28+
func newHost(t *testing.T, signing Signing) *driverHost {
2929
return &driverHost{
30-
t: t,
31-
chain: make(map[uint64]*Instance),
30+
Signing: signing,
31+
t: t,
32+
chain: make(map[uint64]*Instance),
3233
}
3334
}
3435

emulator/instance.go

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,15 @@ type Instance struct {
2121
powerTable *gpbft.PowerTable
2222
beacon []byte
2323
decision *gpbft.Justification
24+
signing Signing
2425
}
2526

2627
// NewInstance instantiates a new Instance for emulation. If absent, the
2728
// constructor will implicitly generate any missing but required values such as
2829
// public keys Power Table CID, etc. for the given params. The given proposal
2930
// must contain at least one tipset.
3031
//
31-
// See Driver.StartInstance.
32+
// See Driver.RequireStartInstance.
3233
func NewInstance(t *testing.T, id uint64, powerEntries gpbft.PowerEntries, proposal ...gpbft.TipSet) *Instance {
3334
// UX of the gpbft API is pretty painful; encapsulate the pain of getting an
3435
// instance going here at the price of accepting partial data and implicitly
@@ -67,9 +68,11 @@ func NewInstance(t *testing.T, id uint64, powerEntries gpbft.PowerEntries, propo
6768
Commitments: [32]byte{},
6869
PowerTable: ptCid,
6970
},
71+
signing: AdhocSigning(),
7072
}
7173
}
7274

75+
func (i *Instance) SetSigning(signing Signing) { i.signing = signing }
7376
func (i *Instance) Proposal() gpbft.ECChain { return i.proposal }
7477
func (i *Instance) GetDecision() *gpbft.Justification { return i.decision }
7578
func (i *Instance) ID() uint64 { return i.id }
@@ -134,7 +137,7 @@ func (i *Instance) NewJustification(round uint64, step gpbft.Phase, vote gpbft.E
134137
}
135138

136139
func (i *Instance) NewJustificationWithPayload(payload gpbft.Payload, from ...gpbft.ActorID) *gpbft.Justification {
137-
msg := signing.MarshalPayloadForSigning(networkName, &payload)
140+
msg := i.signing.MarshalPayloadForSigning(networkName, &payload)
138141
qr := gpbft.QuorumResult{
139142
Signers: make([]int, len(from)),
140143
PubKeys: make([]gpbft.PubKey, len(from)),
@@ -144,13 +147,13 @@ func (i *Instance) NewJustificationWithPayload(payload gpbft.Payload, from ...gp
144147
index, found := i.powerTable.Lookup[actor]
145148
require.True(i.t, found)
146149
entry := i.powerTable.Entries[index]
147-
signature, err := signing.Sign(context.Background(), entry.PubKey, msg)
150+
signature, err := i.signing.Sign(context.Background(), entry.PubKey, msg)
148151
require.NoError(i.t, err)
149152
qr.Signatures[j] = signature
150153
qr.PubKeys[j] = entry.PubKey
151154
qr.Signers[j] = index
152155
}
153-
aggregate, err := signing.Aggregate(qr.PubKeys, qr.Signatures)
156+
aggregate, err := i.signing.Aggregate(qr.PubKeys, qr.Signatures)
154157
require.NoError(i.t, err)
155158
return &gpbft.Justification{
156159
Vote: payload,

emulator/signing.go

Lines changed: 47 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,30 @@ import (
1010
)
1111

1212
var (
13-
_ gpbft.Verifier = (*adhocSigning)(nil)
14-
_ gpbft.Signer = (*adhocSigning)(nil)
15-
_ gpbft.SigningMarshaler = (*adhocSigning)(nil)
16-
17-
signing adhocSigning
13+
_ Signing = (*adhocSigning)(nil)
14+
_ Signing = (*erroneousSigning)(nil)
15+
_ Signing = (*panicSigning)(nil)
1816
)
1917

20-
// adhocSigning marshals, signs and verifies messages on behalf of any given
18+
type Signing interface {
19+
gpbft.Verifier
20+
gpbft.Signer
21+
gpbft.SigningMarshaler
22+
}
23+
24+
// AdhocSigning marshals, signs and verifies messages on behalf of any given
2125
// public key but uniquely and deterministically so using crc32 hash function for
2226
// performance. This implementation is not secure nor collision resistant. A
2327
// typical Instance power table is small enough to make the risk of collisions
2428
// negligible.
29+
func AdhocSigning() Signing { return adhocSigning{} }
30+
31+
// ErroneousSigning returns an error for every Signing API that can return an error.
32+
func ErroneousSigning() Signing { return erroneousSigning{} }
33+
34+
// PanicSigning panics for every Signing API.
35+
func PanicSigning() Signing { return panicSigning{} }
36+
2537
type adhocSigning struct{}
2638

2739
func (s adhocSigning) Sign(_ context.Context, sender gpbft.PubKey, msg []byte) ([]byte, error) {
@@ -84,3 +96,32 @@ func (s adhocSigning) VerifyAggregate(payload, got []byte, signers []gpbft.PubKe
8496
func (s adhocSigning) MarshalPayloadForSigning(name gpbft.NetworkName, payload *gpbft.Payload) []byte {
8597
return payload.MarshalForSigning(name)
8698
}
99+
100+
type erroneousSigning struct{}
101+
102+
func (p erroneousSigning) Verify(gpbft.PubKey, []byte, []byte) error {
103+
return errors.New("err Verify")
104+
}
105+
106+
func (p erroneousSigning) VerifyAggregate([]byte, []byte, []gpbft.PubKey) error {
107+
return errors.New("err VerifyAggregate")
108+
}
109+
110+
func (p erroneousSigning) Aggregate([]gpbft.PubKey, [][]byte) ([]byte, error) {
111+
return nil, errors.New("err Aggregate")
112+
}
113+
func (p erroneousSigning) Sign(context.Context, gpbft.PubKey, []byte) ([]byte, error) {
114+
return nil, errors.New("err Sign")
115+
}
116+
117+
func (p erroneousSigning) MarshalPayloadForSigning(gpbft.NetworkName, *gpbft.Payload) []byte {
118+
return nil
119+
}
120+
121+
type panicSigning struct{}
122+
123+
func (p panicSigning) Verify(gpbft.PubKey, []byte, []byte) error { panic("π") }
124+
func (p panicSigning) VerifyAggregate([]byte, []byte, []gpbft.PubKey) error { panic("π") }
125+
func (p panicSigning) Aggregate([]gpbft.PubKey, [][]byte) ([]byte, error) { panic("π") }
126+
func (p panicSigning) Sign(context.Context, gpbft.PubKey, []byte) ([]byte, error) { panic("π") }
127+
func (p panicSigning) MarshalPayloadForSigning(gpbft.NetworkName, *gpbft.Payload) []byte { panic("π") }

gpbft/gpbft.go

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -872,11 +872,14 @@ func (i *instance) broadcast(round uint64, step Phase, value ECChain, createTick
872872
if createTicket {
873873
mb.BeaconForTicket = i.beacon
874874
}
875+
876+
// Capture the broadcast and metrics first. Because, otherwise the instance will
877+
// end up with partial re-broadcast messages if RequestBroadcast panics.
878+
i.broadcasted.record(mb)
879+
metrics.broadcastCounter.Add(context.TODO(), 1, metric.WithAttributes(attrPhase[p.Step]))
875880
if err := i.participant.host.RequestBroadcast(mb); err != nil {
876881
i.log("failed to request broadcast: %v", err)
877882
}
878-
i.broadcasted.record(mb)
879-
metrics.broadcastCounter.Add(context.TODO(), 1, metric.WithAttributes(attrPhase[p.Step]))
880883
}
881884

882885
// tryRebroadcast checks whether re-broadcast timeout has elapsed, and if so
@@ -957,10 +960,13 @@ func (i *instance) rebroadcast() error {
957960
mbs := i.broadcasted.messagesByRound[round]
958961
for _, mb := range mbs {
959962
if err := i.participant.host.RequestBroadcast(mb); err != nil {
960-
return err
963+
// Silently log the error and proceed. This is consistent with the behaviour of
964+
// instance for regular broadcasts.
965+
i.log("failed to request rebroadcast %s at round %d: %v", mb.Payload.Step, mb.Payload.Round, err)
966+
} else {
967+
i.log("rebroadcasting %s at round %d for value %s", mb.Payload.Step.String(), mb.Payload.Round, mb.Payload.Value)
968+
metrics.reBroadcastCounter.Add(context.TODO(), 1)
961969
}
962-
i.log("rebroadcasting %s at round %d for value %s", mb.Payload.Step.String(), mb.Payload.Round, mb.Payload.Value)
963-
metrics.reBroadcastCounter.Add(context.TODO(), 1)
964970
}
965971
}
966972
return nil

0 commit comments

Comments
 (0)