mirror of
https://github.com/fluencelabs/tendermint
synced 2025-04-25 06:42:16 +00:00
stop gracefully instead of trying to resume ops
Refs #2072. We most probably shouldn't keep running after an unexpected panic: some unknown error happened, so we don't know whether continuing would result in the validator signing something invalid. It might be worthwhile to explore a mechanism for manual resuming via some console or secure RPC system, but for now, halting the chain upon unexpected consensus bugs sounds like the better option.
This commit is contained in:
parent
b82138b002
commit
d09a3a6d3a
@ -553,10 +553,30 @@ func (cs *ConsensusState) newStep() {
|
||||
// Updates (state transitions) happen on timeouts, complete proposals, and 2/3 majorities.
|
||||
// ConsensusState must be locked before any internal state is updated.
|
||||
func (cs *ConsensusState) receiveRoutine(maxSteps int) {
|
||||
onExit := func(cs *ConsensusState) {
|
||||
// NOTE: the internalMsgQueue may have signed messages from our
|
||||
// priv_val that haven't hit the WAL, but its ok because
|
||||
// priv_val tracks LastSig
|
||||
|
||||
// close wal now that we're done writing to it
|
||||
cs.wal.Stop()
|
||||
cs.wal.Wait()
|
||||
|
||||
close(cs.done)
|
||||
}
|
||||
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
cs.Logger.Error("CONSENSUS FAILURE!!!", "err", r, "stack", string(debug.Stack()))
|
||||
go cs.receiveRoutine(0)
|
||||
// stop gracefully
|
||||
//
|
||||
// NOTE: We most probably shouldn't be running any further when there is
|
||||
// some unexpected panic. Some unknown error happened, and so we don't
|
||||
// know if that will result in the validator signing an invalid thing. It
|
||||
// might be worthwhile to explore a mechanism for manual resuming via
|
||||
// some console or secure RPC system, but for now, halting the chain upon
|
||||
// unexpected consensus bugs sounds like the better option.
|
||||
onExit(cs)
|
||||
}
|
||||
}()
|
||||
|
||||
@ -589,15 +609,7 @@ func (cs *ConsensusState) receiveRoutine(maxSteps int) {
|
||||
// go to the next step
|
||||
cs.handleTimeout(ti, rs)
|
||||
case <-cs.Quit():
|
||||
// NOTE: the internalMsgQueue may have signed messages from our
|
||||
// priv_val that haven't hit the WAL, but its ok because
|
||||
// priv_val tracks LastSig
|
||||
|
||||
// close wal now that we're done writing to it
|
||||
cs.wal.Stop()
|
||||
cs.wal.Wait()
|
||||
|
||||
close(cs.done)
|
||||
onExit(cs)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user