diff --git a/consensus/replay.go b/consensus/replay.go index ac9b9910..16d12e34 100644 --- a/consensus/replay.go +++ b/consensus/replay.go @@ -2,7 +2,6 @@ package consensus import ( "bytes" - "errors" "fmt" "hash/crc32" "io" @@ -95,10 +94,13 @@ func (cs *ConsensusState) catchupReplay(csHeight int64) error { cs.replayMode = true defer func() { cs.replayMode = false }() - // Ensure that ENDHEIGHT for this height doesn't exist - // NOTE: This is just a sanity check. As far as we know things work fine without it, - // and Handshake could reuse ConsensusState if it weren't for this check (since we can crash after writing ENDHEIGHT). - gr, found, err := cs.wal.SearchForEndHeight(csHeight) + // Ensure that ENDHEIGHT for this height doesn't exist. + // NOTE: This is just a sanity check. As far as we know things work fine + // without it, and Handshake could reuse ConsensusState if it weren't for + // this check (since we can crash after writing ENDHEIGHT). + // + // Ignore data corruption errors since this is a sanity check. + gr, found, err := cs.wal.SearchForEndHeight(csHeight, &WALSearchOptions{IgnoreDataCorruptionErrors: true}) if err != nil { return err } @@ -112,14 +114,16 @@ func (cs *ConsensusState) catchupReplay(csHeight int64) error { } // Search for last height marker - gr, found, err = cs.wal.SearchForEndHeight(csHeight - 1) + // + // Ignore data corruption errors in previous heights because we only care about last height + gr, found, err = cs.wal.SearchForEndHeight(csHeight-1, &WALSearchOptions{IgnoreDataCorruptionErrors: true}) if err == io.EOF { cs.Logger.Error("Replay: wal.group.Search returned EOF", "#ENDHEIGHT", csHeight-1) } else if err != nil { return err } if !found { - return errors.New(cmn.Fmt("Cannot replay height %d. WAL does not contain #ENDHEIGHT for %d.", csHeight, csHeight-1)) + return fmt.Errorf("Cannot replay height %d. WAL does not contain #ENDHEIGHT for %d.", csHeight, csHeight-1) } defer gr.Close() // nolint: errcheck @@ -132,9 +136,13 @@ func (cs *ConsensusState) catchupReplay(csHeight int64) error { msg, err = dec.Decode() if err == io.EOF { break + } else if IsDataCorruptionError(err) { + cs.Logger.Debug("data has been corrupted in last height of consensus WAL", "err", err, "height", csHeight) + panic(fmt.Sprintf("data has been corrupted (%v) in last height %d of consensus WAL", err, csHeight)) } else if err != nil { return err } + // NOTE: since the priv key is set when the msgs are received // it will attempt to eg double sign but we can just ignore it // since the votes will be replayed and we'll get to the next step @@ -202,7 +210,7 @@ func (h *Handshaker) Handshake(proxyApp proxy.AppConns) error { // handshake is done via info request on the query conn res, err := proxyApp.Query().InfoSync(abci.RequestInfo{version.Version}) if err != nil { - return errors.New(cmn.Fmt("Error calling Info: %v", err)) + return fmt.Errorf("Error calling Info: %v", err) } blockHeight := int64(res.LastBlockHeight) @@ -218,7 +226,7 @@ func (h *Handshaker) Handshake(proxyApp proxy.AppConns) error { // replay blocks up to the latest in the blockstore _, err = h.ReplayBlocks(appHash, blockHeight, proxyApp) if err != nil { - return errors.New(cmn.Fmt("Error on replay: %v", err)) + return fmt.Errorf("Error on replay: %v", err) } h.logger.Info("Completed ABCI Handshake - Tendermint and App are synced", "appHeight", blockHeight, "appHash", fmt.Sprintf("%X", appHash)) @@ -358,7 +366,7 @@ func (h *Handshaker) replayBlock(height int64, proxyApp proxy.AppConnConsensus) func (h *Handshaker) checkAppHash(appHash []byte) error { if !bytes.Equal(h.state.AppHash, appHash) { - panic(errors.New(cmn.Fmt("Tendermint state.AppHash does not match AppHash after replay. Got %X, expected %X", appHash, h.state.AppHash)).Error()) + panic(fmt.Errorf("Tendermint state.AppHash does not match AppHash after replay. Got %X, expected %X", appHash, h.state.AppHash).Error()) } return nil } diff --git a/consensus/replay_test.go b/consensus/replay_test.go index 7a06d96f..7d02ecd2 100644 --- a/consensus/replay_test.go +++ b/consensus/replay_test.go @@ -246,8 +246,8 @@ func (w *crashingWAL) Save(m WALMessage) { } func (w *crashingWAL) Group() *auto.Group { return w.next.Group() } -func (w *crashingWAL) SearchForEndHeight(height int64) (gr *auto.GroupReader, found bool, err error) { - return w.next.SearchForEndHeight(height) +func (w *crashingWAL) SearchForEndHeight(height int64, options *WALSearchOptions) (gr *auto.GroupReader, found bool, err error) { + return w.next.SearchForEndHeight(height, options) } func (w *crashingWAL) Start() error { return w.next.Start() } @@ -478,7 +478,7 @@ func buildTMStateFromChain(config *cfg.Config, state *sm.State, chain []*types.B func makeBlockchainFromWAL(wal WAL) ([]*types.Block, []*types.Commit, error) { // Search for height marker - gr, found, err := wal.SearchForEndHeight(0) + gr, found, err := wal.SearchForEndHeight(0, &WALSearchOptions{}) if err != nil { return nil, nil, err } diff --git a/consensus/wal.go b/consensus/wal.go index 5ac605d0..7dc8d2e8 100644 --- a/consensus/wal.go +++ b/consensus/wal.go @@ -17,6 +17,11 @@ import ( cmn "github.com/tendermint/tmlibs/common" ) +const ( + // must be greater than params.BlockGossipParams.BlockPartSizeBytes + a few bytes + maxMsgSizeBytes = 1024 * 1024 // 1MB +) + //-------------------------------------------------------- // types and functions for savings consensus messages @@ -48,7 +53,7 @@ var _ = wire.RegisterInterface( type WAL interface { Save(WALMessage) Group() *auto.Group - SearchForEndHeight(height int64) (gr *auto.GroupReader, found bool, err error) + SearchForEndHeight(height int64, options *WALSearchOptions) (gr *auto.GroupReader, found bool, err error) Start() error Stop() error @@ -133,12 +138,18 @@ func (wal *baseWAL) Save(msg WALMessage) { } } +// WALSearchOptions are optional arguments to SearchForEndHeight. +type WALSearchOptions struct { + // IgnoreDataCorruptionErrors set to true will result in skipping data corruption errors. + IgnoreDataCorruptionErrors bool +} + // SearchForEndHeight searches for the EndHeightMessage with the height and // returns an auto.GroupReader, whenever it was found or not and an error. // Group reader will be nil if found equals false. // // CONTRACT: caller must close group reader. -func (wal *baseWAL) SearchForEndHeight(height int64) (gr *auto.GroupReader, found bool, err error) { +func (wal *baseWAL) SearchForEndHeight(height int64, options *WALSearchOptions) (gr *auto.GroupReader, found bool, err error) { var msg *TimedWALMessage // NOTE: starting from the last file in the group because we're usually @@ -158,7 +169,9 @@ func (wal *baseWAL) SearchForEndHeight(height int64) (gr *auto.GroupReader, foun // check next file break } - if err != nil { + if options.IgnoreDataCorruptionErrors && IsDataCorruptionError(err) { + // do nothing + } else if err != nil { gr.Close() return nil, false, err } @@ -210,6 +223,25 @@ func (enc *WALEncoder) Encode(v *TimedWALMessage) error { /////////////////////////////////////////////////////////////////////////////// +// IsDataCorruptionError returns true if data has been corrupted inside WAL. +func IsDataCorruptionError(err error) bool { + _, ok := err.(DataCorruptionError) + return ok +} + +// DataCorruptionError is an error that occures if data on disk was corrupted. +type DataCorruptionError struct { + cause error +} + +func (e DataCorruptionError) Error() string { + return fmt.Sprintf("DataCorruptionError[%v]", e.cause) +} + +func (e DataCorruptionError) Cause() error { + return e.cause +} + // A WALDecoder reads and decodes custom-encoded WAL messages from an input // stream. See WALEncoder for the format used. // @@ -228,7 +260,7 @@ func NewWALDecoder(rd io.Reader) *WALDecoder { func (dec *WALDecoder) Decode() (*TimedWALMessage, error) { b := make([]byte, 4) - n, err := dec.rd.Read(b) + _, err := dec.rd.Read(b) if err == io.EOF { return nil, err } @@ -238,7 +270,7 @@ func (dec *WALDecoder) Decode() (*TimedWALMessage, error) { crc := binary.BigEndian.Uint32(b) b = make([]byte, 4) - n, err = dec.rd.Read(b) + _, err = dec.rd.Read(b) if err == io.EOF { return nil, err } @@ -247,26 +279,30 @@ func (dec *WALDecoder) Decode() (*TimedWALMessage, error) { } length := binary.BigEndian.Uint32(b) + if length > maxMsgSizeBytes { + return nil, DataCorruptionError{fmt.Errorf("length %d exceeded maximum possible value of %d bytes", length, maxMsgSizeBytes)} + } + data := make([]byte, length) - n, err = dec.rd.Read(data) + _, err = dec.rd.Read(data) if err == io.EOF { return nil, err } if err != nil { - return nil, fmt.Errorf("not enough bytes for data: %v (want: %d, read: %v)", err, length, n) + return nil, fmt.Errorf("failed to read data: %v", err) } // check checksum before decoding data actualCRC := crc32.Checksum(data, crc32c) if actualCRC != crc { - return nil, fmt.Errorf("checksums do not match: (read: %v, actual: %v)", crc, actualCRC) + return nil, DataCorruptionError{fmt.Errorf("checksums do not match: (read: %v, actual: %v)", crc, actualCRC)} } var nn int var res *TimedWALMessage // nolint: gosimple res = wire.ReadBinary(&TimedWALMessage{}, bytes.NewBuffer(data), int(length), &nn, &err).(*TimedWALMessage) if err != nil { - return nil, fmt.Errorf("failed to decode data: %v", err) + return nil, DataCorruptionError{fmt.Errorf("failed to decode data: %v", err)} } return res, err @@ -276,7 +312,7 @@ type nilWAL struct{} func (nilWAL) Save(m WALMessage) {} func (nilWAL) Group() *auto.Group { return nil } -func (nilWAL) SearchForEndHeight(height int64) (gr *auto.GroupReader, found bool, err error) { +func (nilWAL) SearchForEndHeight(height int64, options *WALSearchOptions) (gr *auto.GroupReader, found bool, err error) { return nil, false, nil } func (nilWAL) Start() error { return nil } diff --git a/consensus/wal_fuzz.go b/consensus/wal_fuzz.go new file mode 100644 index 00000000..e15097c3 --- /dev/null +++ b/consensus/wal_fuzz.go @@ -0,0 +1,31 @@ +// +build gofuzz + +package consensus + +import ( + "bytes" + "io" +) + +func Fuzz(data []byte) int { + dec := NewWALDecoder(bytes.NewReader(data)) + for { + msg, err := dec.Decode() + if err == io.EOF { + break + } + if err != nil { + if msg != nil { + panic("msg != nil on error") + } + return 0 + } + var w bytes.Buffer + enc := NewWALEncoder(&w) + err = enc.Encode(msg) + if err != nil { + panic(err) + } + } + return 1 +} diff --git a/consensus/wal_generator.go b/consensus/wal_generator.go index 157d6d40..b1ea268e 100644 --- a/consensus/wal_generator.go +++ b/consensus/wal_generator.go @@ -180,7 +180,7 @@ func (w *byteBufferWAL) Save(m WALMessage) { func (w *byteBufferWAL) Group() *auto.Group { panic("not implemented") } -func (w *byteBufferWAL) SearchForEndHeight(height int64) (gr *auto.GroupReader, found bool, err error) { +func (w *byteBufferWAL) SearchForEndHeight(height int64, options *WALSearchOptions) (gr *auto.GroupReader, found bool, err error) { return nil, false, nil } diff --git a/consensus/wal_test.go b/consensus/wal_test.go index 8ec1a7c2..c7f08739 100644 --- a/consensus/wal_test.go +++ b/consensus/wal_test.go @@ -54,7 +54,7 @@ func TestSearchForEndHeight(t *testing.T) { } h := int64(3) - gr, found, err := wal.SearchForEndHeight(h) + gr, found, err := wal.SearchForEndHeight(h, &WALSearchOptions{}) assert.NoError(t, err, cmn.Fmt("expected not to err on height %d", h)) assert.True(t, found, cmn.Fmt("expected to find end height for %d", h)) assert.NotNil(t, gr, "expected group not to be nil") diff --git a/docs/specification/corruption.rst b/docs/specification/corruption.rst new file mode 100644 index 00000000..33e5ba0b --- /dev/null +++ b/docs/specification/corruption.rst @@ -0,0 +1,69 @@ +Corruption +========== + +Important step +-------------- + +Make sure you have a backup of the Tendermint data directory. + +Possible causes +--------------- + +Remember that most corruption is caused by hardware issues: + +- RAID controllers with faulty / worn out battery backup, and an unexpected power loss +- Hard disk drives with write-back cache enabled, and an unexpected power loss +- Cheap SSDs with insufficient power-loss protection, and an unexpected power-loss +- Defective RAM +- Defective or overheating CPU(s) + +Other causes can be: + +- Database systems configured with fsync=off and an OS crash or power loss +- Filesystems configured to use write barriers plus a storage layer that ignores write barriers. LVM is a particular culprit. +- Tendermint bugs +- Operating system bugs +- Admin error + - directly modifying Tendermint data-directory contents + +(Source: https://wiki.postgresql.org/wiki/Corruption) + +WAL Corruption +-------------- + +If consensus WAL is corrupted at the lastest height and you are trying to start +Tendermint, replay will fail with panic. + +Recovering from data corruption can be hard and time-consuming. Here are two approaches you can take: + +1) Delete the WAL file and restart Tendermint. It will attempt to sync with other peers. +2) Try to repair the WAL file manually: + 1. Create a backup of the corrupted WAL file: + + .. code:: bash + + cp "$TMHOME/data/cs.wal/wal" > /tmp/corrupted_wal_backup + + 2. Use ./scripts/wal2json to create a human-readable version + + .. code:: bash + + ./scripts/wal2json/wal2json "$TMHOME/data/cs.wal/wal" > /tmp/corrupted_wal + + 3. Search for a "CORRUPTED MESSAGE" line. + 4. By looking at the previous message and the message after the corrupted one + and looking at the logs, try to rebuild the message. If the consequent + messages are marked as corrupted too (this may happen if length header + got corrupted or some writes did not make it to the WAL ~ truncation), + then remove all the lines starting from the corrupted one and restart + Tendermint. + + .. code:: bash + + $EDITOR /tmp/corrupted_wal + + 5. After editing, convert this file back into binary form by running: + + .. code:: bash + + ./scripts/json2wal/json2wal /tmp/corrupted_wal > "$TMHOME/data/cs.wal/wal"