mirror of
https://github.com/fluencelabs/tendermint
synced 2025-04-24 22:32:15 +00:00
Earlier this week somebody posted this in GoS Riot chat: ``` E[2019-02-12|10:38:37.596] Corrupted entry. Skipping... module=consensus wal=/home/gaia/.gaiad/data/cs.wal/wal err="DataCorruptionError[length 878916964 exceeded maximum possible value of 1048576 bytes]" E[2019-02-12|10:38:37.596] Corrupted entry. Skipping... module=consensus wal=/home/gaia/.gaiad/data/cs.wal/wal err="DataCorruptionError[length 825701731 exceeded maximum possible value of 1048576 bytes]" E[2019-02-12|10:38:37.596] Corrupted entry. Skipping... module=consensus wal=/home/gaia/.gaiad/data/cs.wal/wal err="DataCorruptionError[length 1631073634 exceeded maximum possible value of 1048576 bytes]" E[2019-02-12|10:38:37.596] Corrupted entry. Skipping... module=consensus wal=/home/gaia/.gaiad/data/cs.wal/wal err="DataCorruptionError[length 912418148 exceeded maximum possible value of 1048576 bytes]" E[2019-02-12|10:38:37.600] Corrupted entry. Skipping... module=consensus wal=/home/gaia/.gaiad/data/cs.wal/wal err="DataCorruptionError[failed to read data: EOF]" E[2019-02-12|10:38:37.600] Error on catchup replay. Proceeding to start ConsensusState anyway module=consensus err="Cannot replay height 7242. WAL does not contain #ENDHEIGHT for 7241" E[2019-02-12|10:38:37.861] Error dialing peer module=p2p err="dial tcp 35.183.126.181:26656: i/o timeout ``` Note the length error messages: most likely the length field of those entries got corrupted. While looking at the code, I noticed that we don't check the message size during encoding. This PR adds that check. It also improves a few error messages in WALDecoder.
208 lines
5.3 KiB
Go
208 lines
5.3 KiB
Go
package consensus
|
|
|
|
import (
|
|
"bytes"
|
|
"crypto/rand"
|
|
"fmt"
|
|
"io/ioutil"
|
|
"os"
|
|
"path/filepath"
|
|
|
|
// "sync"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/tendermint/tendermint/consensus/types"
|
|
"github.com/tendermint/tendermint/libs/autofile"
|
|
"github.com/tendermint/tendermint/libs/log"
|
|
tmtypes "github.com/tendermint/tendermint/types"
|
|
tmtime "github.com/tendermint/tendermint/types/time"
|
|
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
func TestWALTruncate(t *testing.T) {
|
|
walDir, err := ioutil.TempDir("", "wal")
|
|
require.NoError(t, err)
|
|
defer os.RemoveAll(walDir)
|
|
|
|
walFile := filepath.Join(walDir, "wal")
|
|
|
|
//this magic number 4K can truncate the content when RotateFile. defaultHeadSizeLimit(10M) is hard to simulate.
|
|
//this magic number 1 * time.Millisecond make RotateFile check frequently. defaultGroupCheckDuration(5s) is hard to simulate.
|
|
wal, err := NewWAL(walFile,
|
|
autofile.GroupHeadSizeLimit(4096),
|
|
autofile.GroupCheckDuration(1*time.Millisecond),
|
|
)
|
|
require.NoError(t, err)
|
|
wal.SetLogger(log.TestingLogger())
|
|
err = wal.Start()
|
|
require.NoError(t, err)
|
|
defer func() {
|
|
wal.Stop()
|
|
// wait for the wal to finish shutting down so we
|
|
// can safely remove the directory
|
|
wal.Wait()
|
|
}()
|
|
|
|
//60 block's size nearly 70K, greater than group's headBuf size(4096 * 10), when headBuf is full, truncate content will Flush to the file.
|
|
//at this time, RotateFile is called, truncate content exist in each file.
|
|
err = WALGenerateNBlocks(wal.Group(), 60)
|
|
require.NoError(t, err)
|
|
|
|
time.Sleep(1 * time.Millisecond) //wait groupCheckDuration, make sure RotateFile run
|
|
|
|
wal.Group().Flush()
|
|
|
|
h := int64(50)
|
|
gr, found, err := wal.SearchForEndHeight(h, &WALSearchOptions{})
|
|
assert.NoError(t, err, fmt.Sprintf("expected not to err on height %d", h))
|
|
assert.True(t, found, fmt.Sprintf("expected to find end height for %d", h))
|
|
assert.NotNil(t, gr, "expected group not to be nil")
|
|
defer gr.Close()
|
|
|
|
dec := NewWALDecoder(gr)
|
|
msg, err := dec.Decode()
|
|
assert.NoError(t, err, "expected to decode a message")
|
|
rs, ok := msg.Msg.(tmtypes.EventDataRoundState)
|
|
assert.True(t, ok, "expected message of type EventDataRoundState")
|
|
assert.Equal(t, rs.Height, h+1, fmt.Sprintf("wrong height"))
|
|
}
|
|
|
|
func TestWALEncoderDecoder(t *testing.T) {
|
|
now := tmtime.Now()
|
|
msgs := []TimedWALMessage{
|
|
{Time: now, Msg: EndHeightMessage{0}},
|
|
{Time: now, Msg: timeoutInfo{Duration: time.Second, Height: 1, Round: 1, Step: types.RoundStepPropose}},
|
|
}
|
|
|
|
b := new(bytes.Buffer)
|
|
|
|
for _, msg := range msgs {
|
|
b.Reset()
|
|
|
|
enc := NewWALEncoder(b)
|
|
err := enc.Encode(&msg)
|
|
require.NoError(t, err)
|
|
|
|
dec := NewWALDecoder(b)
|
|
decoded, err := dec.Decode()
|
|
require.NoError(t, err)
|
|
|
|
assert.Equal(t, msg.Time.UTC(), decoded.Time)
|
|
assert.Equal(t, msg.Msg, decoded.Msg)
|
|
}
|
|
}
|
|
|
|
func TestWALWritePanicsIfMsgIsTooBig(t *testing.T) {
|
|
walDir, err := ioutil.TempDir("", "wal")
|
|
require.NoError(t, err)
|
|
defer os.RemoveAll(walDir)
|
|
walFile := filepath.Join(walDir, "wal")
|
|
|
|
wal, err := NewWAL(walFile)
|
|
require.NoError(t, err)
|
|
err = wal.Start()
|
|
require.NoError(t, err)
|
|
defer func() {
|
|
wal.Stop()
|
|
// wait for the wal to finish shutting down so we
|
|
// can safely remove the directory
|
|
wal.Wait()
|
|
}()
|
|
|
|
assert.Panics(t, func() { wal.Write(make([]byte, maxMsgSizeBytes+1)) })
|
|
}
|
|
|
|
func TestWALSearchForEndHeight(t *testing.T) {
|
|
walBody, err := WALWithNBlocks(6)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
walFile := tempWALWithData(walBody)
|
|
|
|
wal, err := NewWAL(walFile)
|
|
require.NoError(t, err)
|
|
wal.SetLogger(log.TestingLogger())
|
|
|
|
h := int64(3)
|
|
gr, found, err := wal.SearchForEndHeight(h, &WALSearchOptions{})
|
|
assert.NoError(t, err, fmt.Sprintf("expected not to err on height %d", h))
|
|
assert.True(t, found, fmt.Sprintf("expected to find end height for %d", h))
|
|
assert.NotNil(t, gr, "expected group not to be nil")
|
|
defer gr.Close()
|
|
|
|
dec := NewWALDecoder(gr)
|
|
msg, err := dec.Decode()
|
|
assert.NoError(t, err, "expected to decode a message")
|
|
rs, ok := msg.Msg.(tmtypes.EventDataRoundState)
|
|
assert.True(t, ok, "expected message of type EventDataRoundState")
|
|
assert.Equal(t, rs.Height, h+1, fmt.Sprintf("wrong height"))
|
|
}
|
|
|
|
/*
|
|
var initOnce sync.Once
|
|
|
|
func registerInterfacesOnce() {
|
|
initOnce.Do(func() {
|
|
var _ = wire.RegisterInterface(
|
|
struct{ WALMessage }{},
|
|
wire.ConcreteType{[]byte{}, 0x10},
|
|
)
|
|
})
|
|
}
|
|
*/
|
|
|
|
// nBytes returns a slice of exactly n bytes read from crypto/rand.
//
// It panics if the random source reports an error: silently returning a
// shorter slice would make callers work with a payload of the wrong size.
func nBytes(n int) []byte {
	buf := make([]byte, n)
	if _, err := rand.Read(buf); err != nil {
		panic("nBytes: reading random bytes: " + err.Error())
	}
	return buf
}
|
|
|
|
func benchmarkWalDecode(b *testing.B, n int) {
|
|
// registerInterfacesOnce()
|
|
|
|
buf := new(bytes.Buffer)
|
|
enc := NewWALEncoder(buf)
|
|
|
|
data := nBytes(n)
|
|
enc.Encode(&TimedWALMessage{Msg: data, Time: time.Now().Round(time.Second).UTC()})
|
|
|
|
encoded := buf.Bytes()
|
|
|
|
b.ResetTimer()
|
|
for i := 0; i < b.N; i++ {
|
|
buf.Reset()
|
|
buf.Write(encoded)
|
|
dec := NewWALDecoder(buf)
|
|
if _, err := dec.Decode(); err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
}
|
|
b.ReportAllocs()
|
|
}
|
|
|
|
func BenchmarkWalDecode512B(b *testing.B) {
|
|
benchmarkWalDecode(b, 512)
|
|
}
|
|
|
|
func BenchmarkWalDecode10KB(b *testing.B) {
|
|
benchmarkWalDecode(b, 10*1024)
|
|
}
|
|
func BenchmarkWalDecode100KB(b *testing.B) {
|
|
benchmarkWalDecode(b, 100*1024)
|
|
}
|
|
func BenchmarkWalDecode1MB(b *testing.B) {
|
|
benchmarkWalDecode(b, 1024*1024)
|
|
}
|
|
func BenchmarkWalDecode10MB(b *testing.B) {
|
|
benchmarkWalDecode(b, 10*1024*1024)
|
|
}
|
|
func BenchmarkWalDecode100MB(b *testing.B) {
|
|
benchmarkWalDecode(b, 100*1024*1024)
|
|
}
|
|
func BenchmarkWalDecode1GB(b *testing.B) {
|
|
benchmarkWalDecode(b, 1024*1024*1024)
|
|
}
|