tendermint/consensus/wal_test.go
Thane Thomson dff3deb2a9 cs: sync WAL more frequently (#3300)
As per #3043, this adds a ticker to sync the WAL every 2s while the WAL is running.

* Flush WAL every 2s

This adds a ticker that flushes the WAL every 2s while the WAL is
running. This is related to #3043.

* Fix spelling

* Increase timeout to 2mins for slower build environments

* Make WAL sync interval configurable

* Add TODO to replace testChan with more comprehensive testBus

* Remove extraneous debug statement

* Remove testChan in favour of using system time

As per
https://github.com/tendermint/tendermint/pull/3300#discussion_r255886586,
this removes the `testChan` WAL member and replaces the approach with a
system time-oriented one. In this new approach, we keep track of the
system time at which each flush and periodic flush successfully
occurred.

The naming of the various functions is also updated here to be more
consistent with "flushing" as opposed to "sync'ing".

* Update naming convention and ensure lock for timestamp update

* Add Flush method as part of WAL interface

Adds a `Flush` method as part of the WAL interface to enforce the idea
that we can manually trigger a WAL flush from outside of the WAL. This
is employed in the consensus state management to flush the WAL prior to
signing votes/proposals, as per https://github.com/tendermint/tendermint/issues/3043#issuecomment-453853630

* Update CHANGELOG_PENDING

* Remove mutex approach and replace with DI

The dependency injection approach to dealing with testing concerns could
allow similar effects to some kind of "testing bus"-based approach. This
commit introduces an example of this, where instead of relying on
(potentially fragile) timing of things between the code and the test, we
inject code into the function under test that can signal the test
through a channel.

This allows us to avoid the `time.Sleep()`-based approach previously
employed.

* Update comment on WAL flushing during vote signing

Co-Authored-By: thanethomson <connect@thanethomson.com>

* Simplify flush interval definition

Co-Authored-By: thanethomson <connect@thanethomson.com>

* Expand commentary on WAL disk flushing

Co-Authored-By: thanethomson <connect@thanethomson.com>

* Add broken test to illustrate WAL sync test problem

Removes test-related state (dependency injection code) from the WAL data
structure and adds test code to illustrate the problem with using
`WALGenerateNBlocks` and `wal.SearchForEndHeight` to test periodic
sync'ing.

* Fix test error messages

* Use WAL group buffer size to check for flush

A function is added to `libs/autofile/group.go#Group` in order to return
the size of the buffered data (i.e. data that has not yet been flushed
to disk). The test now checks that, prior to a `time.Sleep`, the group
buffer has data in it. After the `time.Sleep` (during which time the
periodic flush should have been called), the buffer should be empty.

* Remove config root dir removal from #3291

* Add godoc for NewWAL mentioning periodic sync
2019-02-20 09:45:18 +04:00

250 lines
6.3 KiB
Go

package consensus
import (
"bytes"
"crypto/rand"
"io/ioutil"
"os"
"path/filepath"
// "sync"
"testing"
"time"
"github.com/tendermint/tendermint/consensus/types"
"github.com/tendermint/tendermint/libs/autofile"
"github.com/tendermint/tendermint/libs/log"
tmtypes "github.com/tendermint/tendermint/types"
tmtime "github.com/tendermint/tendermint/types/time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
const (
walTestFlushInterval = time.Duration(100) * time.Millisecond
)
func TestWALTruncate(t *testing.T) {
walDir, err := ioutil.TempDir("", "wal")
require.NoError(t, err)
defer os.RemoveAll(walDir)
walFile := filepath.Join(walDir, "wal")
//this magic number 4K can truncate the content when RotateFile. defaultHeadSizeLimit(10M) is hard to simulate.
//this magic number 1 * time.Millisecond make RotateFile check frequently. defaultGroupCheckDuration(5s) is hard to simulate.
wal, err := NewWAL(walFile,
autofile.GroupHeadSizeLimit(4096),
autofile.GroupCheckDuration(1*time.Millisecond),
)
require.NoError(t, err)
wal.SetLogger(log.TestingLogger())
err = wal.Start()
require.NoError(t, err)
defer func() {
wal.Stop()
// wait for the wal to finish shutting down so we
// can safely remove the directory
wal.Wait()
}()
//60 block's size nearly 70K, greater than group's headBuf size(4096 * 10), when headBuf is full, truncate content will Flush to the file.
//at this time, RotateFile is called, truncate content exist in each file.
err = WALGenerateNBlocks(t, wal.Group(), 60)
require.NoError(t, err)
time.Sleep(1 * time.Millisecond) //wait groupCheckDuration, make sure RotateFile run
wal.Group().Flush()
h := int64(50)
gr, found, err := wal.SearchForEndHeight(h, &WALSearchOptions{})
assert.NoError(t, err, "expected not to err on height %d", h)
assert.True(t, found, "expected to find end height for %d", h)
assert.NotNil(t, gr)
defer gr.Close()
dec := NewWALDecoder(gr)
msg, err := dec.Decode()
assert.NoError(t, err, "expected to decode a message")
rs, ok := msg.Msg.(tmtypes.EventDataRoundState)
assert.True(t, ok, "expected message of type EventDataRoundState")
assert.Equal(t, rs.Height, h+1, "wrong height")
}
func TestWALEncoderDecoder(t *testing.T) {
now := tmtime.Now()
msgs := []TimedWALMessage{
{Time: now, Msg: EndHeightMessage{0}},
{Time: now, Msg: timeoutInfo{Duration: time.Second, Height: 1, Round: 1, Step: types.RoundStepPropose}},
}
b := new(bytes.Buffer)
for _, msg := range msgs {
b.Reset()
enc := NewWALEncoder(b)
err := enc.Encode(&msg)
require.NoError(t, err)
dec := NewWALDecoder(b)
decoded, err := dec.Decode()
require.NoError(t, err)
assert.Equal(t, msg.Time.UTC(), decoded.Time)
assert.Equal(t, msg.Msg, decoded.Msg)
}
}
func TestWALWritePanicsIfMsgIsTooBig(t *testing.T) {
walDir, err := ioutil.TempDir("", "wal")
require.NoError(t, err)
defer os.RemoveAll(walDir)
walFile := filepath.Join(walDir, "wal")
wal, err := NewWAL(walFile)
require.NoError(t, err)
err = wal.Start()
require.NoError(t, err)
defer func() {
wal.Stop()
// wait for the wal to finish shutting down so we
// can safely remove the directory
wal.Wait()
}()
assert.Panics(t, func() { wal.Write(make([]byte, maxMsgSizeBytes+1)) })
}
func TestWALSearchForEndHeight(t *testing.T) {
walBody, err := WALWithNBlocks(t, 6)
if err != nil {
t.Fatal(err)
}
walFile := tempWALWithData(walBody)
wal, err := NewWAL(walFile)
require.NoError(t, err)
wal.SetLogger(log.TestingLogger())
h := int64(3)
gr, found, err := wal.SearchForEndHeight(h, &WALSearchOptions{})
assert.NoError(t, err, "expected not to err on height %d", h)
assert.True(t, found, "expected to find end height for %d", h)
assert.NotNil(t, gr)
defer gr.Close()
dec := NewWALDecoder(gr)
msg, err := dec.Decode()
assert.NoError(t, err, "expected to decode a message")
rs, ok := msg.Msg.(tmtypes.EventDataRoundState)
assert.True(t, ok, "expected message of type EventDataRoundState")
assert.Equal(t, rs.Height, h+1, "wrong height")
}
func TestWALPeriodicSync(t *testing.T) {
walDir, err := ioutil.TempDir("", "wal")
require.NoError(t, err)
defer os.RemoveAll(walDir)
walFile := filepath.Join(walDir, "wal")
wal, err := NewWAL(walFile, autofile.GroupCheckDuration(1*time.Millisecond))
require.NoError(t, err)
wal.SetFlushInterval(walTestFlushInterval)
wal.SetLogger(log.TestingLogger())
require.NoError(t, wal.Start())
defer func() {
wal.Stop()
wal.Wait()
}()
err = WALGenerateNBlocks(t, wal.Group(), 5)
require.NoError(t, err)
// We should have data in the buffer now
assert.NotZero(t, wal.Group().Buffered())
time.Sleep(walTestFlushInterval + (10 * time.Millisecond))
// The data should have been flushed by the periodic sync
assert.Zero(t, wal.Group().Buffered())
h := int64(4)
gr, found, err := wal.SearchForEndHeight(h, &WALSearchOptions{})
assert.NoError(t, err, "expected not to err on height %d", h)
assert.True(t, found, "expected to find end height for %d", h)
assert.NotNil(t, gr)
if gr != nil {
gr.Close()
}
}
/*
var initOnce sync.Once
func registerInterfacesOnce() {
initOnce.Do(func() {
var _ = wire.RegisterInterface(
struct{ WALMessage }{},
wire.ConcreteType{[]byte{}, 0x10},
)
})
}
*/
func nBytes(n int) []byte {
buf := make([]byte, n)
n, _ = rand.Read(buf)
return buf[:n]
}
func benchmarkWalDecode(b *testing.B, n int) {
// registerInterfacesOnce()
buf := new(bytes.Buffer)
enc := NewWALEncoder(buf)
data := nBytes(n)
enc.Encode(&TimedWALMessage{Msg: data, Time: time.Now().Round(time.Second).UTC()})
encoded := buf.Bytes()
b.ResetTimer()
for i := 0; i < b.N; i++ {
buf.Reset()
buf.Write(encoded)
dec := NewWALDecoder(buf)
if _, err := dec.Decode(); err != nil {
b.Fatal(err)
}
}
b.ReportAllocs()
}
func BenchmarkWalDecode512B(b *testing.B) {
benchmarkWalDecode(b, 512)
}
func BenchmarkWalDecode10KB(b *testing.B) {
benchmarkWalDecode(b, 10*1024)
}
func BenchmarkWalDecode100KB(b *testing.B) {
benchmarkWalDecode(b, 100*1024)
}
func BenchmarkWalDecode1MB(b *testing.B) {
benchmarkWalDecode(b, 1024*1024)
}
func BenchmarkWalDecode10MB(b *testing.B) {
benchmarkWalDecode(b, 10*1024*1024)
}
func BenchmarkWalDecode100MB(b *testing.B) {
benchmarkWalDecode(b, 100*1024*1024)
}
func BenchmarkWalDecode1GB(b *testing.B) {
benchmarkWalDecode(b, 1024*1024*1024)
}