tendermint/consensus/wal_generator.go
Anca Zamfir 4d7cd8055b blockchain: Reorg reactor (#3561)
* go routines in blockchain reactor

* Added reference to the go routine diagram

* Initial commit

* cleanup

* Undo testing_logger change, committed by mistake

* Fix the test loggers

* pulled some fsm code into pool.go

* added pool tests

* changes to the design

added block requests under peer

moved the request trigger in the reactor poolRoutine, triggered now by a ticker

in general moved everything required for making block requests smarter in the poolRoutine

added a simple map of heights to keep track of what will need to be requested next

added a few more tests

* send errors to FSM in a different channel than blocks

send errors (RemovePeer) from switch on a different channel than the
one receiving blocks
renamed channels
added more pool tests

* more pool tests

* lint errors

* more tests

* more tests

* switch fast sync to new implementation

* fixed data race in tests

* cleanup

* finished fsm tests

* address golangci comments :)

* address golangci comments :)

* Added timeout on next block needed to advance

* updating docs and cleanup

* fix issue in test from previous cleanup

* cleanup

* Added termination scenarios, tests and more cleanup

* small fixes to adr, comments and cleanup

* Fix bug in sendRequest()

If we tried to send a request to a peer not present in the switch, a
missing continue statement caused the request to be blackholed in a peer
that was removed and never retried.

While this bug was manifesting, the reactor kept asking for other
blocks that would be stored and never consumed. Added the number of
unconsumed blocks in the math for requesting blocks ahead of current
processing height so eventually there will be no more blocks requested
until the already received ones are consumed.

* remove bpPeer's didTimeout field

* Use distinct err codes for peer timeout and FSM timeouts

* Don't allow peers to update with lower height

* review comments from Ethan and Zarko

* some cleanup, renaming, comments

* Move block execution in separate goroutine

* Remove pool's numPending

* review comments

* fix lint, remove old blockchain reactor and duplicates in fsm tests

* small reorg around peer after review comments

* add the reactor spec

* verify block only once

* review comments

* change to int for max number of pending requests

* cleanup and godoc

* Add configuration flag fast sync version

* golangci fixes

* fix config template

* move both reactor versions under blockchain

* cleanup, golint, renaming stuff

* updated documentation, fixed more golint warnings

* integrate with behavior package

* sync with master

* gofmt

* add changelog_pending entry

* move to improvments

* suggestion to changelog entry
2019-07-23 10:58:52 +02:00

207 lines
6.7 KiB
Go

package consensus
import (
"bufio"
"bytes"
"fmt"
"io"
"path/filepath"
"testing"
"time"
"github.com/pkg/errors"
"github.com/tendermint/tendermint/abci/example/kvstore"
cfg "github.com/tendermint/tendermint/config"
cmn "github.com/tendermint/tendermint/libs/common"
"github.com/tendermint/tendermint/libs/log"
"github.com/tendermint/tendermint/mock"
"github.com/tendermint/tendermint/privval"
"github.com/tendermint/tendermint/proxy"
sm "github.com/tendermint/tendermint/state"
"github.com/tendermint/tendermint/store"
"github.com/tendermint/tendermint/types"
"github.com/tendermint/tm-cmn/db"
)
// WALGenerateNBlocks generates a consensus WAL. It does this by spinning up a
// stripped down version of node (proxy app, event bus, consensus state) with a
// persistent kvstore application and special consensus wal instance
// (byteBufferWAL) and waits until numBlocks are created. If the node fails to produce given numBlocks, it returns an error.
func WALGenerateNBlocks(t *testing.T, wr io.Writer, numBlocks int) (err error) {
config := getConfig(t)
app := kvstore.NewPersistentKVStoreApplication(filepath.Join(config.DBDir(), "wal_generator"))
logger := log.TestingLogger().With("wal_generator", "wal_generator")
logger.Info("generating WAL (last height msg excluded)", "numBlocks", numBlocks)
/////////////////////////////////////////////////////////////////////////////
// COPY PASTE FROM node.go WITH A FEW MODIFICATIONS
// NOTE: we can't import node package because of circular dependency.
// NOTE: we don't do handshake so need to set state.Version.Consensus.App directly.
privValidatorKeyFile := config.PrivValidatorKeyFile()
privValidatorStateFile := config.PrivValidatorStateFile()
privValidator := privval.LoadOrGenFilePV(privValidatorKeyFile, privValidatorStateFile)
genDoc, err := types.GenesisDocFromFile(config.GenesisFile())
if err != nil {
return errors.Wrap(err, "failed to read genesis file")
}
blockStoreDB := db.NewMemDB()
stateDB := blockStoreDB
state, err := sm.MakeGenesisState(genDoc)
if err != nil {
return errors.Wrap(err, "failed to make genesis state")
}
state.Version.Consensus.App = kvstore.ProtocolVersion
sm.SaveState(stateDB, state)
blockStore := store.NewBlockStore(blockStoreDB)
proxyApp := proxy.NewAppConns(proxy.NewLocalClientCreator(app))
proxyApp.SetLogger(logger.With("module", "proxy"))
if err := proxyApp.Start(); err != nil {
return errors.Wrap(err, "failed to start proxy app connections")
}
defer proxyApp.Stop()
eventBus := types.NewEventBus()
eventBus.SetLogger(logger.With("module", "events"))
if err := eventBus.Start(); err != nil {
return errors.Wrap(err, "failed to start event bus")
}
defer eventBus.Stop()
mempool := mock.Mempool{}
evpool := sm.MockEvidencePool{}
blockExec := sm.NewBlockExecutor(stateDB, log.TestingLogger(), proxyApp.Consensus(), mempool, evpool)
consensusState := NewConsensusState(config.Consensus, state.Copy(), blockExec, blockStore, mempool, evpool)
consensusState.SetLogger(logger)
consensusState.SetEventBus(eventBus)
if privValidator != nil {
consensusState.SetPrivValidator(privValidator)
}
// END OF COPY PASTE
/////////////////////////////////////////////////////////////////////////////
// set consensus wal to buffered WAL, which will write all incoming msgs to buffer
numBlocksWritten := make(chan struct{})
wal := newByteBufferWAL(logger, NewWALEncoder(wr), int64(numBlocks), numBlocksWritten)
// see wal.go#103
wal.Write(EndHeightMessage{0})
consensusState.wal = wal
if err := consensusState.Start(); err != nil {
return errors.Wrap(err, "failed to start consensus state")
}
select {
case <-numBlocksWritten:
consensusState.Stop()
return nil
case <-time.After(1 * time.Minute):
consensusState.Stop()
return fmt.Errorf("waited too long for tendermint to produce %d blocks (grep logs for `wal_generator`)", numBlocks)
}
}
//WALWithNBlocks returns a WAL content with numBlocks.
func WALWithNBlocks(t *testing.T, numBlocks int) (data []byte, err error) {
var b bytes.Buffer
wr := bufio.NewWriter(&b)
if err := WALGenerateNBlocks(t, wr, numBlocks); err != nil {
return []byte{}, err
}
wr.Flush()
return b.Bytes(), nil
}
func randPort() int {
// returns between base and base + spread
base, spread := 20000, 20000
return base + cmn.RandIntn(spread)
}
func makeAddrs() (string, string, string) {
start := randPort()
return fmt.Sprintf("tcp://0.0.0.0:%d", start),
fmt.Sprintf("tcp://0.0.0.0:%d", start+1),
fmt.Sprintf("tcp://0.0.0.0:%d", start+2)
}
// getConfig returns a config for test cases
func getConfig(t *testing.T) *cfg.Config {
c := cfg.ResetTestRoot(t.Name())
// and we use random ports to run in parallel
tm, rpc, grpc := makeAddrs()
c.P2P.ListenAddress = tm
c.RPC.ListenAddress = rpc
c.RPC.GRPCListenAddress = grpc
return c
}
// byteBufferWAL is a WAL which writes all msgs to a byte buffer. Writing stops
// when the heightToStop is reached. Client will be notified via
// signalWhenStopsTo channel.
type byteBufferWAL struct {
enc *WALEncoder
stopped bool
heightToStop int64
signalWhenStopsTo chan<- struct{}
logger log.Logger
}
// needed for determinism
var fixedTime, _ = time.Parse(time.RFC3339, "2017-01-02T15:04:05Z")
func newByteBufferWAL(logger log.Logger, enc *WALEncoder, nBlocks int64, signalStop chan<- struct{}) *byteBufferWAL {
return &byteBufferWAL{
enc: enc,
heightToStop: nBlocks,
signalWhenStopsTo: signalStop,
logger: logger,
}
}
// Save writes message to the internal buffer except when heightToStop is
// reached, in which case it will signal the caller via signalWhenStopsTo and
// skip writing.
func (w *byteBufferWAL) Write(m WALMessage) {
if w.stopped {
w.logger.Debug("WAL already stopped. Not writing message", "msg", m)
return
}
if endMsg, ok := m.(EndHeightMessage); ok {
w.logger.Debug("WAL write end height message", "height", endMsg.Height, "stopHeight", w.heightToStop)
if endMsg.Height == w.heightToStop {
w.logger.Debug("Stopping WAL at height", "height", endMsg.Height)
w.signalWhenStopsTo <- struct{}{}
w.stopped = true
return
}
}
w.logger.Debug("WAL Write Message", "msg", m)
err := w.enc.Encode(&TimedWALMessage{fixedTime, m})
if err != nil {
panic(fmt.Sprintf("failed to encode the msg %v", m))
}
}
func (w *byteBufferWAL) WriteSync(m WALMessage) {
w.Write(m)
}
func (w *byteBufferWAL) FlushAndSync() error { return nil }
func (w *byteBufferWAL) SearchForEndHeight(height int64, options *WALSearchOptions) (rd io.ReadCloser, found bool, err error) {
return nil, false, nil
}
func (w *byteBufferWAL) Start() error { return nil }
func (w *byteBufferWAL) Stop() error { return nil }
func (w *byteBufferWAL) Wait() {}