package consensus

import (
	"bufio"
	"bytes"
	"fmt"
	"io"
	"path/filepath"
	"testing"
	"time"

	"github.com/pkg/errors"

	"github.com/tendermint/tendermint/abci/example/kvstore"
	cfg "github.com/tendermint/tendermint/config"
	cmn "github.com/tendermint/tendermint/libs/common"
	"github.com/tendermint/tendermint/libs/log"
	"github.com/tendermint/tendermint/mock"
	"github.com/tendermint/tendermint/privval"
	"github.com/tendermint/tendermint/proxy"
	sm "github.com/tendermint/tendermint/state"
	"github.com/tendermint/tendermint/store"
	"github.com/tendermint/tendermint/types"
	db "github.com/tendermint/tm-db"
)

// WALGenerateNBlocks generates a consensus WAL. It does this by spinning up a
// stripped-down version of a node (proxy app, event bus, consensus state) with a
// persistent kvstore application and a special consensus WAL instance
// (byteBufferWAL), and waits until numBlocks are created.
// If the node fails to produce the given numBlocks, it returns an error.
func WALGenerateNBlocks(t *testing.T, wr io.Writer, numBlocks int) (err error) {
	config := getConfig(t)

	app := kvstore.NewPersistentKVStoreApplication(filepath.Join(config.DBDir(), "wal_generator"))

	logger := log.TestingLogger().With("wal_generator", "wal_generator")
	logger.Info("generating WAL (last height msg excluded)", "numBlocks", numBlocks)

	/////////////////////////////////////////////////////////////////////////////
	// COPY PASTE FROM node.go WITH A FEW MODIFICATIONS
	// NOTE: we can't import the node package because of a circular dependency.
	// NOTE: we don't do the handshake, so we need to set state.Version.Consensus.App directly.
	privValidatorKeyFile := config.PrivValidatorKeyFile()
	privValidatorStateFile := config.PrivValidatorStateFile()
	privValidator := privval.LoadOrGenFilePV(privValidatorKeyFile, privValidatorStateFile)
	genDoc, err := types.GenesisDocFromFile(config.GenesisFile())
	if err != nil {
		return errors.Wrap(err, "failed to read genesis file")
	}
	blockStoreDB := db.NewMemDB()
	stateDB := blockStoreDB
	state, err := sm.MakeGenesisState(genDoc)
	if err != nil {
		return errors.Wrap(err, "failed to make genesis state")
	}
	state.Version.Consensus.App = kvstore.ProtocolVersion
	sm.SaveState(stateDB, state)
	blockStore := store.NewBlockStore(blockStoreDB)

	proxyApp := proxy.NewAppConns(proxy.NewLocalClientCreator(app))
	proxyApp.SetLogger(logger.With("module", "proxy"))
	if err := proxyApp.Start(); err != nil {
		return errors.Wrap(err, "failed to start proxy app connections")
	}
	defer proxyApp.Stop()

	eventBus := types.NewEventBus()
	eventBus.SetLogger(logger.With("module", "events"))
	if err := eventBus.Start(); err != nil {
		return errors.Wrap(err, "failed to start event bus")
	}
	defer eventBus.Stop()
	mempool := mock.Mempool{}
	evpool := sm.MockEvidencePool{}
	blockExec := sm.NewBlockExecutor(stateDB, log.TestingLogger(), proxyApp.Consensus(), mempool, evpool)
	consensusState := NewConsensusState(config.Consensus, state.Copy(), blockExec, blockStore, mempool, evpool)
	consensusState.SetLogger(logger)
	consensusState.SetEventBus(eventBus)
	if privValidator != nil {
		consensusState.SetPrivValidator(privValidator)
	}
	// END OF COPY PASTE
	/////////////////////////////////////////////////////////////////////////////

	// set the consensus WAL to a buffered WAL, which writes all incoming msgs to the given writer
	numBlocksWritten := make(chan struct{})
	wal := newByteBufferWAL(logger, NewWALEncoder(wr), int64(numBlocks), numBlocksWritten)
	// see wal.go#103
	wal.Write(EndHeightMessage{0})
	consensusState.wal = wal

	if err := consensusState.Start(); err != nil {
		return errors.Wrap(err, "failed to start consensus state")
	}

	select {
	case <-numBlocksWritten:
		consensusState.Stop()
		return nil
	case <-time.After(1 * time.Minute):
		consensusState.Stop()
		return fmt.Errorf("waited too long for tendermint to produce %d blocks (grep logs for `wal_generator`)", numBlocks)
	}
}

// WALWithNBlocks returns the content of a WAL with numBlocks.
func WALWithNBlocks(t *testing.T, numBlocks int) (data []byte, err error) {
	var b bytes.Buffer
	wr := bufio.NewWriter(&b)

	if err := WALGenerateNBlocks(t, wr, numBlocks); err != nil {
		return []byte{}, err
	}

	wr.Flush()
	return b.Bytes(), nil
}
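
// The sketch below is illustrative only and not part of the original helpers:
// it shows one way a test might consume the WAL produced by WALWithNBlocks,
// decoding it message by message with the WALDecoder assumed to be declared in
// wal.go of this package. The 6-block count and function name are arbitrary.
func exampleDecodeGeneratedWAL(t *testing.T) {
	walBody, err := WALWithNBlocks(t, 6)
	if err != nil {
		t.Fatal(err)
	}

	dec := NewWALDecoder(bytes.NewReader(walBody))
	for {
		msg, err := dec.Decode()
		if err == io.EOF {
			break // end of the generated WAL
		} else if err != nil {
			t.Fatal(err)
		}
		t.Logf("decoded WAL message: %v", msg)
	}
}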

func randPort() int {
	// returns a port between base and base+spread
	base, spread := 20000, 20000
	return base + cmn.RandIntn(spread)
}

func makeAddrs() (string, string, string) {
	start := randPort()
	return fmt.Sprintf("tcp://0.0.0.0:%d", start),
		fmt.Sprintf("tcp://0.0.0.0:%d", start+1),
		fmt.Sprintf("tcp://0.0.0.0:%d", start+2)
}

// getConfig returns a config for test cases
func getConfig(t *testing.T) *cfg.Config {
	c := cfg.ResetTestRoot(t.Name())

	// use random ports so tests can run in parallel
	tm, rpc, grpc := makeAddrs()
	c.P2P.ListenAddress = tm
	c.RPC.ListenAddress = rpc
	c.RPC.GRPCListenAddress = grpc
	return c
}

// byteBufferWAL is a WAL which writes all msgs to a byte buffer. Writing stops
// when heightToStop is reached. The client is notified via the
// signalWhenStopsTo channel.
type byteBufferWAL struct {
	enc               *WALEncoder
	stopped           bool
	heightToStop      int64
	signalWhenStopsTo chan<- struct{}

	logger log.Logger
}
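
// Compile-time check, added for illustration: byteBufferWAL is assigned to
// consensusState.wal above, so it must satisfy the WAL interface (assumed to
// be declared in wal.go of this package).
var _ WAL = &byteBufferWAL{}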

// needed for determinism
var fixedTime, _ = time.Parse(time.RFC3339, "2017-01-02T15:04:05Z")

func newByteBufferWAL(logger log.Logger, enc *WALEncoder, nBlocks int64, signalStop chan<- struct{}) *byteBufferWAL {
	return &byteBufferWAL{
		enc:               enc,
		heightToStop:      nBlocks,
		signalWhenStopsTo: signalStop,
		logger:            logger,
	}
}

// Write writes the message to the internal buffer, except when heightToStop is
// reached, in which case it signals the caller via signalWhenStopsTo and
// skips writing.
func (w *byteBufferWAL) Write(m WALMessage) {
	if w.stopped {
		w.logger.Debug("WAL already stopped. Not writing message", "msg", m)
		return
	}

	if endMsg, ok := m.(EndHeightMessage); ok {
		w.logger.Debug("WAL write end height message", "height", endMsg.Height, "stopHeight", w.heightToStop)
		if endMsg.Height == w.heightToStop {
			w.logger.Debug("Stopping WAL at height", "height", endMsg.Height)
			w.signalWhenStopsTo <- struct{}{}
			w.stopped = true
			return
		}
	}

	w.logger.Debug("WAL Write Message", "msg", m)
	err := w.enc.Encode(&TimedWALMessage{fixedTime, m})
	if err != nil {
		panic(fmt.Sprintf("failed to encode the msg %v", m))
	}
}

func (w *byteBufferWAL) WriteSync(m WALMessage) {
	w.Write(m)
}

func (w *byteBufferWAL) FlushAndSync() error { return nil }

func (w *byteBufferWAL) SearchForEndHeight(height int64, options *WALSearchOptions) (rd io.ReadCloser, found bool, err error) {
	return nil, false, nil
}

func (w *byteBufferWAL) Start() error { return nil }
func (w *byteBufferWAL) Stop() error  { return nil }
func (w *byteBufferWAL) Wait()        {}