blockchain: Reorg reactor (#3561)

* goroutines in blockchain reactor

* Added reference to the goroutine diagram

* Initial commit

* cleanup

* Undo testing_logger change, committed by mistake

* Fix the test loggers

* pulled some fsm code into pool.go

* added pool tests

* changes to the design

added block requests under peer

moved the request trigger in the reactor poolRoutine, triggered now by a ticker

in general moved everything required for making block requests smarter in the poolRoutine

added a simple map of heights to keep track of what will need to be requested next

added a few more tests

* send errors to FSM in a different channel than blocks

send errors (RemovePeer) from switch on a different channel than the
one receiving blocks
renamed channels
added more pool tests

* more pool tests

* lint errors

* more tests

* more tests

* switch fast sync to new implementation

* fixed data race in tests

* cleanup

* finished fsm tests

* address golangci comments :)

* address golangci comments :)

* Added timeout on next block needed to advance

* updating docs and cleanup

* fix issue in test from previous cleanup

* cleanup

* Added termination scenarios, tests and more cleanup

* small fixes to adr, comments and cleanup

* Fix bug in sendRequest()

If we tried to send a request to a peer not present in the switch, a
missing continue statement caused the request to be blackholed in a peer
that was removed and never retried.

While this bug was manifesting, the reactor kept asking for other
blocks that would be stored and never consumed. Added the number of
unconsumed blocks in the math for requesting blocks ahead of current
processing height so eventually there will be no more blocks requested
until the already received ones are consumed.

* remove bpPeer's didTimeout field

* Use distinct err codes for peer timeout and FSM timeouts

* Don't allow peers to update with lower height

* review comments from Ethan and Zarko

* some cleanup, renaming, comments

* Move block execution in separate goroutine

* Remove pool's numPending

* review comments

* fix lint, remove old blockchain reactor and duplicates in fsm tests

* small reorg around peer after review comments

* add the reactor spec

* verify block only once

* review comments

* change to int for max number of pending requests

* cleanup and godoc

* Add configuration flag fast sync version

* golangci fixes

* fix config template

* move both reactor versions under blockchain

* cleanup, golint, renaming stuff

* updated documentation, fixed more golint warnings

* integrate with behavior package

* sync with master

* gofmt

* add changelog_pending entry

* move to improvements

* suggestion to changelog entry
This commit is contained in:
Anca Zamfir
2019-07-23 10:58:52 +02:00
committed by Jack Zampolin
parent e89991c445
commit abc30821f4
34 changed files with 4275 additions and 54 deletions

View File

@ -18,8 +18,8 @@ import (
amino "github.com/tendermint/go-amino"
abci "github.com/tendermint/tendermint/abci/types"
"github.com/tendermint/tendermint/blockchain"
bc "github.com/tendermint/tendermint/blockchain"
bcv0 "github.com/tendermint/tendermint/blockchain/v0"
bcv1 "github.com/tendermint/tendermint/blockchain/v1"
cfg "github.com/tendermint/tendermint/config"
"github.com/tendermint/tendermint/consensus"
cs "github.com/tendermint/tendermint/consensus"
@ -41,6 +41,7 @@ import (
"github.com/tendermint/tendermint/state/txindex"
"github.com/tendermint/tendermint/state/txindex/kv"
"github.com/tendermint/tendermint/state/txindex/null"
"github.com/tendermint/tendermint/store"
"github.com/tendermint/tendermint/types"
tmtime "github.com/tendermint/tendermint/types/time"
"github.com/tendermint/tendermint/version"
@ -175,9 +176,9 @@ type Node struct {
// services
eventBus *types.EventBus // pub/sub for services
stateDB dbm.DB
blockStore *bc.BlockStore // store the blockchain to disk
bcReactor *bc.BlockchainReactor // for fast-syncing
mempoolReactor *mempl.Reactor // for gossipping transactions
blockStore *store.BlockStore // store the blockchain to disk
bcReactor p2p.Reactor // for fast-syncing
mempoolReactor *mempl.Reactor // for gossipping transactions
mempool mempl.Mempool
consensusState *cs.ConsensusState // latest consensus state
consensusReactor *cs.ConsensusReactor // for participating in the consensus
@ -190,13 +191,13 @@ type Node struct {
prometheusSrv *http.Server
}
func initDBs(config *cfg.Config, dbProvider DBProvider) (blockStore *bc.BlockStore, stateDB dbm.DB, err error) {
func initDBs(config *cfg.Config, dbProvider DBProvider) (blockStore *store.BlockStore, stateDB dbm.DB, err error) {
var blockStoreDB dbm.DB
blockStoreDB, err = dbProvider(&DBContext{"blockstore", config})
if err != nil {
return
}
blockStore = bc.NewBlockStore(blockStoreDB)
blockStore = store.NewBlockStore(blockStoreDB)
stateDB, err = dbProvider(&DBContext{"state", config})
if err != nil {
@ -337,6 +338,26 @@ func createEvidenceReactor(config *cfg.Config, dbProvider DBProvider,
return evidenceReactor, evidencePool, nil
}
// createBlockchainReactor constructs the blockchain reactor implementation
// selected by config.FastSync.Version ("v0" or "v1"), handing it a copy of
// state together with blockExec, blockStore and the fastSync flag, and then
// attaches logger tagged with module "blockchain".
//
// Any other version string yields a nil reactor and an
// "unknown fastsync version" error.
func createBlockchainReactor(config *cfg.Config,
	state sm.State,
	blockExec *sm.BlockExecutor,
	blockStore *store.BlockStore,
	fastSync bool,
	logger log.Logger) (bcReactor p2p.Reactor, err error) {

	// Read the configured version once; it drives both the selection and the
	// error message below.
	fsVersion := config.FastSync.Version

	switch fsVersion {
	case "v0":
		bcReactor = bcv0.NewBlockchainReactor(state.Copy(), blockExec, blockStore, fastSync)
	case "v1":
		bcReactor = bcv1.NewBlockchainReactor(state.Copy(), blockExec, blockStore, fastSync)
	default:
		return nil, fmt.Errorf("unknown fastsync version %s", fsVersion)
	}

	// Both implementations are handled uniformly from here on via p2p.Reactor.
	blockchainLogger := logger.With("module", "blockchain")
	bcReactor.SetLogger(blockchainLogger)

	return bcReactor, nil
}
func createConsensusReactor(config *cfg.Config,
state sm.State,
blockExec *sm.BlockExecutor,
@ -431,7 +452,7 @@ func createSwitch(config *cfg.Config,
p2pMetrics *p2p.Metrics,
peerFilters []p2p.PeerFilterFunc,
mempoolReactor *mempl.Reactor,
bcReactor *blockchain.BlockchainReactor,
bcReactor p2p.Reactor,
consensusReactor *consensus.ConsensusReactor,
evidenceReactor *evidence.EvidenceReactor,
nodeInfo p2p.NodeInfo,
@ -572,7 +593,7 @@ func NewNode(config *cfg.Config,
// Decide whether to fast-sync or not
// We don't fast-sync when the only validator is us.
fastSync := config.FastSync && !onlyValidatorIsUs(state, privValidator)
fastSync := config.FastSyncMode && !onlyValidatorIsUs(state, privValidator)
csMetrics, p2pMetrics, memplMetrics, smMetrics := metricsProvider(genDoc.ChainID)
@ -596,8 +617,10 @@ func NewNode(config *cfg.Config,
)
// Make BlockchainReactor
bcReactor := bc.NewBlockchainReactor(state.Copy(), blockExec, blockStore, fastSync)
bcReactor.SetLogger(logger.With("module", "blockchain"))
bcReactor, err := createBlockchainReactor(config, state, blockExec, blockStore, fastSync, logger)
if err != nil {
return nil, errors.Wrap(err, "could not create blockchain reactor")
}
// Make ConsensusReactor
consensusReactor, consensusState := createConsensusReactor(
@ -930,7 +953,7 @@ func (n *Node) Switch() *p2p.Switch {
}
// BlockStore returns the Node's BlockStore.
func (n *Node) BlockStore() *bc.BlockStore {
func (n *Node) BlockStore() *store.BlockStore {
return n.blockStore
}
@ -1018,6 +1041,17 @@ func makeNodeInfo(
if _, ok := txIndexer.(*null.TxIndex); ok {
txIndexerStatus = "off"
}
var bcChannel byte
switch config.FastSync.Version {
case "v0":
bcChannel = bcv0.BlockchainChannel
case "v1":
bcChannel = bcv1.BlockchainChannel
default:
return nil, fmt.Errorf("unknown fastsync version %s", config.FastSync.Version)
}
nodeInfo := p2p.DefaultNodeInfo{
ProtocolVersion: p2p.NewProtocolVersion(
version.P2PProtocol, // global
@ -1028,7 +1062,7 @@ func makeNodeInfo(
Network: genDoc.ChainID,
Version: version.TMCoreSemVer,
Channels: []byte{
bc.BlockchainChannel,
bcChannel,
cs.StateChannel, cs.DataChannel, cs.VoteChannel, cs.VoteSetBitsChannel,
mempl.MempoolChannel,
evidence.EvidenceChannel,