tendermint/blockchain/reactor.go
Anton Kaliaev f6539737de
new pubsub package
comment out failing consensus tests for now

rewrite rpc httpclient to use new pubsub package

import pubsub as tmpubsub, query as tmquery

make event IDs constants
EventKey -> EventTypeKey

rename EventsPubsub to PubSub

mempool does not use pubsub

rename eventsSub to pubsub

new subscribe API

fix channel size issues and consensus tests bugs

refactor rpc client

add missing discardFromChan method

add mutex

rename pubsub to eventBus

remove IsRunning from WSRPCConnection interface (not needed)

add a comment in broadcastNewRoundStepsAndVotes

rename registerEventCallbacks to broadcastNewRoundStepsAndVotes

See https://dave.cheney.net/2014/03/19/channel-axioms

stop eventBuses after reactor tests

remove unnecessary Unsubscribe

return subscribe helper function

move discardFromChan to where it is used

subscribe now returns an err

this gives us ability to refuse to subscribe if pubsub is at its max
capacity.

use context for control overflow

cache queries

handle err when subscribing in replay_test

rename testClientID to testSubscriber

extract var

set channel buffer capacity to 1 in replay_file

fix byzantine_test

unsubscribe from single event, not all events

refactor httpclient to return events to appropriate channels

return failing testReplayCrashBeforeWriteVote test

fix TestValidatorSetChanges

refactor code a bit

fix testReplayCrashBeforeWriteVote

add comment

fix TestValidatorSetChanges

fixes from Bucky's review

update comment [ci skip]

test TxEventBuffer

update changelog

fix TestValidatorSetChanges (2nd attempt)

only do wg.Done when no errors

benchmark event bus

create pubsub server inside NewEventBus

only expose config params (later if needed)

set buffer capacity to 0 so we are not testing cache

new tx event format: key = "Tx" plus a tag {"tx.hash": XYZ}

This should allow to subscribe to all transactions! or a specific one
using a query: "tm.events.type = Tx and tx.hash = '013ABF99434...'"

use TimeoutCommit instead of afterPublishEventNewBlockTimeout

TimeoutCommit is the time a node waits after committing a block, before
it goes into the next height. So it will finish everything from the last
block, but then wait a bit. The idea is this gives it time to hear more
votes from other validators, to strengthen the commit it includes in the
next block. But it also gives it time to hear about new transactions.

waitForBlockWithUpdatedVals

rewrite WAL crash tests

Task:
test that we can recover from any WAL crash.

Solution:
the old tests were relying on event hub being run in the same thread (we
were injecting the private validator's last signature).

when considering a rewrite, we considered two possible solutions: write
a "fuzzy" testing system where WAL is crashing upon receiving a new
message, or inject failures and trigger them in tests using something
like https://github.com/coreos/gofail.

remove sleep

no cs.Lock around wal.Save

test different cases (empty block, non-empty block, ...)

comments

add comments

test 4 cases: empty block, non-empty block, non-empty block with smaller part size, many blocks

fixes as per Bucky's last review

reset subscriptions on UnsubscribeAll

use a simple counter to track message for which we panicked

also, set a smaller part size for all test cases
2017-10-30 00:32:22 -05:00

391 lines
12 KiB
Go

package blockchain
import (
"bytes"
"errors"
"reflect"
"time"
wire "github.com/tendermint/go-wire"
"github.com/tendermint/tendermint/p2p"
"github.com/tendermint/tendermint/proxy"
sm "github.com/tendermint/tendermint/state"
"github.com/tendermint/tendermint/types"
cmn "github.com/tendermint/tmlibs/common"
"github.com/tendermint/tmlibs/log"
)
const (
// BlockchainChannel is a channel for blocks and status updates (`BlockStore` height)
BlockchainChannel = byte(0x40)
defaultChannelCapacity = 1000
trySyncIntervalMS = 50
// stop syncing when last block's time is
// within this much of the system time.
// stopSyncingDurationMinutes = 10
// ask for best height every 10s
statusUpdateIntervalSeconds = 10
// check if we should switch to consensus reactor
switchToConsensusIntervalSeconds = 1
)
type consensusReactor interface {
// for when we switch from blockchain reactor and fast sync to
// the consensus machine
SwitchToConsensus(*sm.State, int)
}
// BlockchainReactor handles long-term catchup syncing.
type BlockchainReactor struct {
p2p.BaseReactor
state *sm.State
proxyAppConn proxy.AppConnConsensus // same as consensus.proxyAppConn
store *BlockStore
pool *BlockPool
fastSync bool
requestsCh chan BlockRequest
timeoutsCh chan string
eventBus *types.EventBus
}
// NewBlockchainReactor returns new reactor instance.
func NewBlockchainReactor(state *sm.State, proxyAppConn proxy.AppConnConsensus, store *BlockStore, fastSync bool) *BlockchainReactor {
if state.LastBlockHeight == store.Height()-1 {
store.height-- // XXX HACK, make this better
}
if state.LastBlockHeight != store.Height() {
cmn.PanicSanity(cmn.Fmt("state (%v) and store (%v) height mismatch", state.LastBlockHeight, store.Height()))
}
requestsCh := make(chan BlockRequest, defaultChannelCapacity)
timeoutsCh := make(chan string, defaultChannelCapacity)
pool := NewBlockPool(
store.Height()+1,
requestsCh,
timeoutsCh,
)
bcR := &BlockchainReactor{
state: state,
proxyAppConn: proxyAppConn,
store: store,
pool: pool,
fastSync: fastSync,
requestsCh: requestsCh,
timeoutsCh: timeoutsCh,
}
bcR.BaseReactor = *p2p.NewBaseReactor("BlockchainReactor", bcR)
return bcR
}
// SetLogger implements cmn.Service by setting the logger on reactor and pool.
func (bcR *BlockchainReactor) SetLogger(l log.Logger) {
bcR.BaseService.Logger = l
bcR.pool.Logger = l
}
// OnStart implements cmn.Service.
func (bcR *BlockchainReactor) OnStart() error {
bcR.BaseReactor.OnStart()
if bcR.fastSync {
_, err := bcR.pool.Start()
if err != nil {
return err
}
go bcR.poolRoutine()
}
return nil
}
// OnStop implements cmn.Service.
func (bcR *BlockchainReactor) OnStop() {
bcR.BaseReactor.OnStop()
bcR.pool.Stop()
}
// GetChannels implements Reactor
func (bcR *BlockchainReactor) GetChannels() []*p2p.ChannelDescriptor {
return []*p2p.ChannelDescriptor{
&p2p.ChannelDescriptor{
ID: BlockchainChannel,
Priority: 10,
SendQueueCapacity: 1000,
},
}
}
// AddPeer implements Reactor by sending our state to peer.
func (bcR *BlockchainReactor) AddPeer(peer p2p.Peer) {
if !peer.Send(BlockchainChannel, struct{ BlockchainMessage }{&bcStatusResponseMessage{bcR.store.Height()}}) {
// doing nothing, will try later in `poolRoutine`
}
}
// RemovePeer implements Reactor by removing peer from the pool.
func (bcR *BlockchainReactor) RemovePeer(peer p2p.Peer, reason interface{}) {
bcR.pool.RemovePeer(peer.Key())
}
// respondToPeer loads a block and sends it to the requesting peer,
// if we have it. Otherwise, we'll respond saying we don't have it.
// According to the Tendermint spec, if all nodes are honest,
// no node should be requesting for a block that's non-existent.
func (bcR *BlockchainReactor) respondToPeer(msg *bcBlockRequestMessage, src p2p.Peer) (queued bool) {
block := bcR.store.LoadBlock(msg.Height)
if block != nil {
msg := &bcBlockResponseMessage{Block: block}
return src.TrySend(BlockchainChannel, struct{ BlockchainMessage }{msg})
}
bcR.Logger.Info("Peer asking for a block we don't have", "src", src, "height", msg.Height)
return src.TrySend(BlockchainChannel, struct{ BlockchainMessage }{
&bcNoBlockResponseMessage{Height: msg.Height},
})
}
// Receive implements Reactor by handling 4 types of messages (look below).
func (bcR *BlockchainReactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) {
_, msg, err := DecodeMessage(msgBytes, bcR.maxMsgSize())
if err != nil {
bcR.Logger.Error("Error decoding message", "err", err)
return
}
bcR.Logger.Debug("Receive", "src", src, "chID", chID, "msg", msg)
// TODO: improve logic to satisfy megacheck
switch msg := msg.(type) {
case *bcBlockRequestMessage:
if queued := bcR.respondToPeer(msg, src); !queued {
// Unfortunately not queued since the queue is full.
}
case *bcBlockResponseMessage:
// Got a block.
bcR.pool.AddBlock(src.Key(), msg.Block, len(msgBytes))
case *bcStatusRequestMessage:
// Send peer our state.
queued := src.TrySend(BlockchainChannel, struct{ BlockchainMessage }{&bcStatusResponseMessage{bcR.store.Height()}})
if !queued {
// sorry
}
case *bcStatusResponseMessage:
// Got a peer status. Unverified.
bcR.pool.SetPeerHeight(src.Key(), msg.Height)
default:
bcR.Logger.Error(cmn.Fmt("Unknown message type %v", reflect.TypeOf(msg)))
}
}
// maxMsgSize returns the maximum allowable size of a
// message on the blockchain reactor.
func (bcR *BlockchainReactor) maxMsgSize() int {
return bcR.state.Params.BlockSizeParams.MaxBytes + 2
}
// Handle messages from the poolReactor telling the reactor what to do.
// NOTE: Don't sleep in the FOR_LOOP or otherwise slow it down!
// (Except for the SYNC_LOOP, which is the primary purpose and must be synchronous.)
func (bcR *BlockchainReactor) poolRoutine() {
trySyncTicker := time.NewTicker(trySyncIntervalMS * time.Millisecond)
statusUpdateTicker := time.NewTicker(statusUpdateIntervalSeconds * time.Second)
switchToConsensusTicker := time.NewTicker(switchToConsensusIntervalSeconds * time.Second)
blocksSynced := 0
chainID := bcR.state.ChainID
lastHundred := time.Now()
lastRate := 0.0
FOR_LOOP:
for {
select {
case request := <-bcR.requestsCh: // chan BlockRequest
peer := bcR.Switch.Peers().Get(request.PeerID)
if peer == nil {
continue FOR_LOOP // Peer has since been disconnected.
}
msg := &bcBlockRequestMessage{request.Height}
queued := peer.TrySend(BlockchainChannel, struct{ BlockchainMessage }{msg})
if !queued {
// We couldn't make the request, send-queue full.
// The pool handles timeouts, just let it go.
continue FOR_LOOP
}
case peerID := <-bcR.timeoutsCh: // chan string
// Peer timed out.
peer := bcR.Switch.Peers().Get(peerID)
if peer != nil {
bcR.Switch.StopPeerForError(peer, errors.New("BlockchainReactor Timeout"))
}
case <-statusUpdateTicker.C:
// ask for status updates
go bcR.BroadcastStatusRequest()
case <-switchToConsensusTicker.C:
height, numPending, lenRequesters := bcR.pool.GetStatus()
outbound, inbound, _ := bcR.Switch.NumPeers()
bcR.Logger.Debug("Consensus ticker", "numPending", numPending, "total", lenRequesters,
"outbound", outbound, "inbound", inbound)
if bcR.pool.IsCaughtUp() {
bcR.Logger.Info("Time to switch to consensus reactor!", "height", height)
bcR.pool.Stop()
conR := bcR.Switch.Reactor("CONSENSUS").(consensusReactor)
conR.SwitchToConsensus(bcR.state, blocksSynced)
break FOR_LOOP
}
case <-trySyncTicker.C: // chan time
// This loop can be slow as long as it's doing syncing work.
SYNC_LOOP:
for i := 0; i < 10; i++ {
// See if there are any blocks to sync.
first, second := bcR.pool.PeekTwoBlocks()
//bcR.Logger.Info("TrySync peeked", "first", first, "second", second)
if first == nil || second == nil {
// We need both to sync the first block.
break SYNC_LOOP
}
firstParts := first.MakePartSet(bcR.state.Params.BlockPartSizeBytes)
firstPartsHeader := firstParts.Header()
// Finally, verify the first block using the second's commit
// NOTE: we can probably make this more efficient, but note that calling
// first.Hash() doesn't verify the tx contents, so MakePartSet() is
// currently necessary.
err := bcR.state.Validators.VerifyCommit(
chainID, types.BlockID{first.Hash(), firstPartsHeader}, first.Height, second.LastCommit)
if err != nil {
bcR.Logger.Error("Error in validation", "err", err)
bcR.pool.RedoRequest(first.Height)
break SYNC_LOOP
} else {
bcR.pool.PopRequest()
bcR.store.SaveBlock(first, firstParts, second.LastCommit)
// TODO: should we be firing events? need to fire NewBlock events manually ...
// NOTE: we could improve performance if we
// didn't make the app commit to disk every block
// ... but we would need a way to get the hash without it persisting
err := bcR.state.ApplyBlock(bcR.eventBus, bcR.proxyAppConn, first, firstPartsHeader, types.MockMempool{})
if err != nil {
// TODO This is bad, are we zombie?
cmn.PanicQ(cmn.Fmt("Failed to process committed block (%d:%X): %v", first.Height, first.Hash(), err))
}
blocksSynced += 1
if blocksSynced%100 == 0 {
lastRate = 0.9*lastRate + 0.1*(100/time.Since(lastHundred).Seconds())
bcR.Logger.Info("Fast Sync Rate", "height", bcR.pool.height,
"max_peer_height", bcR.pool.MaxPeerHeight(), "blocks/s", lastRate)
lastHundred = time.Now()
}
}
}
continue FOR_LOOP
case <-bcR.Quit:
break FOR_LOOP
}
}
}
// BroadcastStatusRequest broadcasts `BlockStore` height.
func (bcR *BlockchainReactor) BroadcastStatusRequest() error {
bcR.Switch.Broadcast(BlockchainChannel, struct{ BlockchainMessage }{&bcStatusRequestMessage{bcR.store.Height()}})
return nil
}
// SetEventBus sets event bus.
func (bcR *BlockchainReactor) SetEventBus(b *types.EventBus) {
bcR.eventBus = b
}
//-----------------------------------------------------------------------------
// Messages
const (
msgTypeBlockRequest = byte(0x10)
msgTypeBlockResponse = byte(0x11)
msgTypeNoBlockResponse = byte(0x12)
msgTypeStatusResponse = byte(0x20)
msgTypeStatusRequest = byte(0x21)
)
// BlockchainMessage is a generic message for this reactor.
type BlockchainMessage interface{}
var _ = wire.RegisterInterface(
struct{ BlockchainMessage }{},
wire.ConcreteType{&bcBlockRequestMessage{}, msgTypeBlockRequest},
wire.ConcreteType{&bcBlockResponseMessage{}, msgTypeBlockResponse},
wire.ConcreteType{&bcNoBlockResponseMessage{}, msgTypeNoBlockResponse},
wire.ConcreteType{&bcStatusResponseMessage{}, msgTypeStatusResponse},
wire.ConcreteType{&bcStatusRequestMessage{}, msgTypeStatusRequest},
)
// DecodeMessage decodes BlockchainMessage.
// TODO: ensure that bz is completely read.
func DecodeMessage(bz []byte, maxSize int) (msgType byte, msg BlockchainMessage, err error) {
msgType = bz[0]
n := int(0)
r := bytes.NewReader(bz)
msg = wire.ReadBinary(struct{ BlockchainMessage }{}, r, maxSize, &n, &err).(struct{ BlockchainMessage }).BlockchainMessage
if err != nil && n != len(bz) {
err = errors.New("DecodeMessage() had bytes left over")
}
return
}
//-------------------------------------
type bcBlockRequestMessage struct {
Height int
}
func (m *bcBlockRequestMessage) String() string {
return cmn.Fmt("[bcBlockRequestMessage %v]", m.Height)
}
type bcNoBlockResponseMessage struct {
Height int
}
func (brm *bcNoBlockResponseMessage) String() string {
return cmn.Fmt("[bcNoBlockResponseMessage %d]", brm.Height)
}
//-------------------------------------
// NOTE: keep up-to-date with maxBlockchainResponseSize
type bcBlockResponseMessage struct {
Block *types.Block
}
func (m *bcBlockResponseMessage) String() string {
return cmn.Fmt("[bcBlockResponseMessage %v]", m.Block.Height)
}
//-------------------------------------
type bcStatusRequestMessage struct {
Height int
}
func (m *bcStatusRequestMessage) String() string {
return cmn.Fmt("[bcStatusRequestMessage %v]", m.Height)
}
//-------------------------------------
type bcStatusResponseMessage struct {
Height int
}
func (m *bcStatusResponseMessage) String() string {
return cmn.Fmt("[bcStatusResponseMessage %v]", m.Height)
}