mirror of
https://github.com/fluencelabs/tendermint
synced 2025-04-25 14:52:17 +00:00
Merge pull request #1805 from tendermint/jae/optimize_blockchain
Optimizing blockchain reactor.
This commit is contained in:
commit
b92860b6c4
10
CHANGELOG_PENDING.md
Normal file
10
CHANGELOG_PENDING.md
Normal file
@ -0,0 +1,10 @@
|
||||
# Pending
|
||||
|
||||
BREAKING CHANGES:
|
||||
- [types] CanonicalTime uses nanoseconds instead of clipping to ms
|
||||
- breaks serialization/signing of all messages with a timestamp
|
||||
|
||||
IMPROVEMENTS:
|
||||
- [blockchain] Improve fast-sync logic
|
||||
- tweak params
|
||||
- only process one block at a time to avoid starving
|
1
Gopkg.lock
generated
1
Gopkg.lock
generated
@ -131,7 +131,6 @@
|
||||
version = "v1.0"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
name = "github.com/jmhodges/levigo"
|
||||
packages = ["."]
|
||||
revision = "c42d9e0ca023e2198120196f842701bb4c55d7b9"
|
||||
|
@ -91,6 +91,10 @@
|
||||
name = "google.golang.org/genproto"
|
||||
revision = "7fd901a49ba6a7f87732eb344f6e3c5b19d1b200"
|
||||
|
||||
[[override]]
|
||||
name = "github.com/jmhodges/levigo"
|
||||
revision = "c42d9e0ca023e2198120196f842701bb4c55d7b9"
|
||||
|
||||
[[constraint]]
|
||||
name = "github.com/ebuchman/fail-test"
|
||||
revision = "95f809107225be108efcf10a3509e4ea6ceef3c4"
|
||||
|
17
Makefile
17
Makefile
@ -5,9 +5,10 @@ GOTOOLS = \
|
||||
github.com/gogo/protobuf/protoc-gen-gogo \
|
||||
github.com/gogo/protobuf/gogoproto \
|
||||
github.com/square/certstrap
|
||||
PACKAGES=$(shell go list ./...)
|
||||
PACKAGES=$(shell go list ./... | grep -v '/vendor/')
|
||||
|
||||
INCLUDE = -I=. -I=${GOPATH}/src -I=${GOPATH}/src/github.com/gogo/protobuf/protobuf
|
||||
BUILD_TAGS?=tendermint
|
||||
BUILD_TAGS?='tendermint'
|
||||
BUILD_FLAGS = -ldflags "-X github.com/tendermint/tendermint/version.GitCommit=`git rev-parse --short=8 HEAD`"
|
||||
|
||||
all: check build test install
|
||||
@ -19,13 +20,13 @@ check: check_tools ensure_deps
|
||||
### Build Tendermint
|
||||
|
||||
build:
|
||||
CGO_ENABLED=0 go build $(BUILD_FLAGS) -tags '$(BUILD_TAGS)' -o build/tendermint ./cmd/tendermint/
|
||||
CGO_ENABLED=0 go build $(BUILD_FLAGS) -tags $(BUILD_TAGS) -o build/tendermint ./cmd/tendermint/
|
||||
|
||||
build_race:
|
||||
CGO_ENABLED=0 go build -race $(BUILD_FLAGS) -tags '$(BUILD_TAGS)' -o build/tendermint ./cmd/tendermint
|
||||
CGO_ENABLED=0 go build -race $(BUILD_FLAGS) -tags $(BUILD_TAGS) -o build/tendermint ./cmd/tendermint
|
||||
|
||||
install:
|
||||
CGO_ENABLED=0 go install $(BUILD_FLAGS) -tags '$(BUILD_TAGS)' ./cmd/tendermint
|
||||
CGO_ENABLED=0 go install $(BUILD_FLAGS) -tags $(BUILD_TAGS) ./cmd/tendermint
|
||||
|
||||
########################################
|
||||
### Protobuf
|
||||
@ -57,7 +58,7 @@ install_abci:
|
||||
# dist builds binaries for all platforms and packages them for distribution
|
||||
# TODO add abci to these scripts
|
||||
dist:
|
||||
@BUILD_TAGS='$(BUILD_TAGS)' sh -c "'$(CURDIR)/scripts/dist.sh'"
|
||||
@BUILD_TAGS=$(BUILD_TAGS) sh -c "'$(CURDIR)/scripts/dist.sh'"
|
||||
|
||||
########################################
|
||||
### Tools & dependencies
|
||||
@ -107,7 +108,7 @@ draw_deps:
|
||||
|
||||
get_deps_bin_size:
|
||||
@# Copy of build recipe with additional flags to perform binary size analysis
|
||||
$(eval $(shell go build -work -a $(BUILD_FLAGS) -tags '$(BUILD_TAGS)' -o build/tendermint ./cmd/tendermint/ 2>&1))
|
||||
$(eval $(shell go build -work -a $(BUILD_FLAGS) -tags $(BUILD_TAGS) -o build/tendermint ./cmd/tendermint/ 2>&1))
|
||||
@find $(WORK) -type f -name "*.a" | xargs -I{} du -hxs "{}" | sort -rh | sed -e s:${WORK}/::g > deps_bin_size.log
|
||||
@echo "Results can be found here: $(CURDIR)/deps_bin_size.log"
|
||||
|
||||
@ -207,7 +208,7 @@ vagrant_test:
|
||||
### go tests
|
||||
test:
|
||||
@echo "--> Running go test"
|
||||
@go test $(PACKAGES)
|
||||
@GOCACHE=off go test $(PACKAGES)
|
||||
|
||||
test_race:
|
||||
@echo "--> Running go test --race"
|
||||
|
@ -29,10 +29,10 @@ eg, L = latency = 0.1s
|
||||
*/
|
||||
|
||||
const (
|
||||
requestIntervalMS = 100
|
||||
maxTotalRequesters = 1000
|
||||
requestIntervalMS = 2
|
||||
maxTotalRequesters = 600
|
||||
maxPendingRequests = maxTotalRequesters
|
||||
maxPendingRequestsPerPeer = 50
|
||||
maxPendingRequestsPerPeer = 20
|
||||
|
||||
// Minimum recv rate to ensure we're receiving blocks from a peer fast
|
||||
// enough. If a peer is not sending us data at at least that rate, we
|
||||
@ -219,14 +219,12 @@ func (pool *BlockPool) RedoRequest(height int64) p2p.ID {
|
||||
defer pool.mtx.Unlock()
|
||||
|
||||
request := pool.requesters[height]
|
||||
|
||||
if request.block == nil {
|
||||
panic("Expected block to be non-nil")
|
||||
peerID := request.getPeerID()
|
||||
if peerID != p2p.ID("") {
|
||||
// RemovePeer will redo all requesters associated with this peer.
|
||||
pool.removePeer(peerID)
|
||||
}
|
||||
|
||||
// RemovePeer will redo all requesters associated with this peer.
|
||||
pool.removePeer(request.peerID)
|
||||
return request.peerID
|
||||
return peerID
|
||||
}
|
||||
|
||||
// TODO: ensure that blocks come in order for each peer.
|
||||
|
@ -18,7 +18,8 @@ const (
|
||||
// BlockchainChannel is a channel for blocks and status updates (`BlockStore` height)
|
||||
BlockchainChannel = byte(0x40)
|
||||
|
||||
trySyncIntervalMS = 50
|
||||
trySyncIntervalMS = 10
|
||||
|
||||
// stop syncing when last block's time is
|
||||
// within this much of the system time.
|
||||
// stopSyncingDurationMinutes = 10
|
||||
@ -76,8 +77,9 @@ func NewBlockchainReactor(state sm.State, blockExec *sm.BlockExecutor, store *Bl
|
||||
store.Height()))
|
||||
}
|
||||
|
||||
const capacity = 1000 // must be bigger than peers count
|
||||
requestsCh := make(chan BlockRequest, capacity)
|
||||
requestsCh := make(chan BlockRequest, maxTotalRequesters)
|
||||
|
||||
const capacity = 1000 // must be bigger than peers count
|
||||
errorsCh := make(chan peerError, capacity) // so we don't block in #Receive#pool.AddBlock
|
||||
|
||||
pool := NewBlockPool(
|
||||
@ -209,7 +211,6 @@ func (bcR *BlockchainReactor) Receive(chID byte, src p2p.Peer, msgBytes []byte)
|
||||
|
||||
// Handle messages from the poolReactor telling the reactor what to do.
|
||||
// NOTE: Don't sleep in the FOR_LOOP or otherwise slow it down!
|
||||
// (Except for the SYNC_LOOP, which is the primary purpose and must be synchronous.)
|
||||
func (bcR *BlockchainReactor) poolRoutine() {
|
||||
|
||||
trySyncTicker := time.NewTicker(trySyncIntervalMS * time.Millisecond)
|
||||
@ -224,6 +225,8 @@ func (bcR *BlockchainReactor) poolRoutine() {
|
||||
lastHundred := time.Now()
|
||||
lastRate := 0.0
|
||||
|
||||
didProcessCh := make(chan struct{}, 1)
|
||||
|
||||
FOR_LOOP:
|
||||
for {
|
||||
select {
|
||||
@ -239,14 +242,17 @@ FOR_LOOP:
|
||||
// The pool handles timeouts, just let it go.
|
||||
continue FOR_LOOP
|
||||
}
|
||||
|
||||
case err := <-bcR.errorsCh:
|
||||
peer := bcR.Switch.Peers().Get(err.peerID)
|
||||
if peer != nil {
|
||||
bcR.Switch.StopPeerForError(peer, err)
|
||||
}
|
||||
|
||||
case <-statusUpdateTicker.C:
|
||||
// ask for status updates
|
||||
go bcR.BroadcastStatusRequest() // nolint: errcheck
|
||||
|
||||
case <-switchToConsensusTicker.C:
|
||||
height, numPending, lenRequesters := bcR.pool.GetStatus()
|
||||
outbound, inbound, _ := bcR.Switch.NumPeers()
|
||||
@ -261,60 +267,78 @@ FOR_LOOP:
|
||||
|
||||
break FOR_LOOP
|
||||
}
|
||||
|
||||
case <-trySyncTicker.C: // chan time
|
||||
// This loop can be slow as long as it's doing syncing work.
|
||||
SYNC_LOOP:
|
||||
for i := 0; i < 10; i++ {
|
||||
// See if there are any blocks to sync.
|
||||
first, second := bcR.pool.PeekTwoBlocks()
|
||||
//bcR.Logger.Info("TrySync peeked", "first", first, "second", second)
|
||||
if first == nil || second == nil {
|
||||
// We need both to sync the first block.
|
||||
break SYNC_LOOP
|
||||
select {
|
||||
case didProcessCh <- struct{}{}:
|
||||
default:
|
||||
}
|
||||
|
||||
case <-didProcessCh:
|
||||
// NOTE: It is a subtle mistake to process more than a single block
|
||||
// at a time (e.g. 10) here, because we only TrySend 1 request per
|
||||
// loop. The ratio mismatch can result in starving of blocks, a
|
||||
// sudden burst of requests and responses, and repeat.
|
||||
// Consequently, it is better to split these routines rather than
|
||||
// coupling them as it's written here. TODO uncouple from request
|
||||
// routine.
|
||||
|
||||
// See if there are any blocks to sync.
|
||||
first, second := bcR.pool.PeekTwoBlocks()
|
||||
//bcR.Logger.Info("TrySync peeked", "first", first, "second", second)
|
||||
if first == nil || second == nil {
|
||||
// We need both to sync the first block.
|
||||
continue FOR_LOOP
|
||||
} else {
|
||||
// Try again quickly next loop.
|
||||
didProcessCh <- struct{}{}
|
||||
}
|
||||
|
||||
firstParts := first.MakePartSet(state.ConsensusParams.BlockPartSizeBytes)
|
||||
firstPartsHeader := firstParts.Header()
|
||||
firstID := types.BlockID{first.Hash(), firstPartsHeader}
|
||||
// Finally, verify the first block using the second's commit
|
||||
// NOTE: we can probably make this more efficient, but note that calling
|
||||
// first.Hash() doesn't verify the tx contents, so MakePartSet() is
|
||||
// currently necessary.
|
||||
err := state.Validators.VerifyCommit(
|
||||
chainID, firstID, first.Height, second.LastCommit)
|
||||
if err != nil {
|
||||
bcR.Logger.Error("Error in validation", "err", err)
|
||||
peerID := bcR.pool.RedoRequest(first.Height)
|
||||
peer := bcR.Switch.Peers().Get(peerID)
|
||||
if peer != nil {
|
||||
// NOTE: we've already removed the peer's request, but we
|
||||
// still need to clean up the rest.
|
||||
bcR.Switch.StopPeerForError(peer, fmt.Errorf("BlockchainReactor validation error: %v", err))
|
||||
}
|
||||
firstParts := first.MakePartSet(state.ConsensusParams.BlockPartSizeBytes)
|
||||
firstPartsHeader := firstParts.Header()
|
||||
firstID := types.BlockID{first.Hash(), firstPartsHeader}
|
||||
// Finally, verify the first block using the second's commit
|
||||
// NOTE: we can probably make this more efficient, but note that calling
|
||||
// first.Hash() doesn't verify the tx contents, so MakePartSet() is
|
||||
// currently necessary.
|
||||
err := state.Validators.VerifyCommit(
|
||||
chainID, firstID, first.Height, second.LastCommit)
|
||||
continue FOR_LOOP
|
||||
} else {
|
||||
bcR.pool.PopRequest()
|
||||
|
||||
// TODO: batch saves so we dont persist to disk every block
|
||||
bcR.store.SaveBlock(first, firstParts, second.LastCommit)
|
||||
|
||||
// TODO: same thing for app - but we would need a way to
|
||||
// get the hash without persisting the state
|
||||
var err error
|
||||
state, err = bcR.blockExec.ApplyBlock(state, firstID, first)
|
||||
if err != nil {
|
||||
bcR.Logger.Error("Error in validation", "err", err)
|
||||
peerID := bcR.pool.RedoRequest(first.Height)
|
||||
peer := bcR.Switch.Peers().Get(peerID)
|
||||
if peer != nil {
|
||||
bcR.Switch.StopPeerForError(peer, fmt.Errorf("BlockchainReactor validation error: %v", err))
|
||||
}
|
||||
break SYNC_LOOP
|
||||
} else {
|
||||
bcR.pool.PopRequest()
|
||||
// TODO This is bad, are we zombie?
|
||||
cmn.PanicQ(cmn.Fmt("Failed to process committed block (%d:%X): %v",
|
||||
first.Height, first.Hash(), err))
|
||||
}
|
||||
blocksSynced++
|
||||
|
||||
// TODO: batch saves so we dont persist to disk every block
|
||||
bcR.store.SaveBlock(first, firstParts, second.LastCommit)
|
||||
|
||||
// TODO: same thing for app - but we would need a way to
|
||||
// get the hash without persisting the state
|
||||
var err error
|
||||
state, err = bcR.blockExec.ApplyBlock(state, firstID, first)
|
||||
if err != nil {
|
||||
// TODO This is bad, are we zombie?
|
||||
cmn.PanicQ(cmn.Fmt("Failed to process committed block (%d:%X): %v",
|
||||
first.Height, first.Hash(), err))
|
||||
}
|
||||
blocksSynced++
|
||||
|
||||
if blocksSynced%100 == 0 {
|
||||
lastRate = 0.9*lastRate + 0.1*(100/time.Since(lastHundred).Seconds())
|
||||
bcR.Logger.Info("Fast Sync Rate", "height", bcR.pool.height,
|
||||
"max_peer_height", bcR.pool.MaxPeerHeight(), "blocks/s", lastRate)
|
||||
lastHundred = time.Now()
|
||||
}
|
||||
if blocksSynced%100 == 0 {
|
||||
lastRate = 0.9*lastRate + 0.1*(100/time.Since(lastHundred).Seconds())
|
||||
bcR.Logger.Info("Fast Sync Rate", "height", bcR.pool.height,
|
||||
"max_peer_height", bcR.pool.MaxPeerHeight(), "blocks/s", lastRate)
|
||||
lastHundred = time.Now()
|
||||
}
|
||||
}
|
||||
continue FOR_LOOP
|
||||
|
||||
case <-bcR.Quit():
|
||||
break FOR_LOOP
|
||||
}
|
||||
|
@ -184,7 +184,7 @@ func TestDifferByTimestamp(t *testing.T) {
|
||||
assert.NoError(t, err, "expected no error signing proposal")
|
||||
signBytes := proposal.SignBytes(chainID)
|
||||
sig := proposal.Signature
|
||||
timeStamp := clipToMS(proposal.Timestamp)
|
||||
timeStamp := proposal.Timestamp
|
||||
|
||||
// manipulate the timestamp. should get changed back
|
||||
proposal.Timestamp = proposal.Timestamp.Add(time.Millisecond)
|
||||
@ -208,7 +208,7 @@ func TestDifferByTimestamp(t *testing.T) {
|
||||
|
||||
signBytes := vote.SignBytes(chainID)
|
||||
sig := vote.Signature
|
||||
timeStamp := clipToMS(vote.Timestamp)
|
||||
timeStamp := vote.Timestamp
|
||||
|
||||
// manipulate the timestamp. should get changed back
|
||||
vote.Timestamp = vote.Timestamp.Add(time.Millisecond)
|
||||
@ -243,10 +243,3 @@ func newProposal(height int64, round int, partsHeader types.PartSetHeader) *type
|
||||
Timestamp: time.Now().UTC(),
|
||||
}
|
||||
}
|
||||
|
||||
func clipToMS(t time.Time) time.Time {
|
||||
nano := t.UnixNano()
|
||||
million := int64(1000000)
|
||||
nano = (nano / million) * million
|
||||
return time.Unix(0, nano).UTC()
|
||||
}
|
||||
|
@ -80,6 +80,7 @@ func loadState(db dbm.DB, key []byte) (state State) {
|
||||
}
|
||||
|
||||
// SaveState persists the State, the ValidatorsInfo, and the ConsensusParamsInfo to the database.
|
||||
// This flushes the writes (e.g. calls SetSync).
|
||||
func SaveState(db dbm.DB, state State) {
|
||||
saveState(db, state, stateKey)
|
||||
}
|
||||
@ -218,7 +219,7 @@ func saveValidatorsInfo(db dbm.DB, nextHeight, changeHeight int64, valSet *types
|
||||
if changeHeight == nextHeight {
|
||||
valInfo.ValidatorSet = valSet
|
||||
}
|
||||
db.SetSync(calcValidatorsKey(nextHeight), valInfo.Bytes())
|
||||
db.Set(calcValidatorsKey(nextHeight), valInfo.Bytes())
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
@ -289,5 +290,5 @@ func saveConsensusParamsInfo(db dbm.DB, nextHeight, changeHeight int64, params t
|
||||
if changeHeight == nextHeight {
|
||||
paramsInfo.ConsensusParams = params
|
||||
}
|
||||
db.SetSync(calcConsensusParamsKey(nextHeight), paramsInfo.Bytes())
|
||||
db.Set(calcConsensusParamsKey(nextHeight), paramsInfo.Bytes())
|
||||
}
|
||||
|
@ -9,7 +9,7 @@ import (
|
||||
// Canonical json is amino's json for structs with fields in alphabetical order
|
||||
|
||||
// TimeFormat is used for generating the sigs
|
||||
const TimeFormat = "2006-01-02T15:04:05.000Z"
|
||||
const TimeFormat = time.RFC3339Nano
|
||||
|
||||
type CanonicalJSONBlockID struct {
|
||||
Hash cmn.HexBytes `json:"hash,omitempty"`
|
||||
|
Loading…
x
Reference in New Issue
Block a user