Merge pull request #3203 from tendermint/release/v0.29.1

Release/v0.29.1
Ethan Buchman 2019-01-24 11:34:20 -05:00 committed by GitHub
commit 4d7b29cd8f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 465 additions and 107 deletions


@ -240,7 +240,7 @@ jobs:
for pkg in $(go list github.com/tendermint/tendermint/... | circleci tests split --split-by=timings); do
id=$(basename "$pkg")
GOCACHE=off go test -v -timeout 5m -race -coverprofile=/tmp/workspace/profiles/$id.out -covermode=atomic "$pkg" | tee "/tmp/logs/$id-$RANDOM.log"
done
- persist_to_workspace:
root: /tmp/workspace


@ -1,5 +1,28 @@
# Changelog
## v0.29.1
*January 24, 2019*
Special thanks to external contributors on this release:
@infinytum, @gauthamzz
This release contains two important fixes: one for the p2p layer, where we
sometimes were not closing connections, and one for the consensus layer, where
consensus with no empty blocks (`create_empty_blocks = false`) could halt.
Friendly reminder, we have a [bug bounty
program](https://hackerone.com/tendermint).
### IMPROVEMENTS:
- [pex] [\#3037](https://github.com/tendermint/tendermint/issues/3037) Only log "Reached max attempts to dial" once
- [rpc] [\#3159](https://github.com/tendermint/tendermint/issues/3159) Expose
`triggered_timeout_commit` in the `/dump_consensus_state`
### BUG FIXES:
- [consensus] [\#3199](https://github.com/tendermint/tendermint/issues/3199) Fix consensus halt with no empty blocks from not resetting triggeredTimeoutCommit
- [p2p] [\#2967](https://github.com/tendermint/tendermint/issues/2967) Fix file descriptor leak
## v0.29.0
*January 21, 2019*


@ -94,7 +94,6 @@ type ConsensusState struct {
// internal state
mtx sync.RWMutex
cstypes.RoundState
triggeredTimeoutPrecommit bool
state sm.State // State until height-1.
// state changes may be triggered by: msgs from peers,
@ -732,6 +731,7 @@ func (cs *ConsensusState) handleTxsAvailable() {
cs.mtx.Lock()
defer cs.mtx.Unlock()
// we only need to do this for round 0
cs.enterNewRound(cs.Height, 0)
cs.enterPropose(cs.Height, 0)
}
@ -782,7 +782,7 @@ func (cs *ConsensusState) enterNewRound(height int64, round int) {
cs.ProposalBlockParts = nil
}
cs.Votes.SetRound(round + 1) // also track next round (round+1) to allow round-skipping
cs.TriggeredTimeoutPrecommit = false
cs.eventBus.PublishEventNewRound(cs.NewRoundEvent())
cs.metrics.Rounds.Set(float64(round))
@ -1128,12 +1128,12 @@ func (cs *ConsensusState) enterPrecommit(height int64, round int) {
func (cs *ConsensusState) enterPrecommitWait(height int64, round int) {
logger := cs.Logger.With("height", height, "round", round)
if cs.Height != height || round < cs.Round || (cs.Round == round && cs.TriggeredTimeoutPrecommit) {
logger.Debug(
fmt.Sprintf(
"enterPrecommitWait(%v/%v): Invalid args. "+
"Current state is Height/Round: %v/%v/, TriggeredTimeoutPrecommit:%v",
height, round, cs.Height, cs.Round, cs.TriggeredTimeoutPrecommit))
return
}
if !cs.Votes.Precommits(round).HasTwoThirdsAny() {
@ -1143,7 +1143,7 @@ func (cs *ConsensusState) enterPrecommitWait(height int64, round int) {
defer func() {
// Done enterPrecommitWait:
cs.TriggeredTimeoutPrecommit = true
cs.newStep()
}()


@ -1279,6 +1279,71 @@ func TestCommitFromPreviousRound(t *testing.T) {
ensureNewRound(newRoundCh, height+1, 0)
}
type fakeTxNotifier struct {
ch chan struct{}
}
func (n *fakeTxNotifier) TxsAvailable() <-chan struct{} {
return n.ch
}
func (n *fakeTxNotifier) Notify() {
n.ch <- struct{}{}
}
func TestStartNextHeightCorrectly(t *testing.T) {
cs1, vss := randConsensusState(4)
cs1.config.SkipTimeoutCommit = false
cs1.txNotifier = &fakeTxNotifier{ch: make(chan struct{})}
vs2, vs3, vs4 := vss[1], vss[2], vss[3]
height, round := cs1.Height, cs1.Round
proposalCh := subscribe(cs1.eventBus, types.EventQueryCompleteProposal)
timeoutProposeCh := subscribe(cs1.eventBus, types.EventQueryTimeoutPropose)
newRoundCh := subscribe(cs1.eventBus, types.EventQueryNewRound)
newBlockHeader := subscribe(cs1.eventBus, types.EventQueryNewBlockHeader)
addr := cs1.privValidator.GetPubKey().Address()
voteCh := subscribeToVoter(cs1, addr)
// start round and wait for propose and prevote
startTestRound(cs1, height, round)
ensureNewRound(newRoundCh, height, round)
ensureNewProposal(proposalCh, height, round)
rs := cs1.GetRoundState()
theBlockHash := rs.ProposalBlock.Hash()
theBlockParts := rs.ProposalBlockParts.Header()
ensurePrevote(voteCh, height, round)
validatePrevote(t, cs1, round, vss[0], theBlockHash)
signAddVotes(cs1, types.PrevoteType, theBlockHash, theBlockParts, vs2, vs3, vs4)
ensurePrecommit(voteCh, height, round)
// the proposed block should now be locked and our precommit added
validatePrecommit(t, cs1, round, round, vss[0], theBlockHash, theBlockHash)
rs = cs1.GetRoundState()
// add precommits
signAddVotes(cs1, types.PrecommitType, nil, types.PartSetHeader{}, vs2)
signAddVotes(cs1, types.PrecommitType, theBlockHash, theBlockParts, vs3)
signAddVotes(cs1, types.PrecommitType, theBlockHash, theBlockParts, vs4)
ensureNewBlockHeader(newBlockHeader, height, theBlockHash)
rs = cs1.GetRoundState()
assert.True(t, rs.TriggeredTimeoutPrecommit)
cs1.txNotifier.(*fakeTxNotifier).Notify()
ensureNewTimeout(timeoutProposeCh, height+1, round, cs1.config.TimeoutPropose.Nanoseconds())
rs = cs1.GetRoundState()
assert.False(t, rs.TriggeredTimeoutPrecommit, "triggeredTimeoutPrecommit should be false at the beginning of each round")
}
//------------------------------------------------------------------------------------------
// SlashingSuite
// TODO: Slashing


@ -84,6 +84,7 @@ type RoundState struct {
CommitRound int `json:"commit_round"` //
LastCommit *types.VoteSet `json:"last_commit"` // Last precommits at Height-1
LastValidators *types.ValidatorSet `json:"last_validators"`
TriggeredTimeoutPrecommit bool `json:"triggered_timeout_precommit"`
}
// Compressed version of the RoundState for use in RPC


@ -5,6 +5,8 @@ Author: Anton Kaliaev (@melekes)
## Changelog
02-10-2018: Initial draft
16-01-2019: Second version based on our conversation with Jae
17-01-2019: Third version explaining how new design solves current issues
## Context
@ -40,7 +42,14 @@ goroutines can be used to avoid uncontrolled memory growth.
In certain cases, this is what you want. But in our case, because we need
strict ordering of events (if event A was published before B, the guaranteed
delivery order will be A -> B), we can't publish msg in a new goroutine every time.
We can also have a goroutine per subscriber, although we'd need to be careful
with the number of subscribers. It's also more difficult to implement, and it's
unclear whether we'd benefit from it (since we'd be forced to create N additional
channels to distribute msg to these goroutines).
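To make the ordering argument concrete, here is a minimal, hypothetical Go sketch (not taken from the pubsub package) of why publishing each msg in its own goroutine loses the A -> B guarantee, while publishing from a single goroutine keeps it:

```go
package main

import "fmt"

func main() {
	out := make(chan string, 2)

	// One goroutine per publish: the scheduler decides which send runs first,
	// so a subscriber may observe "B" before "A".
	go func() { out <- "A" }()
	go func() { out <- "B" }()
	fmt.Println(<-out, <-out) // "A B" or "B A"

	// Single publishing goroutine: sends are sequenced, so the subscriber
	// always observes "A" and then "B".
	go func() {
		out <- "A"
		out <- "B"
	}()
	fmt.Println(<-out, <-out) // always "A B"
}
```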
### Non-blocking send
There is also a question of whether we should have a non-blocking send:
@ -56,15 +65,14 @@ for each subscriber {
```
This fixes the "slow client problem", but there is no way for a slow client to
know if it had missed a message. We could return a second channel and close it
to indicate subscription termination. On the other hand, if we're going to
stick with blocking send, **devs must always ensure subscriber's handling code
does not block**, which is a hard task to put on their shoulders.
The interim option is to run a pool of goroutines for a single message and wait
for all goroutines to finish. This will solve the "slow client problem", but we'd
still have to wait `max(goroutine_X_time)` before we can publish the next message.
My opinion: not worth doing.
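Returning to the non-blocking send above, a minimal hypothetical sketch (illustrative names, not the actual pubsub API) of how a publisher can stay unblocked by cancelling subscribers whose buffers are full:

```go
package main

import "fmt"

// subscriber stands in for a subscription: a buffered out channel plus a
// channel that is closed when the subscription is terminated.
type subscriber struct {
	out       chan string
	cancelled chan struct{}
}

// publish never blocks: a subscriber whose buffer is full is cancelled
// (a real implementation would also drop it from the subscriber list).
func publish(subs []*subscriber, msg string) {
	for _, s := range subs {
		select {
		case s.out <- msg:
		default:
			close(s.cancelled)
		}
	}
}

func main() {
	fast := &subscriber{out: make(chan string, 1), cancelled: make(chan struct{})}
	slow := &subscriber{out: make(chan string), cancelled: make(chan struct{})} // never reads

	publish([]*subscriber{fast, slow}, "NewBlock #1")

	fmt.Println(<-fast.out) // NewBlock #1
	select {
	case <-slow.cancelled:
		fmt.Println("slow subscriber cancelled")
	default:
	}
}
```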
### Channels vs Callbacks
@ -76,8 +84,6 @@ memory leaks and/or memory usage increase.
Go channels are de-facto standard for carrying data between goroutines.
**Question: Is it worth switching to callback functions?**
### Why `Subscribe()` accepts an `out` channel?
Because in our tests, we create buffered channels (cap: 1). Alternatively, we
@ -85,27 +91,89 @@ can make capacity an argument.
## Decision
Change Subscribe() function to return a `Subscription` struct:
```go
type Subscription struct {
// private fields
}
func (s *Subscription) Out() <-chan MsgAndTags
func (s *Subscription) Cancelled() <-chan struct{}
func (s *Subscription) Err() error
```
Out returns a channel onto which messages and tags are published.
Unsubscribe/UnsubscribeAll does not close the channel, so that clients do not
receive a nil message.
Cancelled returns a channel that's closed when the subscription is terminated;
it is meant to be used in a select statement.
If Cancelled is not closed yet, Err() returns nil.
If Cancelled is closed, Err returns a non-nil error explaining why:
Unsubscribed if the subscriber chose to unsubscribe,
OutOfCapacity if the subscriber is not pulling messages fast enough and the Out channel became full.
After Err returns a non-nil error, successive calls to Err() return the same error.
```go
subscription, err := pubsub.Subscribe(...)
if err != nil {
// ...
}
for {
select {
case msgAndTags := <-subscription.Out():
// ...
case <-subscription.Cancelled():
return subscription.Err()
}
}
```
Make the Out() channel buffered (cap: 1) by default. In most cases, we want to
terminate the slow subscriber. Only in rare cases do we want to block the pubsub
(e.g. when debugging consensus). This should lower the chances of the pubsub
being frozen.
```go
// outCap can be used to set capacity of the Out channel (1 by default). Set to 0
// for an unbuffered channel (WARNING: it may block the pubsub).
Subscribe(ctx context.Context, clientID string, query Query, outCap ...int) (Subscription, error)
```
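For illustration, a caller of the proposed API might pick the capacity per use case. A sketch assuming the signature above; `ctx`, `server`, `clientID` and `query` values are placeholders:

```go
// Default: Out() has capacity 1, so a subscriber that falls behind is
// terminated (Err() will then report OutOfCapacity).
rpcSub, err := server.Subscribe(ctx, "rpc-client", query)

// Debugging consensus: outCap = 0 gives an unbuffered channel, which may
// block the pubsub if this subscriber stops reading (use with care).
debugSub, err := server.Subscribe(ctx, "debugger", query, 0)
```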
Also, the Out() channel should return tags along with a message:
```go
type MsgAndTags struct {
Msg interface{}
Tags TagMap
}
```
to inform clients of which Tags were used with Msg.
### How does this new design solve the current issues?
https://github.com/tendermint/tendermint/issues/951 (https://github.com/tendermint/tendermint/issues/1880)
Because of the non-blocking send, the deadlock situation is no longer possible.
If a client stops reading messages, it will be removed.
https://github.com/tendermint/tendermint/issues/1879
MsgAndTags is now used instead of a plain message.
### Future problems and their possible solutions
https://github.com/tendermint/tendermint/issues/2826
One question I am still pondering: how to prevent pubsub from slowing down
consensus. We can increase the pubsub queue size (which is 0 now). Also, it's
probably a good idea to limit the total number of subscribers. This can be done
automatically: say we set the queue size to 1000 and, when it's >= 80% full, we
refuse new subscriptions (see the sketch below).
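A hypothetical sketch of that back-pressure idea (the threshold, names, and error are illustrative, not part of the actual pubsub package):

```go
// ErrServerBusy is an illustrative error for refusing new subscriptions
// while the publish queue is close to saturation.
var ErrServerBusy = errors.New("pubsub queue is >= 80% full; refusing new subscriptions")

type Server struct {
	queue    chan interface{} // pending publish operations
	capacity int
}

// trySubscribe refuses new subscriptions once the queue is >= 80% full, so
// that consensus (the publisher) is protected from taking on even more work.
func (s *Server) trySubscribe() error {
	if len(s.queue)*5 >= s.capacity*4 { // len/cap >= 80%
		return ErrServerBusy
	}
	// ... register the subscriber as usual ...
	return nil
}
```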
## Status
In review
@ -116,7 +184,10 @@ In review
- more idiomatic interface
- subscribers know what tags msg was published with
- subscribers aware of the reason their subscription was cancelled
### Negative
- (since v1) no concurrency when it comes to publishing messages
### Neutral


@ -78,6 +78,78 @@ cd $GOPATH/src/github.com/tendermint/tendermint
rm -rf ./build/node*
```
## Configuring abci containers
To use your own ABCI applications with the 4-node setup, edit the [docker-compose.yaml](https://github.com/tendermint/tendermint/blob/develop/docker-compose.yml) file and add an image for your ABCI application.
```
abci0:
container_name: abci0
image: "abci-image"
build:
context: .
dockerfile: abci.Dockerfile
command: <insert command to run your abci application>
networks:
localnet:
ipv4_address: 192.167.10.6
abci1:
container_name: abci1
image: "abci-image"
build:
context: .
dockerfile: abci.Dockerfile
command: <insert command to run your abci application>
networks:
localnet:
ipv4_address: 192.167.10.7
abci2:
container_name: abci2
image: "abci-image"
build:
context: .
dockerfile: abci.Dockerfile
command: <insert command to run your abci application>
networks:
localnet:
ipv4_address: 192.167.10.8
abci3:
container_name: abci3
image: "abci-image"
build:
context: .
dockerfile: abci.Dockerfile
command: <insert command to run your abci application>
networks:
localnet:
ipv4_address: 192.167.10.9
```
Override the [command](https://github.com/tendermint/tendermint/blob/master/networks/local/localnode/Dockerfile#L12) in each node to connect to its ABCI application.
```
node0:
container_name: node0
image: "tendermint/localnode"
ports:
- "26656-26657:26656-26657"
environment:
- ID=0
- LOG=$${LOG:-tendermint.log}
volumes:
- ./build:/tendermint:Z
command: node --proxy_app=tcp://abci0:26658
networks:
localnet:
ipv4_address: 192.167.10.2
```
Do the same for node1, node2, and node3, then [run the testnet](https://github.com/tendermint/tendermint/blob/master/docs/networks/docker-compose.md#run-a-testnet).
## Logging
Log is saved under the attached volume, in the `tendermint.log` file. If the


@ -15,9 +15,7 @@ for you, so you can just build nice applications.
We design for clients who have no strong trust relationship with any Tendermint
node, just the blockchain and validator set as a whole.
SignedHeader
SignedHeader is a block header along with a commit -- enough validator
precommit-vote signatures to prove its validity (> 2/3 of the voting power)
@ -42,7 +40,7 @@ The FullCommit is also declared in this package as a convenience structure,
which includes the SignedHeader along with the full current and next
ValidatorSets.
Verifier
A Verifier validates a new SignedHeader given the currently known state. There
are two different types of Verifiers provided.
@ -56,11 +54,10 @@ greater).
DynamicVerifier - this Verifier implements an auto-update and persistence
strategy to verify any SignedHeader of the blockchain.
Provider and PersistentProvider
A Provider allows us to store and retrieve the FullCommits.
```go
type Provider interface {
// LatestFullCommit returns the latest commit with
// minHeight <= height <= maxHeight.
@ -68,23 +65,21 @@ type Provider interface {
// minHeight <= height.
LatestFullCommit(chainID string, minHeight, maxHeight int64) (FullCommit, error)
}
```
* client.NewHTTPProvider - query Tendermint rpc.
A PersistentProvider is a Provider that also allows for saving state. This is
used by the DynamicVerifier for persistence.
```go
type PersistentProvider interface {
Provider
// SaveFullCommit saves a FullCommit (without verification).
SaveFullCommit(fc FullCommit) error
}
```
* DBProvider - persistence provider for use with any libs/DB.
* MultiProvider - combine multiple providers.
The suggested use for local light clients is client.NewHTTPProvider(...) for
@ -93,7 +88,7 @@ dbm.NewMemDB()), NewDBProvider("label", db.NewFileDB(...))) to store confirmed
full commits (Trusted)
How We Track Validators
Unless you want to blindly trust the node you talk with, you need to trace
every response back to a hash in a block header and validate the commit


@ -11,6 +11,7 @@ type ConnSet interface {
HasIP(net.IP) bool
Set(net.Conn, []net.IP)
Remove(net.Conn)
RemoveAddr(net.Addr)
}
type connSetItem struct {
@ -62,6 +63,13 @@ func (cs *connSet) Remove(c net.Conn) {
delete(cs.conns, c.RemoteAddr().String())
}
func (cs *connSet) RemoveAddr(addr net.Addr) {
cs.Lock()
defer cs.Unlock()
delete(cs.conns, addr.String())
}
func (cs *connSet) Set(c net.Conn, ips []net.IP) {
cs.Lock()
defer cs.Unlock()


@ -55,6 +55,16 @@ func (p *peer) RemoteIP() net.IP {
return net.ParseIP("127.0.0.1")
}
// Addr always returns tcp://localhost:8800.
func (p *peer) RemoteAddr() net.Addr {
return &net.TCPAddr{IP: net.ParseIP("127.0.0.1"), Port: 8800}
}
// CloseConn always returns nil.
func (p *peer) CloseConn() error {
return nil
}
// Status always returns empty connection status.
func (p *peer) Status() tmconn.ConnectionStatus {
return tmconn.ConnectionStatus{}


@ -20,13 +20,16 @@ type Peer interface {
ID() ID // peer's cryptographic ID
RemoteIP() net.IP // remote IP of the connection
RemoteAddr() net.Addr // remote address of the connection
IsOutbound() bool // did we dial the peer
IsPersistent() bool // do we redial this peer when we disconnect
CloseConn() error // close original connection
NodeInfo() NodeInfo // peer's info
Status() tmconn.ConnectionStatus
OriginalAddr() *NetAddress // original address for outbound peers
Send(byte, []byte) bool
TrySend(byte, []byte) bool
@ -296,6 +299,11 @@ func (p *peer) hasChannel(chID byte) bool {
return false
}
// CloseConn closes original connection. Used for cleaning up in cases where the peer had not been started at all.
func (p *peer) CloseConn() error {
return p.peerConn.conn.Close()
}
//---------------------------------------------------
// methods only used for testing
// TODO: can we remove these?
@ -305,8 +313,8 @@ func (pc *peerConn) CloseConn() {
pc.conn.Close() // nolint: errcheck
}
// RemoteAddr returns peer's remote network address.
func (p *peer) RemoteAddr() net.Addr {
return p.peerConn.conn.RemoteAddr()
}


@ -30,6 +30,8 @@ func (mp *mockPeer) Get(s string) interface{} { return s }
func (mp *mockPeer) Set(string, interface{}) {}
func (mp *mockPeer) RemoteIP() net.IP { return mp.ip }
func (mp *mockPeer) OriginalAddr() *NetAddress { return nil }
func (mp *mockPeer) RemoteAddr() net.Addr { return &net.TCPAddr{IP: mp.ip, Port: 8800} }
func (mp *mockPeer) CloseConn() error { return nil }
// Returns a mock peer
func newMockPeer(ip net.IP) *mockPeer {


@ -39,7 +39,7 @@ func TestPeerBasic(t *testing.T) {
assert.False(p.IsPersistent())
p.persistent = true
assert.True(p.IsPersistent())
assert.Equal(rp.Addr().DialString(), p.RemoteAddr().String())
assert.Equal(rp.ID(), p.ID())
}
@ -137,9 +137,9 @@ type remotePeer struct {
PrivKey crypto.PrivKey
Config *config.P2PConfig
addr *NetAddress
quit chan struct{}
channels cmn.HexBytes
listenAddr string
listener net.Listener
}
func (rp *remotePeer) Addr() *NetAddress {
@ -159,25 +159,45 @@ func (rp *remotePeer) Start() {
if e != nil {
golog.Fatalf("net.Listen tcp :0: %+v", e)
}
rp.listener = l
rp.addr = NewNetAddress(PubKeyToID(rp.PrivKey.PubKey()), l.Addr())
rp.quit = make(chan struct{})
if rp.channels == nil {
rp.channels = []byte{testCh}
}
go rp.accept()
}
func (rp *remotePeer) Stop() {
rp.listener.Close()
}
func (rp *remotePeer) Dial(addr *NetAddress) (net.Conn, error) {
conn, err := addr.DialTimeout(1 * time.Second)
if err != nil {
return nil, err
}
pc, err := testInboundPeerConn(conn, rp.Config, rp.PrivKey)
if err != nil {
return nil, err
}
_, err = handshake(pc.conn, time.Second, rp.nodeInfo())
if err != nil {
return nil, err
}
return conn, err
}
func (rp *remotePeer) accept() {
conns := []net.Conn{}
for {
conn, err := rp.listener.Accept()
if err != nil {
golog.Printf("Failed to accept conn: %+v", err)
for _, conn := range conns {
_ = conn.Close()
}
return
}
pc, err := testInboundPeerConn(conn, rp.Config, rp.PrivKey)
@ -185,31 +205,20 @@ func (rp *remotePeer) accept(l net.Listener) {
golog.Fatalf("Failed to create a peer: %+v", err) golog.Fatalf("Failed to create a peer: %+v", err)
} }
_, err = handshake(pc.conn, time.Second, rp.nodeInfo(l)) _, err = handshake(pc.conn, time.Second, rp.nodeInfo())
if err != nil { if err != nil {
golog.Fatalf("Failed to perform handshake: %+v", err) golog.Fatalf("Failed to perform handshake: %+v", err)
} }
conns = append(conns, conn) conns = append(conns, conn)
select {
case <-rp.quit:
for _, conn := range conns {
if err := conn.Close(); err != nil {
golog.Fatal(err)
}
}
return
default:
}
}
}
func (rp *remotePeer) nodeInfo() NodeInfo {
return DefaultNodeInfo{
ProtocolVersion: defaultProtocolVersion,
ID_: rp.Addr().ID,
ListenAddr: rp.listener.Addr().String(),
Network: "testing",
Version: "1.2.3-rc0-deadbeef",
Channels: rp.channels,


@ -471,7 +471,11 @@ func (r *PEXReactor) dialPeer(addr *p2p.NetAddress) {
attempts, lastDialed := r.dialAttemptsInfo(addr)
if attempts > maxAttemptsToDial {
// Do not log the message if the addr gets readded.
if attempts+1 == maxAttemptsToDial {
r.Logger.Info("Reached max attempts to dial", "addr", addr, "attempts", attempts)
r.attemptsToDial.Store(addr.DialString(), _attemptsToDial{attempts + 1, time.Now()})
}
r.book.MarkBad(addr)
return
}


@ -404,6 +404,8 @@ func (mockPeer) TrySend(byte, []byte) bool { return false }
func (mockPeer) Set(string, interface{}) {}
func (mockPeer) Get(string) interface{} { return nil }
func (mockPeer) OriginalAddr() *p2p.NetAddress { return nil }
func (mockPeer) RemoteAddr() net.Addr { return &net.TCPAddr{IP: net.ParseIP("127.0.0.1"), Port: 8800} }
func (mockPeer) CloseConn() error { return nil }
func assertPeersWithTimeout(
t *testing.T,


@ -210,6 +210,7 @@ func (sw *Switch) OnStart() error {
func (sw *Switch) OnStop() {
// Stop peers
for _, p := range sw.peers.List() {
sw.transport.Cleanup(p)
p.Stop()
if sw.peers.Remove(p) {
sw.metrics.Peers.Add(float64(-1))
@ -304,6 +305,7 @@ func (sw *Switch) stopAndRemovePeer(peer Peer, reason interface{}) {
if sw.peers.Remove(peer) {
sw.metrics.Peers.Add(float64(-1))
}
sw.transport.Cleanup(peer)
peer.Stop()
for _, reactor := range sw.reactors {
reactor.RemovePeer(peer, reason)
@ -529,13 +531,16 @@ func (sw *Switch) acceptRoutine() {
"max", sw.config.MaxNumInboundPeers, "max", sw.config.MaxNumInboundPeers,
) )
_ = p.Stop() sw.transport.Cleanup(p)
continue continue
} }
if err := sw.addPeer(p); err != nil { if err := sw.addPeer(p); err != nil {
sw.transport.Cleanup(p)
if p.IsRunning() {
_ = p.Stop()
}
sw.Logger.Info(
"Ignoring inbound connection: error while adding peer",
"err", err,
@ -593,7 +598,10 @@ func (sw *Switch) addOutboundPeerWithConfig(
}
if err := sw.addPeer(p); err != nil {
sw.transport.Cleanup(p)
if p.IsRunning() {
_ = p.Stop()
}
return err
}
@ -628,7 +636,8 @@ func (sw *Switch) filterPeer(p Peer) error {
return nil
}
// addPeer starts up the Peer and adds it to the Switch. Error is returned if
// the peer is filtered out or failed to start or can't be added.
func (sw *Switch) addPeer(p Peer) error {
if err := sw.filterPeer(p); err != nil {
return err
@ -636,11 +645,15 @@ func (sw *Switch) addPeer(p Peer) error {
p.SetLogger(sw.Logger.With("peer", p.NodeInfo().NetAddress()))
// Handle the shut down case where the switch has stopped but we're
// concurrently trying to add a peer.
if sw.IsRunning() {
// All good. Start peer
if err := sw.startInitPeer(p); err != nil {
return err
}
} else {
sw.Logger.Error("Won't start a peer - switch is not running", "peer", p)
}
// Add the peer to .peers.


@ -3,7 +3,9 @@ package p2p
import (
"bytes"
"fmt"
"io"
"io/ioutil"
"net"
"net/http"
"net/http/httptest"
"regexp"
@ -13,7 +15,6 @@ import (
"time" "time"
stdprometheus "github.com/prometheus/client_golang/prometheus" stdprometheus "github.com/prometheus/client_golang/prometheus"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
@ -477,6 +478,58 @@ func TestSwitchFullConnectivity(t *testing.T) {
}
}
func TestSwitchAcceptRoutine(t *testing.T) {
cfg.MaxNumInboundPeers = 5
// make switch
sw := MakeSwitch(cfg, 1, "testing", "123.123.123", initSwitchFunc)
err := sw.Start()
require.NoError(t, err)
defer sw.Stop()
remotePeers := make([]*remotePeer, 0)
assert.Equal(t, 0, sw.Peers().Size())
// 1. check we connect up to MaxNumInboundPeers
for i := 0; i < cfg.MaxNumInboundPeers; i++ {
rp := &remotePeer{PrivKey: ed25519.GenPrivKey(), Config: cfg}
remotePeers = append(remotePeers, rp)
rp.Start()
c, err := rp.Dial(sw.NodeInfo().NetAddress())
require.NoError(t, err)
// spawn a reading routine to prevent connection from closing
go func(c net.Conn) {
for {
one := make([]byte, 1)
_, err := c.Read(one)
if err != nil {
return
}
}
}(c)
}
time.Sleep(10 * time.Millisecond)
assert.Equal(t, cfg.MaxNumInboundPeers, sw.Peers().Size())
// 2. check we close new connections if we already have MaxNumInboundPeers peers
rp := &remotePeer{PrivKey: ed25519.GenPrivKey(), Config: cfg}
rp.Start()
conn, err := rp.Dial(sw.NodeInfo().NetAddress())
require.NoError(t, err)
// check conn is closed
one := make([]byte, 1)
conn.SetReadDeadline(time.Now().Add(10 * time.Millisecond))
_, err = conn.Read(one)
assert.Equal(t, io.EOF, err)
assert.Equal(t, cfg.MaxNumInboundPeers, sw.Peers().Size())
rp.Stop()
// stop remote peers
for _, rp := range remotePeers {
rp.Stop()
}
}
func BenchmarkSwitchBroadcast(b *testing.B) {
s1, s2 := MakeSwitchPair(b, func(i int, sw *Switch) *Switch {
// Make bar reactors of bar channels each


@ -247,17 +247,35 @@ func testNodeInfo(id ID, name string) NodeInfo {
}
func testNodeInfoWithNetwork(id ID, name, network string) NodeInfo {
port, err := getFreePort()
if err != nil {
panic(err)
}
return DefaultNodeInfo{
ProtocolVersion: defaultProtocolVersion,
ID_: id,
ListenAddr: fmt.Sprintf("127.0.0.1:%d", port),
Network: network,
Version: "1.2.3-rc0-deadbeef",
Channels: []byte{testCh},
Moniker: name,
Other: DefaultNodeInfoOther{
TxIndex: "on",
RPCAddress: fmt.Sprintf("127.0.0.1:%d", port),
},
}
}
func getFreePort() (int, error) {
addr, err := net.ResolveTCPAddr("tcp", "localhost:0")
if err != nil {
return 0, err
}
l, err := net.ListenTCP("tcp", addr)
if err != nil {
return 0, err
}
defer l.Close()
return l.Addr().(*net.TCPAddr).Port, nil
}


@ -52,6 +52,9 @@ type Transport interface {
// Dial connects to the Peer for the address.
Dial(NetAddress, peerConfig) (Peer, error)
// Cleanup any resources associated with Peer.
Cleanup(Peer)
}
// transportLifecycle bundles the methods for callers to control start and stop
@ -274,6 +277,13 @@ func (mt *MultiplexTransport) acceptPeers() {
}
}
// Cleanup removes the given address from the connections set and
// closes the connection.
func (mt *MultiplexTransport) Cleanup(peer Peer) {
mt.conns.RemoveAddr(peer.RemoteAddr())
_ = peer.CloseConn()
}
func (mt *MultiplexTransport) cleanup(c net.Conn) error {
mt.conns.Remove(c)
@ -418,12 +428,6 @@ func (mt *MultiplexTransport) wrapPeer(
PeerMetrics(cfg.metrics),
)
// Wait for Peer to Stop so we can cleanup.
go func(c net.Conn) {
<-p.Quit()
_ = mt.cleanup(c)
}(c)
return p
}


@ -18,7 +18,7 @@ const (
// TMCoreSemVer is the current version of Tendermint Core.
// It's the Semantic Version of the software.
// Must be a string because scripts like dist.sh read this file.
TMCoreSemVer = "0.29.1"
// ABCISemVer is the semantic version of the ABCI library
ABCISemVer = "0.15.0"