mirror of
https://github.com/fluencelabs/tendermint
synced 2025-06-10 20:01:20 +00:00
@ -24,6 +24,7 @@ IMPROVEMENTS:
|
|||||||
BUG FIXES:
|
BUG FIXES:
|
||||||
- Graceful handling/recovery for apps that have non-determinism or fail to halt
|
- Graceful handling/recovery for apps that have non-determinism or fail to halt
|
||||||
- Graceful handling/recovery for violations of safety, or liveness
|
- Graceful handling/recovery for violations of safety, or liveness
|
||||||
|
- Fix reconnect to persistent peer when first dial fails
|
||||||
|
|
||||||
## 0.17.1 (March 27th, 2018)
|
## 0.17.1 (March 27th, 2018)
|
||||||
|
|
||||||
|
@ -7,6 +7,7 @@ BREAKING CHANGES:
|
|||||||
- Remove or unexport methods from FuzzedConnection: Active, Mode, ProbDropRW, ProbDropConn, ProbSleep, MaxDelayMilliseconds, Fuzz
|
- Remove or unexport methods from FuzzedConnection: Active, Mode, ProbDropRW, ProbDropConn, ProbSleep, MaxDelayMilliseconds, Fuzz
|
||||||
- switch.AddPeerWithConnection is unexported and replaced by switch.AddPeer
|
- switch.AddPeerWithConnection is unexported and replaced by switch.AddPeer
|
||||||
- switch.DialPeerWithAddress takes a bool, setting the peer as persistent or not
|
- switch.DialPeerWithAddress takes a bool, setting the peer as persistent or not
|
||||||
|
- PeerConfig requires a Dial function
|
||||||
|
|
||||||
FEATURES:
|
FEATURES:
|
||||||
|
|
||||||
|
@ -87,6 +87,8 @@ func newPeer(pc peerConn, nodeInfo NodeInfo,
|
|||||||
type PeerConfig struct {
|
type PeerConfig struct {
|
||||||
AuthEnc bool `mapstructure:"auth_enc"` // authenticated encryption
|
AuthEnc bool `mapstructure:"auth_enc"` // authenticated encryption
|
||||||
|
|
||||||
|
Dial func(addr *NetAddress, config *PeerConfig) (net.Conn, error)
|
||||||
|
|
||||||
// times are in seconds
|
// times are in seconds
|
||||||
HandshakeTimeout time.Duration `mapstructure:"handshake_timeout"`
|
HandshakeTimeout time.Duration `mapstructure:"handshake_timeout"`
|
||||||
DialTimeout time.Duration `mapstructure:"dial_timeout"`
|
DialTimeout time.Duration `mapstructure:"dial_timeout"`
|
||||||
@ -101,6 +103,7 @@ type PeerConfig struct {
|
|||||||
func DefaultPeerConfig() *PeerConfig {
|
func DefaultPeerConfig() *PeerConfig {
|
||||||
return &PeerConfig{
|
return &PeerConfig{
|
||||||
AuthEnc: true,
|
AuthEnc: true,
|
||||||
|
Dial: dial,
|
||||||
HandshakeTimeout: 20, // * time.Second,
|
HandshakeTimeout: 20, // * time.Second,
|
||||||
DialTimeout: 3, // * time.Second,
|
DialTimeout: 3, // * time.Second,
|
||||||
MConfig: tmconn.DefaultMConnConfig(),
|
MConfig: tmconn.DefaultMConnConfig(),
|
||||||
@ -112,7 +115,7 @@ func DefaultPeerConfig() *PeerConfig {
|
|||||||
func newOutboundPeerConn(addr *NetAddress, config *PeerConfig, persistent bool, ourNodePrivKey crypto.PrivKey) (peerConn, error) {
|
func newOutboundPeerConn(addr *NetAddress, config *PeerConfig, persistent bool, ourNodePrivKey crypto.PrivKey) (peerConn, error) {
|
||||||
var pc peerConn
|
var pc peerConn
|
||||||
|
|
||||||
conn, err := dial(addr, config)
|
conn, err := config.Dial(addr, config)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return pc, errors.Wrap(err, "Error creating peer")
|
return pc, errors.Wrap(err, "Error creating peer")
|
||||||
}
|
}
|
||||||
|
@ -56,6 +56,7 @@ type Switch struct {
|
|||||||
reactorsByCh map[byte]Reactor
|
reactorsByCh map[byte]Reactor
|
||||||
peers *PeerSet
|
peers *PeerSet
|
||||||
dialing *cmn.CMap
|
dialing *cmn.CMap
|
||||||
|
reconnecting *cmn.CMap
|
||||||
nodeInfo NodeInfo // our node info
|
nodeInfo NodeInfo // our node info
|
||||||
nodeKey *NodeKey // our node privkey
|
nodeKey *NodeKey // our node privkey
|
||||||
addrBook AddrBook
|
addrBook AddrBook
|
||||||
@ -75,6 +76,7 @@ func NewSwitch(config *cfg.P2PConfig) *Switch {
|
|||||||
reactorsByCh: make(map[byte]Reactor),
|
reactorsByCh: make(map[byte]Reactor),
|
||||||
peers: NewPeerSet(),
|
peers: NewPeerSet(),
|
||||||
dialing: cmn.NewCMap(),
|
dialing: cmn.NewCMap(),
|
||||||
|
reconnecting: cmn.NewCMap(),
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ensure we have a completely undeterministic PRNG. cmd.RandInt64() draws
|
// Ensure we have a completely undeterministic PRNG. cmd.RandInt64() draws
|
||||||
@ -255,7 +257,7 @@ func (sw *Switch) StopPeerForError(peer Peer, reason interface{}) {
|
|||||||
sw.stopAndRemovePeer(peer, reason)
|
sw.stopAndRemovePeer(peer, reason)
|
||||||
|
|
||||||
if peer.IsPersistent() {
|
if peer.IsPersistent() {
|
||||||
go sw.reconnectToPeer(peer)
|
go sw.reconnectToPeer(peer.NodeInfo().NetAddress())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -274,24 +276,28 @@ func (sw *Switch) stopAndRemovePeer(peer Peer, reason interface{}) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// reconnectToPeer tries to reconnect to the peer, first repeatedly
|
// reconnectToPeer tries to reconnect to the addr, first repeatedly
|
||||||
// with a fixed interval, then with exponential backoff.
|
// with a fixed interval, then with exponential backoff.
|
||||||
// If no success after all that, it stops trying, and leaves it
|
// If no success after all that, it stops trying, and leaves it
|
||||||
// to the PEX/Addrbook to find the peer again
|
// to the PEX/Addrbook to find the peer with the addr again
|
||||||
func (sw *Switch) reconnectToPeer(peer Peer) {
|
func (sw *Switch) reconnectToPeer(addr *NetAddress) {
|
||||||
// NOTE this will connect to the self reported address,
|
if sw.reconnecting.Has(string(addr.ID)) {
|
||||||
// not necessarily the original we dialed
|
return
|
||||||
netAddr := peer.NodeInfo().NetAddress()
|
}
|
||||||
|
|
||||||
|
sw.reconnecting.Set(string(addr.ID), addr)
|
||||||
|
defer sw.reconnecting.Delete(string(addr.ID))
|
||||||
|
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
sw.Logger.Info("Reconnecting to peer", "peer", peer)
|
sw.Logger.Info("Reconnecting to peer", "addr", addr)
|
||||||
for i := 0; i < reconnectAttempts; i++ {
|
for i := 0; i < reconnectAttempts; i++ {
|
||||||
if !sw.IsRunning() {
|
if !sw.IsRunning() {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
err := sw.DialPeerWithAddress(netAddr, true)
|
err := sw.DialPeerWithAddress(addr, true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "peer", peer)
|
sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr)
|
||||||
// sleep a set amount
|
// sleep a set amount
|
||||||
sw.randomSleep(reconnectInterval)
|
sw.randomSleep(reconnectInterval)
|
||||||
continue
|
continue
|
||||||
@ -301,7 +307,7 @@ func (sw *Switch) reconnectToPeer(peer Peer) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
sw.Logger.Error("Failed to reconnect to peer. Beginning exponential backoff",
|
sw.Logger.Error("Failed to reconnect to peer. Beginning exponential backoff",
|
||||||
"peer", peer, "elapsed", time.Since(start))
|
"addr", addr, "elapsed", time.Since(start))
|
||||||
for i := 0; i < reconnectBackOffAttempts; i++ {
|
for i := 0; i < reconnectBackOffAttempts; i++ {
|
||||||
if !sw.IsRunning() {
|
if !sw.IsRunning() {
|
||||||
return
|
return
|
||||||
@ -310,13 +316,13 @@ func (sw *Switch) reconnectToPeer(peer Peer) {
|
|||||||
// sleep an exponentially increasing amount
|
// sleep an exponentially increasing amount
|
||||||
sleepIntervalSeconds := math.Pow(reconnectBackOffBaseSeconds, float64(i))
|
sleepIntervalSeconds := math.Pow(reconnectBackOffBaseSeconds, float64(i))
|
||||||
sw.randomSleep(time.Duration(sleepIntervalSeconds) * time.Second)
|
sw.randomSleep(time.Duration(sleepIntervalSeconds) * time.Second)
|
||||||
err := sw.DialPeerWithAddress(netAddr, true)
|
err := sw.DialPeerWithAddress(addr, true)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
return // success
|
return // success
|
||||||
}
|
}
|
||||||
sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "peer", peer)
|
sw.Logger.Info("Error reconnecting to peer. Trying again", "tries", i, "err", err, "addr", addr)
|
||||||
}
|
}
|
||||||
sw.Logger.Error("Failed to reconnect to peer. Giving up", "peer", peer, "elapsed", time.Since(start))
|
sw.Logger.Error("Failed to reconnect to peer. Giving up", "addr", addr, "elapsed", time.Since(start))
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetAddrBook allows to set address book on Switch.
|
// SetAddrBook allows to set address book on Switch.
|
||||||
@ -470,6 +476,7 @@ func (sw *Switch) addOutboundPeerWithConfig(addr *NetAddress, config *PeerConfig
|
|||||||
peerConn, err := newOutboundPeerConn(addr, config, persistent, sw.nodeKey.PrivKey)
|
peerConn, err := newOutboundPeerConn(addr, config, persistent, sw.nodeKey.PrivKey)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
sw.Logger.Error("Failed to dial peer", "address", addr, "err", err)
|
sw.Logger.Error("Failed to dial peer", "address", addr, "err", err)
|
||||||
|
go sw.reconnectToPeer(addr)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -8,6 +8,7 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/pkg/errors"
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
@ -23,6 +24,11 @@ var (
|
|||||||
config *cfg.P2PConfig
|
config *cfg.P2PConfig
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// badDial returns an error for testing dial errors
|
||||||
|
func badDial(addr *NetAddress, config *PeerConfig) (net.Conn, error) {
|
||||||
|
return nil, errors.New("dial err")
|
||||||
|
}
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
config = cfg.DefaultP2PConfig()
|
config = cfg.DefaultP2PConfig()
|
||||||
config.PexReactor = true
|
config.PexReactor = true
|
||||||
@ -295,6 +301,29 @@ func TestSwitchReconnectsToPersistentPeer(t *testing.T) {
|
|||||||
}
|
}
|
||||||
assert.NotZero(npeers)
|
assert.NotZero(npeers)
|
||||||
assert.False(peer.IsRunning())
|
assert.False(peer.IsRunning())
|
||||||
|
|
||||||
|
// simulate another remote peer
|
||||||
|
rp = &remotePeer{PrivKey: crypto.GenPrivKeyEd25519().Wrap(), Config: DefaultPeerConfig()}
|
||||||
|
rp.Start()
|
||||||
|
defer rp.Stop()
|
||||||
|
|
||||||
|
// simulate first time dial failure
|
||||||
|
peerConfig := DefaultPeerConfig()
|
||||||
|
peerConfig.Dial = badDial
|
||||||
|
err = sw.addOutboundPeerWithConfig(rp.Addr(), peerConfig, true)
|
||||||
|
require.NotNil(err)
|
||||||
|
|
||||||
|
// DialPeerWithAddres - sw.peerConfig resets the dialer
|
||||||
|
|
||||||
|
// TODO: same as above
|
||||||
|
for i := 0; i < 20; i++ {
|
||||||
|
time.Sleep(250 * time.Millisecond)
|
||||||
|
npeers = sw.Peers().Size()
|
||||||
|
if npeers > 1 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert.EqualValues(2, npeers)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestSwitchFullConnectivity(t *testing.T) {
|
func TestSwitchFullConnectivity(t *testing.T) {
|
||||||
|
Reference in New Issue
Block a user