p2p: make persistent prop independent of conn direction (#3593)

## Description

Previously only outbound peers can be persistent.

Now, even if the peer is inbound, if it's marked as persistent, when/if conn is lost,
Tendermint will try to reconnect. This part is actually optional and can be reverted.

Plus, seed won't disconnect from inbound peer if it's marked as
persistent. Fixes #3362

## Commits

* make persistent prop independent of conn direction

Previously only outbound peers can be persistent. Now, even if the peer
is inbound, if it's marked as persistent, when/if conn is lost,
Tendermint will try to reconnect.

Plus, seed won't disconnect from inbound peer if it's marked as
persistent. Fixes #3362

* fix TestPEXReactorDialPeer test

* add a changelog entry

* update changelog

* add two tests

* reformat code

* test UnsafeDialPeers and UnsafeDialSeeds

* add TestSwitchDialPeersAsync

* spec: update p2p/config spec

* fixes after Ismail's review

* Apply suggestions from code review

Co-Authored-By: melekes <anton.kalyaev@gmail.com>

* fix merge conflict

* remove sleep from TestPEXReactorDoesNotDisconnectFromPersistentPeerInSeedMode

We don't need it actually.
This commit is contained in:
Anton Kaliaev
2019-05-03 17:21:56 +04:00
committed by GitHub
parent 9926ae768e
commit 8711af608f
11 changed files with 323 additions and 104 deletions

View File

@@ -77,6 +77,8 @@ type Switch struct {
nodeInfo NodeInfo // our node info
nodeKey *NodeKey // our node privkey
addrBook AddrBook
// peers addresses with whom we'll maintain constant connection
persistentPeersAddrs []*NetAddress
transport Transport
@@ -104,16 +106,17 @@ func NewSwitch(
options ...SwitchOption,
) *Switch {
sw := &Switch{
config: cfg,
reactors: make(map[string]Reactor),
chDescs: make([]*conn.ChannelDescriptor, 0),
reactorsByCh: make(map[byte]Reactor),
peers: NewPeerSet(),
dialing: cmn.NewCMap(),
reconnecting: cmn.NewCMap(),
metrics: NopMetrics(),
transport: transport,
filterTimeout: defaultFilterTimeout,
config: cfg,
reactors: make(map[string]Reactor),
chDescs: make([]*conn.ChannelDescriptor, 0),
reactorsByCh: make(map[byte]Reactor),
peers: NewPeerSet(),
dialing: cmn.NewCMap(),
reconnecting: cmn.NewCMap(),
metrics: NopMetrics(),
transport: transport,
filterTimeout: defaultFilterTimeout,
persistentPeersAddrs: make([]*NetAddress, 0),
}
// Ensure we have a completely undeterministic PRNG.
@@ -297,7 +300,19 @@ func (sw *Switch) StopPeerForError(peer Peer, reason interface{}) {
sw.stopAndRemovePeer(peer, reason)
if peer.IsPersistent() {
go sw.reconnectToPeer(peer.SocketAddr())
var addr *NetAddress
if peer.IsOutbound() { // socket address for outbound peers
addr = peer.SocketAddr()
} else { // self-reported address for inbound peers
var err error
addr, err = peer.NodeInfo().NetAddress()
if err != nil {
sw.Logger.Error("Wanted to reconnect to inbound peer, but self-reported address is wrong",
"peer", peer, "addr", addr, "err", err)
return
}
}
go sw.reconnectToPeer(addr)
}
}
@@ -341,7 +356,7 @@ func (sw *Switch) reconnectToPeer(addr *NetAddress) {
return
}
err := sw.DialPeerWithAddress(addr, true)
err := sw.DialPeerWithAddress(addr)
if err == nil {
return // success
} else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok {
@@ -365,7 +380,7 @@ func (sw *Switch) reconnectToPeer(addr *NetAddress) {
sleepIntervalSeconds := math.Pow(reconnectBackOffBaseSeconds, float64(i))
sw.randomSleep(time.Duration(sleepIntervalSeconds) * time.Second)
err := sw.DialPeerWithAddress(addr, true)
err := sw.DialPeerWithAddress(addr)
if err == nil {
return // success
} else if _, ok := err.(ErrCurrentlyDialingOrExistingAddress); ok {
@@ -401,28 +416,41 @@ func isPrivateAddr(err error) bool {
return ok && te.PrivateAddr()
}
// DialPeersAsync dials a list of peers asynchronously in random order (optionally, making them persistent).
// DialPeersAsync dials a list of peers asynchronously in random order.
// Used to dial peers from config on startup or from unsafe-RPC (trusted sources).
// TODO: remove addrBook arg since it's now set on the switch
func (sw *Switch) DialPeersAsync(addrBook AddrBook, peers []string, persistent bool) error {
// It ignores ErrNetAddressLookup. However, if there are other errors, first
// encounter is returned.
// Nop if there are no peers.
func (sw *Switch) DialPeersAsync(peers []string) error {
netAddrs, errs := NewNetAddressStrings(peers)
// only log errors, dial correct addresses
// report all the errors
for _, err := range errs {
sw.Logger.Error("Error in peer's address", "err", err)
}
// return first non-ErrNetAddressLookup error
for _, err := range errs {
if _, ok := err.(ErrNetAddressLookup); ok {
continue
}
return err
}
sw.dialPeersAsync(netAddrs)
return nil
}
func (sw *Switch) dialPeersAsync(netAddrs []*NetAddress) {
ourAddr := sw.NetAddress()
// TODO: this code feels like it's in the wrong place.
// The integration tests depend on the addrBook being saved
// right away but maybe we can change that. Recall that
// the addrBook is only written to disk every 2min
if addrBook != nil {
if sw.addrBook != nil {
// add peers to `addrBook`
for _, netAddr := range netAddrs {
// do not add our address or ID
if !netAddr.Same(ourAddr) {
if err := addrBook.AddAddress(netAddr, ourAddr); err != nil {
if err := sw.addrBook.AddAddress(netAddr, ourAddr); err != nil {
if isPrivateAddr(err) {
sw.Logger.Debug("Won't add peer's address to addrbook", "err", err)
} else {
@@ -433,7 +461,7 @@ func (sw *Switch) DialPeersAsync(addrBook AddrBook, peers []string, persistent b
}
// Persist some peers to disk right away.
// NOTE: integration tests depend on this
addrBook.Save()
sw.addrBook.Save()
}
// permute the list, dial them in random order.
@@ -450,7 +478,7 @@ func (sw *Switch) DialPeersAsync(addrBook AddrBook, peers []string, persistent b
sw.randomSleep(0)
err := sw.DialPeerWithAddress(addr, persistent)
err := sw.DialPeerWithAddress(addr)
if err != nil {
switch err.(type) {
case ErrSwitchConnectToSelf, ErrSwitchDuplicatePeerID, ErrCurrentlyDialingOrExistingAddress:
@@ -461,16 +489,13 @@ func (sw *Switch) DialPeersAsync(addrBook AddrBook, peers []string, persistent b
}
}(i)
}
return nil
}
// DialPeerWithAddress dials the given peer and runs sw.addPeer if it connects
// and authenticates successfully.
// If `persistent == true`, the switch will always try to reconnect to this
// peer if the connection ever fails.
// If we're currently dialing this address or it belongs to an existing peer,
// ErrCurrentlyDialingOrExistingAddress is returned.
func (sw *Switch) DialPeerWithAddress(addr *NetAddress, persistent bool) error {
func (sw *Switch) DialPeerWithAddress(addr *NetAddress) error {
if sw.IsDialingOrExistingAddress(addr) {
return ErrCurrentlyDialingOrExistingAddress{addr.String()}
}
@@ -478,7 +503,7 @@ func (sw *Switch) DialPeerWithAddress(addr *NetAddress, persistent bool) error {
sw.dialing.Set(string(addr.ID), addr)
defer sw.dialing.Delete(string(addr.ID))
return sw.addOutboundPeerWithConfig(addr, sw.config, persistent)
return sw.addOutboundPeerWithConfig(addr, sw.config)
}
// sleep for interval plus some random amount of ms on [0, dialRandomizerIntervalMilliseconds]
@@ -495,6 +520,38 @@ func (sw *Switch) IsDialingOrExistingAddress(addr *NetAddress) bool {
(!sw.config.AllowDuplicateIP && sw.peers.HasIP(addr.IP))
}
// AddPersistentPeers allows you to set persistent peers. It ignores
// ErrNetAddressLookup. However, if there are other errors, first encounter is
// returned.
func (sw *Switch) AddPersistentPeers(addrs []string) error {
sw.Logger.Info("Adding persistent peers", "addrs", addrs)
netAddrs, errs := NewNetAddressStrings(addrs)
// report all the errors
for _, err := range errs {
sw.Logger.Error("Error in peer's address", "err", err)
}
// return first non-ErrNetAddressLookup error
for _, err := range errs {
if _, ok := err.(ErrNetAddressLookup); ok {
continue
}
return err
}
sw.persistentPeersAddrs = netAddrs
return nil
}
func (sw *Switch) isPeerPersistentFn() func(*NetAddress) bool {
return func(na *NetAddress) bool {
for _, pa := range sw.persistentPeersAddrs {
if pa.Equals(na) {
return true
}
}
return false
}
}
func (sw *Switch) acceptRoutine() {
for {
p, err := sw.transport.Accept(peerConfig{
@@ -502,6 +559,7 @@ func (sw *Switch) acceptRoutine() {
onPeerError: sw.StopPeerForError,
reactorsByCh: sw.reactorsByCh,
metrics: sw.metrics,
isPersistent: sw.isPeerPersistentFn(),
})
if err != nil {
switch err := err.(type) {
@@ -581,13 +639,12 @@ func (sw *Switch) acceptRoutine() {
// dial the peer; make secret connection; authenticate against the dialed ID;
// add the peer.
// if dialing fails, start the reconnect loop. If handhsake fails, its over.
// If peer is started succesffuly, reconnectLoop will start when
// StopPeerForError is called
// if dialing fails, start the reconnect loop. If handshake fails, it's over.
// If peer is started successfully, reconnectLoop will start when
// StopPeerForError is called.
func (sw *Switch) addOutboundPeerWithConfig(
addr *NetAddress,
cfg *config.P2PConfig,
persistent bool,
) error {
sw.Logger.Info("Dialing peer", "address", addr)
@@ -600,7 +657,7 @@ func (sw *Switch) addOutboundPeerWithConfig(
p, err := sw.transport.Dial(*addr, peerConfig{
chDescs: sw.chDescs,
onPeerError: sw.StopPeerForError,
persistent: persistent,
isPersistent: sw.isPeerPersistentFn(),
reactorsByCh: sw.reactorsByCh,
metrics: sw.metrics,
})
@@ -619,7 +676,7 @@ func (sw *Switch) addOutboundPeerWithConfig(
// retry persistent peers after
// any dial error besides IsSelf()
if persistent {
if sw.isPeerPersistentFn()(addr) {
go sw.reconnectToPeer(addr)
}