mirror of
https://github.com/fluencelabs/tendermint
synced 2025-06-14 05:41:21 +00:00
some notes about the p2p layer
This commit is contained in:
@ -7,7 +7,8 @@ to good peers and to gossip peers to others.
|
||||
## Peer Types
|
||||
|
||||
Certain peers are special in that they are specified by the user as `persistent`,
|
||||
which means we auto-redial them if the connection fails.
|
||||
which means we auto-redial them if the connection fails, or if we fail to dial
|
||||
them.
|
||||
Some peers can be marked as `private`, which means
|
||||
we will not put them in the address book or gossip them to others.
|
||||
|
||||
@ -19,6 +20,11 @@ Peer discovery begins with a list of seeds.
|
||||
When we have no peers, or have been unable to find enough peers from existing ones,
|
||||
we dial a randomly selected seed to get a list of peers to dial.
|
||||
|
||||
On startup, we will also immediately dial the given list of `persisten_peers`,
|
||||
and will attempt to maintain persistent connections with them. If the connections die,
|
||||
we will redial every 5s for a a few minutes, then switch to an exponential backoff schedule,
|
||||
and after about a day of trying, stop dialing the peer.
|
||||
|
||||
So long as we have less than `MaxPeers`, we periodically request additional peers
|
||||
from each of our own. If sufficient time goes by and we still can't find enough peers,
|
||||
we try the seeds again.
|
||||
@ -68,6 +74,19 @@ Connection attempts are made with exponential backoff (plus jitter). Because
|
||||
the selection process happens every `ensurePeersPeriod`, we might not end up
|
||||
dialing a peer for much longer than the backoff duration.
|
||||
|
||||
.......
|
||||
-----------------
|
||||
TODO: put this in the right place. And add more details.
|
||||
|
||||
AddrBook: If there's no space in the book, we check the bucket for bad peers, which include peers we've attempted to
|
||||
dial 3 times, and then remove them from only that bucket.
|
||||
|
||||
PEX: if we fail to conenct to the peer after 16 tries (with exponential backoff), we remove from address book completely.
|
||||
|
||||
-----------------
|
||||
.......
|
||||
|
||||
|
||||
## Select Peers to Exchange
|
||||
|
||||
When we’re asked for peers, we select them as follows:
|
||||
@ -86,9 +105,9 @@ Note that the bad behaviour may be detected outside the PEX reactor itself
|
||||
(for instance, in the mconnection, or another reactor), but it must be communicated to the PEX reactor
|
||||
so it can remove and mark the peer.
|
||||
|
||||
In the PEX, if a peer sends us unsolicited lists of peers,
|
||||
or if the peer sends too many requests for more peers in a given amount of time,
|
||||
we Disconnect and Mark.
|
||||
In the PEX, if a peer sends us an unsolicited list of peers,
|
||||
or if the peer sends a request too soon after another one,
|
||||
we Disconnect and MarkBad.
|
||||
|
||||
## Trust Metric
|
||||
|
||||
|
34
node/node.go
34
node/node.go
@ -6,6 +6,7 @@ import (
|
||||
"fmt"
|
||||
"net"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
abci "github.com/tendermint/abci/types"
|
||||
amino "github.com/tendermint/go-amino"
|
||||
@ -21,7 +22,6 @@ import (
|
||||
mempl "github.com/tendermint/tendermint/mempool"
|
||||
"github.com/tendermint/tendermint/p2p"
|
||||
"github.com/tendermint/tendermint/p2p/pex"
|
||||
"github.com/tendermint/tendermint/p2p/trust"
|
||||
"github.com/tendermint/tendermint/proxy"
|
||||
rpccore "github.com/tendermint/tendermint/rpc/core"
|
||||
ctypes "github.com/tendermint/tendermint/rpc/core/types"
|
||||
@ -101,7 +101,7 @@ type Node struct {
|
||||
// network
|
||||
sw *p2p.Switch // p2p connections
|
||||
addrBook pex.AddrBook // known peers
|
||||
trustMetricStore *trust.TrustMetricStore // trust metrics for all peers
|
||||
// trustMetricStore *trust.TrustMetricStore // trust metrics for all peers
|
||||
|
||||
// services
|
||||
eventBus *types.EventBus // pub/sub for services
|
||||
@ -262,20 +262,33 @@ func NewNode(config *cfg.Config,
|
||||
sw.AddReactor("EVIDENCE", evidenceReactor)
|
||||
|
||||
// Optionally, start the pex reactor
|
||||
//
|
||||
// TODO:
|
||||
//
|
||||
// We need to set Seeds and PersistentPeers on the switch,
|
||||
// since it needs to be able to use these (and their DNS names)
|
||||
// even if the PEX is off. We can include the DNS name in the NetAddress,
|
||||
// but it would still be nice to have a clear list of the current "PersistentPeers"
|
||||
// somewhere that we can return with net_info.
|
||||
//
|
||||
// Let's assume we always have IDs ... and we just dont authenticate them
|
||||
// if auth_enc=false.
|
||||
//
|
||||
// If PEX is on, it should handle dialing the seeds. Otherwise the switch does it.
|
||||
var addrBook pex.AddrBook
|
||||
var trustMetricStore *trust.TrustMetricStore
|
||||
if config.P2P.PexReactor {
|
||||
addrBook = pex.NewAddrBook(config.P2P.AddrBookFile(), config.P2P.AddrBookStrict)
|
||||
addrBook.SetLogger(p2pLogger.With("book", config.P2P.AddrBookFile()))
|
||||
|
||||
// Get the trust metric history data
|
||||
trustHistoryDB, err := dbProvider(&DBContext{"trusthistory", config})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
var seeds []string
|
||||
if config.P2P.Seeds != "" {
|
||||
seeds = strings.Split(config.P2P.Seeds, ",")
|
||||
}
|
||||
trustMetricStore = trust.NewTrustMetricStore(trustHistoryDB, trust.DefaultConfig())
|
||||
trustMetricStore.SetLogger(p2pLogger)
|
||||
|
||||
var privatePeerIDs []string
|
||||
if config.P2P.PrivatePeerIDs != "" {
|
||||
privatePeerIDs = strings.Split(config.P2P.PrivatePeerIDs, ",")
|
||||
}
|
||||
// TODO persistent peers ? so we can have their DNS addrs saved
|
||||
pexReactor := pex.NewPEXReactor(addrBook,
|
||||
&pex.PEXReactorConfig{
|
||||
Seeds: cmn.SplitAndTrim(config.P2P.Seeds, ",", " "),
|
||||
@ -357,7 +370,6 @@ func NewNode(config *cfg.Config,
|
||||
|
||||
sw: sw,
|
||||
addrBook: addrBook,
|
||||
trustMetricStore: trustMetricStore,
|
||||
|
||||
stateDB: stateDB,
|
||||
blockStore: blockStore,
|
||||
|
@ -22,6 +22,7 @@ type NetAddress struct {
|
||||
ID ID
|
||||
IP net.IP
|
||||
Port uint16
|
||||
Name string // optional DNS name
|
||||
str string
|
||||
}
|
||||
|
||||
|
@ -20,7 +20,10 @@ const (
|
||||
// PexChannel is a channel for PEX messages
|
||||
PexChannel = byte(0x00)
|
||||
|
||||
maxMsgSize = 1048576 // 1MB
|
||||
// TODO: make smaller. Should match the maxGetSelection
|
||||
// this is basically the amplification factor since a request
|
||||
// msg is like 1 byte ... it can cause us to send msgs of this size!
|
||||
maxPexMessageSize = 1048576 // 1MB
|
||||
|
||||
// ensure we have enough peers
|
||||
defaultEnsurePeersPeriod = 30 * time.Second
|
||||
@ -61,7 +64,7 @@ type PEXReactor struct {
|
||||
|
||||
book AddrBook
|
||||
config *PEXReactorConfig
|
||||
ensurePeersPeriod time.Duration
|
||||
ensurePeersPeriod time.Duration // TODO: should go in the config
|
||||
|
||||
// maps to prevent abuse
|
||||
requestsSent *cmn.CMap // ID->struct{}: unanswered send requests
|
||||
@ -70,6 +73,12 @@ type PEXReactor struct {
|
||||
attemptsToDial sync.Map // address (string) -> {number of attempts (int), last time dialed (time.Time)}
|
||||
}
|
||||
|
||||
func (pexR *PEXReactor) minReceiveRequestInterval() time.Duration {
|
||||
// NOTE: must be less than ensurePeersPeriod, otherwise we'll request
|
||||
// peers too quickly from others and they'll think we're bad!
|
||||
return pexR.ensurePeersPeriod / 3
|
||||
}
|
||||
|
||||
// PEXReactorConfig holds reactor specific configuration data.
|
||||
type PEXReactorConfig struct {
|
||||
// Seed/Crawler mode
|
||||
@ -113,6 +122,9 @@ func (r *PEXReactor) OnStart() error {
|
||||
}
|
||||
|
||||
// return err if user provided a bad seed address
|
||||
// NOTE: only if its an invalid address.
|
||||
// If we simply fail to resovle a DNS name,
|
||||
// we shouldn't exit here ...
|
||||
if err := r.checkSeeds(); err != nil {
|
||||
return err
|
||||
}
|
||||
@ -195,6 +207,10 @@ func (r *PEXReactor) Receive(chID byte, src Peer, msgBytes []byte) {
|
||||
}
|
||||
|
||||
// Seeds disconnect after sending a batch of addrs
|
||||
// NOTE: this is a prime candidate for amplification attacks
|
||||
// so it's important we
|
||||
// 1) restrict how frequently peers can request
|
||||
// 2) limit the output size
|
||||
if r.config.SeedMode {
|
||||
r.SendAddrs(src, r.book.GetSelectionWithBias(biasToSelectNewPeers))
|
||||
r.Switch.StopPeerGracefully(src)
|
||||
@ -213,6 +229,7 @@ func (r *PEXReactor) Receive(chID byte, src Peer, msgBytes []byte) {
|
||||
}
|
||||
}
|
||||
|
||||
// enforces a minimum amount of time between requests
|
||||
func (r *PEXReactor) receiveRequest(src Peer) error {
|
||||
id := string(src.ID())
|
||||
v := r.lastReceivedRequests.Get(id)
|
||||
@ -232,8 +249,14 @@ func (r *PEXReactor) receiveRequest(src Peer) error {
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
if now.Sub(lastReceived) < r.ensurePeersPeriod/3 {
|
||||
return fmt.Errorf("Peer (%v) is sending too many PEX requests. Disconnecting", src.ID())
|
||||
minInterval := r.minReceiveRequestInterval()
|
||||
if now.Sub(lastReceived) < minInterval {
|
||||
return fmt.Errorf("Peer (%v) send next PEX request too soon. lastReceived: %v, now: %v, minInterval: %v. Disconnecting",
|
||||
src.ID(),
|
||||
lastReceived,
|
||||
now,
|
||||
minInterval,
|
||||
)
|
||||
}
|
||||
r.lastReceivedRequests.Set(id, now)
|
||||
return nil
|
||||
@ -264,7 +287,11 @@ func (r *PEXReactor) ReceiveAddrs(addrs []*p2p.NetAddress, src Peer) error {
|
||||
|
||||
srcAddr := src.NodeInfo().NetAddress()
|
||||
for _, netAddr := range addrs {
|
||||
// TODO: make sure correct nodes never send nil and return error
|
||||
// if a netAddr == nil
|
||||
if netAddr != nil && !isAddrPrivate(netAddr, r.config.PrivatePeerIDs) {
|
||||
// TODO: Should we moe the list of private peers into the AddrBook so AddAddress
|
||||
// can do the check for us, and we don't have to worry about checking before calling ?
|
||||
err := r.book.AddAddress(netAddr, srcAddr)
|
||||
if err != nil {
|
||||
r.Logger.Error("Failed to add new address", "err", err)
|
||||
@ -360,6 +387,9 @@ func (r *PEXReactor) ensurePeers() {
|
||||
if connected := r.Switch.Peers().Has(try.ID); connected {
|
||||
continue
|
||||
}
|
||||
// TODO: consider moving some checks from toDial into here
|
||||
// so we don't even consider dialing peers that we want to wait
|
||||
// before dialling again, or have dialled too many times already
|
||||
r.Logger.Info("Will dial address", "addr", try)
|
||||
toDial[try.ID] = try
|
||||
}
|
||||
@ -387,13 +417,17 @@ func (r *PEXReactor) ensurePeers() {
|
||||
}
|
||||
}
|
||||
|
||||
func (r *PEXReactor) dialPeer(addr *p2p.NetAddress) {
|
||||
var attempts int
|
||||
var lastDialed time.Time
|
||||
if lAttempts, attempted := r.attemptsToDial.Load(addr.DialString()); attempted {
|
||||
attempts = lAttempts.(_attemptsToDial).number
|
||||
lastDialed = lAttempts.(_attemptsToDial).lastDialed
|
||||
func (r *PEXReactor) dialAttemptsInfo(addr *p2p.NetAddress) (attempts int, lastDialed time.Time) {
|
||||
_attempts, ok := r.attemptsToDial.Load(addr.DialString())
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
atd := _attempts.(_attemptsToDial)
|
||||
return atd.number, atd.lastDialed
|
||||
}
|
||||
|
||||
func (r *PEXReactor) dialPeer(addr *p2p.NetAddress) {
|
||||
attempts, lastDialed := r.dialAttemptsInfo(addr)
|
||||
|
||||
if attempts > maxAttemptsToDial {
|
||||
r.Logger.Error("Reached max attempts to dial", "addr", addr, "attempts", attempts)
|
||||
@ -439,6 +473,9 @@ func (r *PEXReactor) checkSeeds() error {
|
||||
if lSeeds == 0 {
|
||||
return nil
|
||||
}
|
||||
// TODO: don't exit the program if we simply cant resolve a DNS name.
|
||||
// But if names or addresses are incorrectly speficied (ie. invalid),
|
||||
// then we should return an err that causes an exit
|
||||
_, errs := p2p.NewNetAddressStrings(r.config.Seeds)
|
||||
for _, err := range errs {
|
||||
if err != nil {
|
||||
|
@ -351,6 +351,7 @@ func (sw *Switch) IsDialing(id ID) bool {
|
||||
}
|
||||
|
||||
// DialPeersAsync dials a list of peers asynchronously in random order (optionally, making them persistent).
|
||||
// TODO: remove addrBook arg since it's now set on the switch
|
||||
func (sw *Switch) DialPeersAsync(addrBook AddrBook, peers []string, persistent bool) error {
|
||||
netAddrs, errs := NewNetAddressStrings(peers)
|
||||
// only log errors, dial correct addresses
|
||||
@ -360,7 +361,10 @@ func (sw *Switch) DialPeersAsync(addrBook AddrBook, peers []string, persistent b
|
||||
|
||||
ourAddr := sw.nodeInfo.NetAddress()
|
||||
|
||||
// TODO: move this out of here ?
|
||||
// TODO: this code feels like it's in the wrong place.
|
||||
// The integration tests depend on the addrBook being saved
|
||||
// right away but maybe we can change that. Recall that
|
||||
// the addrBook is only written to disk every 2min
|
||||
if addrBook != nil {
|
||||
// add peers to `addrBook`
|
||||
for _, netAddr := range netAddrs {
|
||||
|
@ -47,6 +47,8 @@ func NetInfo() (*ctypes.ResultNetInfo, error) {
|
||||
ConnectionStatus: peer.Status(),
|
||||
})
|
||||
}
|
||||
// TODO: should we include "num_peers" field for convenience ?
|
||||
// Let's also include the PersistentPeers and Seeds in here.
|
||||
return &ctypes.ResultNetInfo{
|
||||
Listening: listening,
|
||||
Listeners: listeners,
|
||||
|
Reference in New Issue
Block a user