Fix regression w.r.t. reporting of dial errors. (#1493)

* Fix regression w.r.t. reporting of dial errors.

PR [1440] introduced a regression w.r.t. the reporting of
dial errors. In particular, if a connection attempt fails
due to an invalid remote peer ID, any remaining addresses
for the same peer would not be tried (intentional) but
the dial failure would not be reported to the behaviour,
causing e.g. libp2p-kad queries to potentially stall.

In hindsight, I figured it is better to preserve the
previous behaviour to still try alternative addresses
of the peer even on invalid peer ID errors on an earlier
address. In particular because in the context of libp2p-kad
it is not uncommon for peers to report localhost addresses
while the local node actually has e.g. an ipfs node running
on that address, obviously with a different peer ID, which
is the scenario causing frequent invalid peer ID (mismatch)
errors when running the ipfs-kad example.

This commit thus restores the previous behaviour w.r.t.
trying all remaining addresses on invalid peer ID errors
as well as making sure `inject_dial_error` is always
called when the last attempt failed.

[1440]: https://github.com/libp2p/rust-libp2p/pull/1440.

* Remove an fmt::Debug requirement.
This commit is contained in:
Roman Borschel
2020-03-16 16:53:21 +01:00
committed by GitHub
parent 96cd509c60
commit 58ee13b630
8 changed files with 96 additions and 109 deletions

View File

@ -7,6 +7,7 @@ workflows:
- test
- test-wasm
- check-rustdoc-links
- integration-test
jobs:
test:
@ -90,3 +91,24 @@ jobs:
- ./target
- /usr/local/cargo
- /root/.cache/sccache
integration-test:
docker:
- image: rust
- image: ipfs/go-ipfs
steps:
- checkout
- restore_cache:
key: integration-test-cache-{{ epoch }}
- run:
name: Print Rust version
command: |
rustc --version
- run:
command: RUST_LOG=libp2p_swarm=debug,libp2p_kad=trace,libp2p_tcp=debug cargo run --example ipfs-kad
- save_cache:
key: integration-test-cache-{{ epoch }}
paths:
- "~/.cargo"
- "./target"

View File

@ -29,6 +29,10 @@ pub enum ConnectionError<THandlerErr> {
// TODO: Eventually this should also be a custom error?
IO(io::Error),
/// The connection was dropped because the connection limit
/// for a peer has been reached.
ConnectionLimit(ConnectionLimit),
/// The connection handler produced an error.
Handler(THandlerErr),
}
@ -44,6 +48,8 @@ where
write!(f, "Connection error: I/O error: {}", err),
ConnectionError::Handler(err) =>
write!(f, "Connection error: Handler error: {}", err),
ConnectionError::ConnectionLimit(l) =>
write!(f, "Connection error: Connection limit: {}.", l)
}
}
}
@ -57,6 +63,7 @@ where
match self {
ConnectionError::IO(err) => Some(err),
ConnectionError::Handler(err) => Some(err),
ConnectionError::ConnectionLimit(..) => None,
}
}
}
@ -71,10 +78,6 @@ pub enum PendingConnectionError<TTransErr> {
/// match the one that was expected or is otherwise invalid.
InvalidPeerId,
/// The pending connection was successfully negotiated but dropped
/// because the connection limit for a peer has been reached.
ConnectionLimit(ConnectionLimit),
/// An I/O error occurred on the connection.
// TODO: Eventually this should also be a custom error?
IO(io::Error),
@ -93,8 +96,6 @@ where
write!(f, "Pending connection: Transport error: {}", err),
PendingConnectionError::InvalidPeerId =>
write!(f, "Pending connection: Invalid peer ID."),
PendingConnectionError::ConnectionLimit(l) =>
write!(f, "Pending connection: Connection limit: {}.", l)
}
}
}
@ -109,7 +110,6 @@ where
PendingConnectionError::IO(err) => Some(err),
PendingConnectionError::Transport(err) => Some(err),
PendingConnectionError::InvalidPeerId => None,
PendingConnectionError::ConnectionLimit(..) => None,
}
}
}

View File

@ -112,7 +112,7 @@ pub enum PoolEvent<'a, TInEvent, TOutEvent, THandler, TTransErr, THandlerErr, TC
error: PendingConnectionError<TTransErr>,
/// The handler that was supposed to handle the connection,
/// if the connection failed before the handler was consumed.
handler: Option<THandler>,
handler: THandler,
/// The (expected) peer of the failed connection.
peer: Option<TPeerId>,
/// A reference to the pool that managed the connection.
@ -222,6 +222,7 @@ where
TOutEvent: Send + 'static,
TMuxer: StreamMuxer + Send + Sync + 'static,
TMuxer::OutboundSubstream: Send + 'static,
TPeerId: Clone + Send + 'static,
{
let endpoint = info.to_connected_point();
if let Some(limit) = self.limits.max_pending_incoming {
@ -263,7 +264,7 @@ where
TOutEvent: Send + 'static,
TMuxer: StreamMuxer + Send + Sync + 'static,
TMuxer::OutboundSubstream: Send + 'static,
TPeerId: Clone,
TPeerId: Clone + Send + 'static,
{
self.limits.check_outgoing(|| self.iter_pending_outgoing().count())?;
let endpoint = info.to_connected_point();
@ -298,14 +299,32 @@ where
TOutEvent: Send + 'static,
TMuxer: StreamMuxer + Send + Sync + 'static,
TMuxer::OutboundSubstream: Send + 'static,
TPeerId: Clone + Send + 'static,
{
// Validate the received peer ID as the last step of the pending connection
// future, so that these errors can be raised before the `handler` is consumed
// by the background task, which happens when this future resolves to an
// "established" connection.
let future = future.and_then({
let endpoint = endpoint.clone();
let expected_peer = peer.clone();
let local_id = self.local_id.clone();
move |(info, muxer)| {
if let Some(peer) = expected_peer {
if &peer != info.peer_id() {
return future::err(PendingConnectionError::InvalidPeerId)
}
}
if &local_id == info.peer_id() {
return future::err(PendingConnectionError::InvalidPeerId)
}
let connected = Connected { info, endpoint };
future::ready(Ok((connected, muxer)))
}
});
let id = self.manager.add_pending(future, handler);
self.pending.insert(id, (endpoint, peer));
id
@ -536,7 +555,7 @@ where
PoolEvent<'a, TInEvent, TOutEvent, THandler, TTransErr, THandlerErr, TConnInfo, TPeerId>
> where
TConnInfo: ConnectionInfo<PeerId = TPeerId> + Clone,
TPeerId: Clone,
TPeerId: Clone
{
loop {
let item = match self.manager.poll(cx) {
@ -551,7 +570,7 @@ where
id,
endpoint,
error,
handler: Some(handler),
handler,
peer,
pool: self
})
@ -581,39 +600,25 @@ where
.map_or(0, |conns| conns.len());
if let Err(e) = self.limits.check_established(current) {
let connected = entry.close();
return Poll::Ready(PoolEvent::PendingConnectionError {
let num_established = e.current;
return Poll::Ready(PoolEvent::ConnectionError {
id,
endpoint: connected.endpoint,
peer: Some(connected.info.peer_id().clone()),
error: PendingConnectionError::ConnectionLimit(e),
connected,
error: ConnectionError::ConnectionLimit(e),
num_established,
pool: self,
handler: None,
})
}
// Check peer ID.
// Peer ID checks must already have happened. See `add_pending`.
if cfg!(debug_assertions) {
if &self.local_id == entry.connected().peer_id() {
panic!("Unexpected local peer ID for remote.");
}
if let Some(peer) = peer {
if &peer != entry.connected().peer_id() {
let connected = entry.close();
return Poll::Ready(PoolEvent::PendingConnectionError {
id,
endpoint: connected.endpoint,
peer: Some(connected.info.peer_id().clone()),
error: PendingConnectionError::InvalidPeerId,
pool: self,
handler: None,
})
panic!("Unexpected peer ID mismatch.");
}
}
if &self.local_id == entry.connected().peer_id() {
let connected = entry.close();
return Poll::Ready(PoolEvent::PendingConnectionError {
id,
endpoint: connected.endpoint,
peer: Some(connected.info.peer_id().clone()),
error: PendingConnectionError::InvalidPeerId,
pool: self,
handler: None,
})
}
// Add the connection to the pool.
let peer = entry.connected().peer_id().clone();

View File

@ -55,7 +55,6 @@ use std::{
error,
fmt,
hash::Hash,
num::NonZeroUsize,
pin::Pin,
task::{Context, Poll},
};
@ -331,7 +330,7 @@ where
THandler::Handler: ConnectionHandler<Substream = Substream<TMuxer>, InEvent = TInEvent, OutEvent = TOutEvent> + Send + 'static,
<THandler::Handler as ConnectionHandler>::Error: error::Error + Send + 'static,
TConnInfo: Clone,
TPeerId: AsRef<[u8]> + Send + 'static,
TPeerId: Send + 'static,
{
// Poll the listener(s) for new connections.
match ListenersStream::poll(Pin::new(&mut self.listeners), cx) {
@ -383,7 +382,7 @@ where
}
Poll::Ready(PoolEvent::PendingConnectionError { id, endpoint, error, handler, pool, .. }) => {
let dialing = &mut self.dialing;
let (next, event) = on_connection_failed(pool, dialing, id, endpoint, error, handler);
let (next, event) = on_connection_failed(dialing, id, endpoint, error, handler);
if let Some(dial) = next {
let transport = self.listeners.transport().clone();
if let Err(e) = dial_peer_impl(transport, pool, dialing, dial) {
@ -496,13 +495,11 @@ where
/// If the failed connection attempt was a dialing attempt and there
/// are more addresses to try, new `DialingOpts` are returned.
fn on_connection_failed<'a, TTrans, TInEvent, TOutEvent, THandler, TConnInfo, TPeerId>(
pool: &Pool<TInEvent, TOutEvent, THandler, TTrans::Error,
<THandler::Handler as ConnectionHandler>::Error, TConnInfo, TPeerId>,
dialing: &mut FnvHashMap<TPeerId, peer::DialingAttempt>,
id: ConnectionId,
endpoint: ConnectedPoint,
error: PendingConnectionError<TTrans::Error>,
handler: Option<THandler>,
handler: THandler,
) -> (Option<DialingOpts<TPeerId, THandler>>, NetworkEvent<'a, TTrans, TInEvent, TOutEvent, THandler, TConnInfo, TPeerId>)
where
TTrans: Transport,
@ -518,41 +515,27 @@ where
if let Some(peer_id) = dialing_peer {
// A pending outgoing connection to a known peer failed.
let attempt = dialing.remove(&peer_id).expect("by (1)");
let mut attempt = dialing.remove(&peer_id).expect("by (1)");
let num_remain = attempt.next.len();
let failed_addr = attempt.current.clone();
let new_state = if pool.is_connected(&peer_id) {
peer::PeerState::Connected
} else if num_remain == 0 { // (2)
peer::PeerState::Disconnected
} else {
peer::PeerState::Dialing {
num_pending_addresses: NonZeroUsize::new(num_remain).expect("by (2)"),
}
};
let opts =
if let Some(handler) = handler {
if !attempt.next.is_empty() {
let mut attempt = attempt;
if num_remain > 0 {
let next_attempt = attempt.next.remove(0);
Some(DialingOpts {
let opts = DialingOpts {
peer: peer_id.clone(),
handler,
address: next_attempt,
remaining: attempt.next
})
} else {
None
}
};
Some(opts)
} else {
None
};
(opts, NetworkEvent::DialError {
new_state,
attempts_remaining: num_remain,
peer_id,
multiaddr: failed_addr,
error,

View File

@ -39,7 +39,6 @@ use crate::{
pool::Pool,
},
muxing::StreamMuxer,
network::peer::PeerState,
transport::{Transport, TransportError},
};
use futures::prelude::*;
@ -122,8 +121,8 @@ where
/// A dialing attempt to an address of a peer failed.
DialError {
/// New state of a peer.
new_state: PeerState,
/// The number of remaining dialing attempts.
attempts_remaining: usize,
/// Id of the peer we were trying to dial.
peer_id: TPeerId,
@ -145,7 +144,7 @@ where
/// The handler that was passed to `dial()`, if the
/// connection failed before the handler was consumed.
handler: Option<THandler>,
handler: THandler,
},
/// An established connection produced an event.
@ -219,9 +218,9 @@ where
.field("error", error)
.finish()
}
NetworkEvent::DialError { new_state, peer_id, multiaddr, error } => {
NetworkEvent::DialError { attempts_remaining, peer_id, multiaddr, error } => {
f.debug_struct("DialError")
.field("new_state", new_state)
.field("attempts_remaining", attempts_remaining)
.field("peer_id", peer_id)
.field("multiaddr", multiaddr)
.field("error", error)

View File

@ -42,27 +42,9 @@ use std::{
error,
fmt,
hash::Hash,
num::NonZeroUsize,
};
use super::{Network, DialingOpts};
/// The state of a (remote) peer as seen by the local peer
/// through a [`Network`].
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum PeerState {
/// The [`Network`] is connected to the peer, i.e. has at least one
/// established connection.
Connected,
/// We are currently trying to reach this peer.
Dialing {
/// Number of addresses we are trying to dial.
num_pending_addresses: NonZeroUsize,
},
/// The [`Network`] is disconnected from the peer, i.e. has no
/// established connection and no pending, outgoing connection.
Disconnected,
}
/// The possible representations of a peer in a [`Network`], as
/// seen by the local node.
pub enum Peer<'a, TTrans, TInEvent, TOutEvent, THandler, TConnInfo, TPeerId>

View File

@ -29,7 +29,7 @@ use libp2p_core::{
Transport,
connection::PendingConnectionError,
muxing::StreamMuxerBox,
network::{NetworkEvent, peer::PeerState},
network::NetworkEvent,
upgrade,
};
use libp2p_swarm::{
@ -137,7 +137,7 @@ fn deny_incoming_connec() {
match swarm2.poll(cx) {
Poll::Ready(NetworkEvent::DialError {
new_state: PeerState::Disconnected,
attempts_remaining: 0,
peer_id,
multiaddr,
error: PendingConnectionError::Transport(_)
@ -294,7 +294,7 @@ fn multiple_addresses_err() {
loop {
match swarm.poll(cx) {
Poll::Ready(NetworkEvent::DialError {
new_state,
attempts_remaining,
peer_id,
multiaddr,
error: PendingConnectionError::Transport(_)
@ -303,15 +303,10 @@ fn multiple_addresses_err() {
let expected = addresses.remove(0);
assert_eq!(multiaddr, expected);
if addresses.is_empty() {
assert_eq!(new_state, PeerState::Disconnected);
assert_eq!(attempts_remaining, 0);
return Poll::Ready(Ok(()));
} else {
match new_state {
PeerState::Dialing { num_pending_addresses } => {
assert_eq!(num_pending_addresses.get(), addresses.len());
},
_ => panic!()
}
assert_eq!(attempts_remaining, addresses.len());
}
},
Poll::Ready(_) => unreachable!(),

View File

@ -108,7 +108,7 @@ use libp2p_core::{
NetworkEvent,
NetworkConfig,
Peer,
peer::{ConnectedPeer, PeerState},
peer::ConnectedPeer,
},
upgrade::ProtocolName,
};
@ -454,11 +454,12 @@ where TBehaviour: NetworkBehaviour<ProtocolsHandler = THandler>,
Poll::Ready(NetworkEvent::IncomingConnectionError { error, .. }) => {
log::debug!("Incoming connection failed: {:?}", error);
},
Poll::Ready(NetworkEvent::DialError { peer_id, multiaddr, error, new_state }) => {
log::debug!("Connection attempt to peer {:?} at address {:?} failed with {:?}",
peer_id, multiaddr, error);
Poll::Ready(NetworkEvent::DialError { peer_id, multiaddr, error, attempts_remaining }) => {
log::debug!(
"Connection attempt to {:?} via {:?} failed with {:?}. Attempts remaining: {}.",
peer_id, multiaddr, error, attempts_remaining);
this.behaviour.inject_addr_reach_failure(Some(&peer_id), &multiaddr, &error);
if let PeerState::Disconnected = new_state {
if attempts_remaining == 0 {
this.behaviour.inject_dial_failure(&peer_id);
}
return Poll::Ready(SwarmEvent::UnreachableAddr {