feat(swarm): expose ConnectionId and add conn duration metric

- Exposes the `ConnectionId` in the various `SwarmEvent` variants.
- Tracks connection duration in `libp2p-metrics::swarm`.

Pull-Request: #3927.
This commit is contained in:
Max Inden
2023-05-17 07:19:53 +02:00
committed by GitHub
parent a580906a88
commit cc5b346a7c
13 changed files with 159 additions and 51 deletions

View File

@ -18,20 +18,25 @@
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
use std::collections::HashMap;
use std::sync::{Arc, Mutex};
use crate::protocol_stack;
use instant::Instant;
use libp2p_swarm::ConnectionId;
use prometheus_client::encoding::{EncodeLabelSet, EncodeLabelValue};
use prometheus_client::metrics::counter::Counter;
use prometheus_client::metrics::family::Family;
use prometheus_client::metrics::histogram::{exponential_buckets, Histogram};
use prometheus_client::registry::Registry;
use prometheus_client::registry::{Registry, Unit};
pub(crate) struct Metrics {
connections_incoming: Family<AddressLabels, Counter>,
connections_incoming_error: Family<IncomingConnectionErrorLabels, Counter>,
connections_established: Family<ConnectionEstablishedLabels, Counter>,
connections_establishment_duration: Family<ConnectionEstablishmentDurationLabels, Histogram>,
connections_closed: Family<ConnectionClosedLabels, Counter>,
connections_established: Family<ConnectionLabels, Counter>,
connections_establishment_duration: Family<ConnectionLabels, Histogram>,
connections_duration: Family<ConnectionClosedLabels, Histogram>,
new_listen_addr: Family<AddressLabels, Counter>,
expired_listen_addr: Family<AddressLabels, Counter>,
@ -41,6 +46,8 @@ pub(crate) struct Metrics {
dial_attempt: Counter,
outgoing_connection_error: Family<OutgoingConnectionErrorLabels, Counter>,
connections: Arc<Mutex<HashMap<ConnectionId, Instant>>>,
}
impl Metrics {
@ -110,27 +117,33 @@ impl Metrics {
connections_established.clone(),
);
let connections_closed = Family::default();
sub_registry.register(
"connections_closed",
"Number of connections closed",
connections_closed.clone(),
);
let connections_establishment_duration = Family::new_with_constructor(
create_connection_establishment_duration_histogram as fn() -> Histogram,
);
let connections_establishment_duration = {
let constructor: fn() -> Histogram =
|| Histogram::new(exponential_buckets(0.01, 1.5, 20));
Family::new_with_constructor(constructor)
};
sub_registry.register(
"connections_establishment_duration",
"Time it took (locally) to establish connections",
connections_establishment_duration.clone(),
);
let connections_duration = {
let constructor: fn() -> Histogram =
|| Histogram::new(exponential_buckets(0.01, 3.0, 20));
Family::new_with_constructor(constructor)
};
sub_registry.register_with_unit(
"connections_establishment_duration",
"Time it took (locally) to establish connections",
Unit::Seconds,
connections_establishment_duration.clone(),
);
Self {
connections_incoming,
connections_incoming_error,
connections_established,
connections_closed,
new_listen_addr,
expired_listen_addr,
listener_closed,
@ -138,6 +151,8 @@ impl Metrics {
dial_attempt,
outgoing_connection_error,
connections_establishment_duration,
connections_duration,
connections: Default::default(),
}
}
}
@ -149,9 +164,10 @@ impl<TBvEv, THandleErr> super::Recorder<libp2p_swarm::SwarmEvent<TBvEv, THandleE
libp2p_swarm::SwarmEvent::ConnectionEstablished {
endpoint,
established_in: time_taken,
connection_id,
..
} => {
let labels = ConnectionEstablishedLabels {
let labels = ConnectionLabels {
role: endpoint.into(),
protocols: protocol_stack::as_string(endpoint.get_remote_address()),
};
@ -159,14 +175,33 @@ impl<TBvEv, THandleErr> super::Recorder<libp2p_swarm::SwarmEvent<TBvEv, THandleE
self.connections_establishment_duration
.get_or_create(&labels)
.observe(time_taken.as_secs_f64());
self.connections
.lock()
.expect("lock not to be poisoned")
.insert(*connection_id, Instant::now());
}
libp2p_swarm::SwarmEvent::ConnectionClosed { endpoint, .. } => {
self.connections_closed
.get_or_create(&ConnectionClosedLabels {
libp2p_swarm::SwarmEvent::ConnectionClosed {
endpoint,
connection_id,
cause,
..
} => {
let labels = ConnectionClosedLabels {
connection: ConnectionLabels {
role: endpoint.into(),
protocols: protocol_stack::as_string(endpoint.get_remote_address()),
})
.inc();
},
cause: cause.as_ref().map(Into::into),
};
self.connections_duration.get_or_create(&labels).observe(
self.connections
.lock()
.expect("lock not to be poisoned")
.remove(connection_id)
.expect("closed connection to previously be established")
.elapsed()
.as_secs_f64(),
);
}
libp2p_swarm::SwarmEvent::IncomingConnection { send_back_addr, .. } => {
self.connections_incoming
@ -187,7 +222,7 @@ impl<TBvEv, THandleErr> super::Recorder<libp2p_swarm::SwarmEvent<TBvEv, THandleE
})
.inc();
}
libp2p_swarm::SwarmEvent::OutgoingConnectionError { error, peer_id } => {
libp2p_swarm::SwarmEvent::OutgoingConnectionError { error, peer_id, .. } => {
let peer = match peer_id {
Some(_) => PeerStatus::Known,
None => PeerStatus::Unknown,
@ -261,7 +296,7 @@ impl<TBvEv, THandleErr> super::Recorder<libp2p_swarm::SwarmEvent<TBvEv, THandleE
libp2p_swarm::SwarmEvent::ListenerError { .. } => {
self.listener_error.inc();
}
libp2p_swarm::SwarmEvent::Dialing(_) => {
libp2p_swarm::SwarmEvent::Dialing { .. } => {
self.dial_attempt.inc();
}
}
@ -269,17 +304,33 @@ impl<TBvEv, THandleErr> super::Recorder<libp2p_swarm::SwarmEvent<TBvEv, THandleE
}
#[derive(EncodeLabelSet, Hash, Clone, Eq, PartialEq, Debug)]
struct ConnectionEstablishedLabels {
struct ConnectionLabels {
role: Role,
protocols: String,
}
type ConnectionEstablishmentDurationLabels = ConnectionEstablishedLabels;
#[derive(EncodeLabelSet, Hash, Clone, Eq, PartialEq, Debug)]
struct ConnectionClosedLabels {
role: Role,
protocols: String,
cause: Option<ConnectionError>,
#[prometheus(flatten)]
connection: ConnectionLabels,
}
#[derive(EncodeLabelValue, Hash, Clone, Eq, PartialEq, Debug)]
enum ConnectionError {
Io,
KeepAliveTimeout,
Handler,
}
impl<E> From<&libp2p_swarm::ConnectionError<E>> for ConnectionError {
fn from(value: &libp2p_swarm::ConnectionError<E>) -> Self {
match value {
libp2p_swarm::ConnectionError::IO(_) => ConnectionError::Io,
libp2p_swarm::ConnectionError::KeepAliveTimeout => ConnectionError::KeepAliveTimeout,
libp2p_swarm::ConnectionError::Handler(_) => ConnectionError::Handler,
}
}
}
#[derive(EncodeLabelSet, Hash, Clone, Eq, PartialEq, Debug)]
@ -359,7 +410,3 @@ impl From<&libp2p_swarm::ListenError> for IncomingConnectionError {
}
}
}
fn create_connection_establishment_duration_histogram() -> Histogram {
Histogram::new(exponential_buckets(0.01, 1.5, 20))
}