feat(metrics)!: expose identify metrics for connected peers only

Previously we would increase a counter / gauge / histogram on each received identify information. These metrics are missleading, as e.g. they depend on the identify interval and don't represent the set of currently connected peers.

With this commit, identify information is tracked for the currently connected peers only. Instead of an increase on each received identify information, metrics represent the status quo (Gauge).

Example:

```
\# HELP libp2p_libp2p_identify_remote_protocols Number of connected nodes supporting a specific protocol, with "unrecognized" for each peer supporting one or more unrecognized protocols...
\# TYPE libp2p_libp2p_identify_remote_protocols gauge
libp2p_libp2p_identify_remote_protocols_total{protocol="/ipfs/id/push/1.0.0"} 1
libp2p_libp2p_identify_remote_protocols_total{protocol="/ipfs/id/1.0.0"} 1
libp2p_libp2p_identify_remote_protocols_total{protocol="/ipfs/ping/1.0.0"} 1
libp2p_libp2p_identify_remote_protocols_total{protocol="unrecognized"} 1
\# HELP libp2p_libp2p_identify_remote_listen_addresses Number of connected nodes advertising a specific listen address...
\# TYPE libp2p_libp2p_identify_remote_listen_addresses gauge
libp2p_libp2p_identify_remote_listen_addresses_total{listen_address="/ip4/tcp"} 1
libp2p_libp2p_identify_remote_listen_addresses_total{listen_address="/ip4/udp/quic"} 1
\# HELP libp2p_libp2p_identify_local_observed_addresses Number of connected nodes observing the local node at a specific address...
\# TYPE libp2p_libp2p_identify_local_observed_addresses gauge
libp2p_libp2p_identify_local_observed_addresses_total{observed_address="/ip4/tcp"} 1
```

Pull-Request: #3325.
This commit is contained in:
Max Inden
2023-05-15 04:58:06 +02:00
committed by GitHub
parent 1742bffcea
commit 2d83725c09
7 changed files with 180 additions and 145 deletions

5
Cargo.lock generated
View File

@ -2732,6 +2732,7 @@ dependencies = [
"libp2p-ping", "libp2p-ping",
"libp2p-relay", "libp2p-relay",
"libp2p-swarm", "libp2p-swarm",
"once_cell",
"prometheus-client", "prometheus-client",
] ]
@ -4019,9 +4020,9 @@ dependencies = [
[[package]] [[package]]
name = "prometheus-client" name = "prometheus-client"
version = "0.20.0" version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e227aeb6c2cfec819e999c4773b35f8c7fa37298a203ff46420095458eee567e" checksum = "38974b1966bd5b6c7c823a20c1e07d5b84b171db20bac601e9b529720f7299f8"
dependencies = [ dependencies = [
"dtoa", "dtoa",
"itoa", "itoa",

View File

@ -12,4 +12,4 @@ hyper = { version = "0.14", features = ["server", "tcp", "http1"] }
libp2p = { path = "../../libp2p", features = ["async-std", "metrics", "ping", "noise", "identify", "tcp", "yamux", "macros"] } libp2p = { path = "../../libp2p", features = ["async-std", "metrics", "ping", "noise", "identify", "tcp", "yamux", "macros"] }
log = "0.4.0" log = "0.4.0"
tokio = { version = "1", features = ["rt-multi-thread"] } tokio = { version = "1", features = ["rt-multi-thread"] }
prometheus-client = "0.20.0" prometheus-client = "0.21.0"

View File

@ -33,7 +33,7 @@ const METRICS_CONTENT_TYPE: &str = "application/openmetrics-text;charset=utf-8;v
pub(crate) async fn metrics_server(registry: Registry) -> Result<(), std::io::Error> { pub(crate) async fn metrics_server(registry: Registry) -> Result<(), std::io::Error> {
// Serve on localhost. // Serve on localhost.
let addr = ([127, 0, 0, 1], 0).into(); let addr = ([127, 0, 0, 1], 8080).into();
// Use the tokio runtime to run the hyper server. // Use the tokio runtime to run the hyper server.
let rt = tokio::runtime::Runtime::new()?; let rt = tokio::runtime::Runtime::new()?;

View File

@ -1,9 +1,29 @@
## 0.13.0 - unreleased ## 0.13.0 - unreleased
- Previously `libp2p-metrics::identify` would increase a counter / gauge / histogram on each
received identify information. These metrics are misleading, as e.g. they depend on the identify
interval and don't represent the set of currently connected peers. With this change, identify
information is tracked for the currently connected peers only. Instead of an increase on each
received identify information, metrics represent the status quo (Gauge).
Metrics removed:
- `libp2p_identify_protocols`
- `libp2p_identify_received_info_listen_addrs`
- `libp2p_identify_received_info_protocols`
- `libp2p_identify_listen_addresses`
Metrics added:
- `libp2p_identify_remote_protocols`
- `libp2p_identify_remote_listen_addresses`
- `libp2p_identify_local_observed_addresses`
See [PR 3325].
- Raise MSRV to 1.65. - Raise MSRV to 1.65.
See [PR 3715]. See [PR 3715].
[PR 3715]: https://github.com/libp2p/rust-libp2p/pull/3715 [PR 3715]: https://github.com/libp2p/rust-libp2p/pull/3715
[PR 3325]: https://github.com/libp2p/rust-libp2p/pull/3325
## 0.12.0 ## 0.12.0

View File

@ -27,7 +27,8 @@ libp2p-ping = { workspace = true, optional = true }
libp2p-relay = { workspace = true, optional = true } libp2p-relay = { workspace = true, optional = true }
libp2p-swarm = { workspace = true } libp2p-swarm = { workspace = true }
libp2p-identity = { workspace = true } libp2p-identity = { workspace = true }
prometheus-client = "0.20.0" prometheus-client = { version = "0.21.0" }
once_cell = "1.16.0"
[target.'cfg(not(target_os = "unknown"))'.dependencies] [target.'cfg(not(target_os = "unknown"))'.dependencies]
libp2p-gossipsub = { workspace = true, optional = true } libp2p-gossipsub = { workspace = true, optional = true }

View File

@ -21,39 +21,78 @@
use crate::protocol_stack; use crate::protocol_stack;
use libp2p_identity::PeerId; use libp2p_identity::PeerId;
use libp2p_swarm::StreamProtocol; use libp2p_swarm::StreamProtocol;
use prometheus_client::encoding::{EncodeLabelSet, EncodeMetric, MetricEncoder}; use once_cell::sync::Lazy;
use prometheus_client::collector::Collector;
use prometheus_client::encoding::EncodeLabelSet;
use prometheus_client::metrics::counter::Counter; use prometheus_client::metrics::counter::Counter;
use prometheus_client::metrics::family::Family; use prometheus_client::metrics::family::ConstFamily;
use prometheus_client::metrics::histogram::{exponential_buckets, Histogram}; use prometheus_client::metrics::gauge::ConstGauge;
use prometheus_client::metrics::MetricType; use prometheus_client::registry::{Descriptor, LocalMetric, Registry};
use prometheus_client::registry::Registry; use prometheus_client::MaybeOwned;
use std::borrow::Cow;
use std::collections::HashMap; use std::collections::HashMap;
use std::iter;
use std::sync::{Arc, Mutex}; use std::sync::{Arc, Mutex};
static PROTOCOLS_DESCRIPTOR: Lazy<Descriptor> = Lazy::new(|| {
Descriptor::new(
"remote_protocols",
r#"Number of connected nodes supporting a specific protocol, with "unrecognized" for each
peer supporting one or more unrecognized protocols"#,
None,
None,
vec![],
)
});
static LISTEN_ADDRESSES_DESCRIPTOR: Lazy<Descriptor> = Lazy::new(|| {
Descriptor::new(
"remote_listen_addresses",
"Number of connected nodes advertising a specific listen address",
None,
None,
vec![],
)
});
static OBSERVED_ADDRESSES_DESCRIPTOR: Lazy<Descriptor> = Lazy::new(|| {
Descriptor::new(
"local_observed_addresses",
"Number of connected nodes observing the local node at a specific address",
None,
None,
vec![],
)
});
const ALLOWED_PROTOCOLS: &[StreamProtocol] = &[
#[cfg(feature = "dcutr")]
libp2p_dcutr::PROTOCOL_NAME,
// #[cfg(feature = "gossipsub")]
// #[cfg(not(target_os = "unknown"))]
// TODO: Add Gossipsub protocol name
libp2p_identify::PROTOCOL_NAME,
libp2p_identify::PUSH_PROTOCOL_NAME,
#[cfg(feature = "kad")]
libp2p_kad::PROTOCOL_NAME,
#[cfg(feature = "ping")]
libp2p_ping::PROTOCOL_NAME,
#[cfg(feature = "relay")]
libp2p_relay::STOP_PROTOCOL_NAME,
#[cfg(feature = "relay")]
libp2p_relay::HOP_PROTOCOL_NAME,
];
pub(crate) struct Metrics { pub(crate) struct Metrics {
protocols: Protocols, peers: Peers,
error: Counter, error: Counter,
pushed: Counter, pushed: Counter,
received: Counter, received: Counter,
received_info_listen_addrs: Histogram,
received_info_protocols: Histogram,
sent: Counter, sent: Counter,
listen_addresses: Family<AddressLabels, Counter>,
} }
impl Metrics { impl Metrics {
pub(crate) fn new(registry: &mut Registry) -> Self { pub(crate) fn new(registry: &mut Registry) -> Self {
let sub_registry = registry.sub_registry_with_prefix("identify"); let sub_registry = registry.sub_registry_with_prefix("identify");
let protocols = Protocols::default(); let peers = Peers::default();
sub_registry.register( sub_registry.register_collector(Box::new(peers.clone()));
"protocols",
"Number of connected nodes supporting a specific protocol, with \
\"unrecognized\" for each peer supporting one or more unrecognized \
protocols",
protocols.clone(),
);
let error = Counter::default(); let error = Counter::default();
sub_registry.register( sub_registry.register(
@ -78,24 +117,6 @@ impl Metrics {
received.clone(), received.clone(),
); );
let received_info_listen_addrs =
Histogram::new(iter::once(0.0).chain(exponential_buckets(1.0, 2.0, 9)));
sub_registry.register(
"received_info_listen_addrs",
"Number of listen addresses for remote peer received in \
identification information",
received_info_listen_addrs.clone(),
);
let received_info_protocols =
Histogram::new(iter::once(0.0).chain(exponential_buckets(1.0, 2.0, 9)));
sub_registry.register(
"received_info_protocols",
"Number of protocols supported by the remote peer received in \
identification information",
received_info_protocols.clone(),
);
let sent = Counter::default(); let sent = Counter::default();
sub_registry.register( sub_registry.register(
"sent", "sent",
@ -104,22 +125,12 @@ impl Metrics {
sent.clone(), sent.clone(),
); );
let listen_addresses = Family::default();
sub_registry.register(
"listen_addresses",
"Number of listen addresses for remote peer per protocol stack",
listen_addresses.clone(),
);
Self { Self {
protocols, peers,
error, error,
pushed, pushed,
received, received,
received_info_listen_addrs,
received_info_protocols,
sent, sent,
listen_addresses,
} }
} }
} }
@ -134,58 +145,8 @@ impl super::Recorder<libp2p_identify::Event> for Metrics {
self.pushed.inc(); self.pushed.inc();
} }
libp2p_identify::Event::Received { peer_id, info, .. } => { libp2p_identify::Event::Received { peer_id, info, .. } => {
{
let mut protocols = info
.protocols
.iter()
.filter(|p| {
let allowed_protocols: &[StreamProtocol] = &[
#[cfg(feature = "dcutr")]
libp2p_dcutr::PROTOCOL_NAME,
// #[cfg(feature = "gossipsub")]
// #[cfg(not(target_os = "unknown"))]
// TODO: Add Gossipsub protocol name
libp2p_identify::PROTOCOL_NAME,
libp2p_identify::PUSH_PROTOCOL_NAME,
#[cfg(feature = "kad")]
libp2p_kad::PROTOCOL_NAME,
#[cfg(feature = "ping")]
libp2p_ping::PROTOCOL_NAME,
#[cfg(feature = "relay")]
libp2p_relay::STOP_PROTOCOL_NAME,
#[cfg(feature = "relay")]
libp2p_relay::HOP_PROTOCOL_NAME,
];
allowed_protocols.contains(p)
})
.map(|p| p.to_string())
.collect::<Vec<_>>();
// Signal via an additional label value that one or more
// protocols of the remote peer have not been recognized.
if protocols.len() < info.protocols.len() {
protocols.push("unrecognized".to_string());
}
protocols.sort_unstable();
protocols.dedup();
self.protocols.add(*peer_id, protocols);
}
self.received.inc(); self.received.inc();
self.received_info_protocols self.peers.record(*peer_id, info.clone());
.observe(info.protocols.len() as f64);
self.received_info_listen_addrs
.observe(info.listen_addrs.len() as f64);
for listen_addr in &info.listen_addrs {
self.listen_addresses
.get_or_create(&AddressLabels {
protocols: protocol_stack::as_string(listen_addr),
})
.inc();
}
} }
libp2p_identify::Event::Sent { .. } => { libp2p_identify::Event::Sent { .. } => {
self.sent.inc(); self.sent.inc();
@ -203,7 +164,7 @@ impl<TBvEv, THandleErr> super::Recorder<libp2p_swarm::SwarmEvent<TBvEv, THandleE
} = event } = event
{ {
if *num_established == 0 { if *num_established == 0 {
self.protocols.remove(*peer_id) self.peers.remove(*peer_id);
} }
} }
} }
@ -214,55 +175,107 @@ struct AddressLabels {
protocols: String, protocols: String,
} }
#[derive(Default, Clone, Debug)] #[derive(Default, Debug, Clone)]
struct Protocols { struct Peers(Arc<Mutex<HashMap<PeerId, libp2p_identify::Info>>>);
peers: Arc<Mutex<HashMap<PeerId, Vec<String>>>>,
}
impl Protocols { impl Peers {
fn add(&self, peer: PeerId, protocols: Vec<String>) { fn record(&self, peer_id: PeerId, info: libp2p_identify::Info) {
self.peers self.0.lock().unwrap().insert(peer_id, info);
.lock()
.expect("Lock not to be poisoned")
.insert(peer, protocols);
} }
fn remove(&self, peer: PeerId) { fn remove(&self, peer_id: PeerId) {
self.peers self.0.lock().unwrap().remove(&peer_id);
.lock()
.expect("Lock not to be poisoned")
.remove(&peer);
} }
} }
impl EncodeMetric for Protocols { impl Collector for Peers {
fn encode(&self, mut encoder: MetricEncoder) -> Result<(), std::fmt::Error> { fn collect<'a>(
let count_by_protocol = self &'a self,
.peers ) -> Box<dyn Iterator<Item = (Cow<'a, Descriptor>, MaybeOwned<'a, Box<dyn LocalMetric>>)> + 'a>
.lock() {
.expect("Lock not to be poisoned") let mut count_by_protocols: HashMap<String, i64> = Default::default();
.iter() let mut count_by_listen_addresses: HashMap<String, i64> = Default::default();
.fold( let mut count_by_observed_addresses: HashMap<String, i64> = Default::default();
HashMap::<String, i64>::default(),
|mut acc, (_, protocols)| {
for protocol in protocols {
let count = acc.entry(protocol.to_string()).or_default();
*count += 1;
}
acc
},
);
for (protocol, count) in count_by_protocol { for (_, peer_info) in self.0.lock().unwrap().iter() {
encoder {
.encode_family(&[("protocol", protocol)])? let mut protocols: Vec<_> = peer_info
.encode_gauge(&count)?; .protocols
.iter()
.map(|p| {
if ALLOWED_PROTOCOLS.contains(&p) {
p.to_string()
} else {
"unrecognized".to_string()
}
})
.collect();
protocols.sort();
protocols.dedup();
for protocol in protocols.into_iter() {
let count = count_by_protocols.entry(protocol).or_default();
*count += 1;
}
}
{
let mut addrs: Vec<_> = peer_info
.listen_addrs
.iter()
.map(protocol_stack::as_string)
.collect();
addrs.sort();
addrs.dedup();
for addr in addrs {
let count = count_by_listen_addresses.entry(addr).or_default();
*count += 1;
}
}
{
let count = count_by_observed_addresses
.entry(protocol_stack::as_string(&peer_info.observed_addr))
.or_default();
*count += 1;
}
} }
Ok(()) let count_by_protocols: Box<dyn LocalMetric> =
} Box::new(ConstFamily::new(count_by_protocols.into_iter().map(
|(protocol, count)| ([("protocol", protocol)], ConstGauge::new(count)),
)));
fn metric_type(&self) -> MetricType { let count_by_listen_addresses: Box<dyn LocalMetric> =
MetricType::Gauge Box::new(ConstFamily::new(count_by_listen_addresses.into_iter().map(
|(protocol, count)| ([("listen_address", protocol)], ConstGauge::new(count)),
)));
let count_by_observed_addresses: Box<dyn LocalMetric> = Box::new(ConstFamily::new(
count_by_observed_addresses
.into_iter()
.map(|(protocol, count)| {
([("observed_address", protocol)], ConstGauge::new(count))
}),
));
Box::new(
[
(
Cow::Borrowed(&*PROTOCOLS_DESCRIPTOR),
MaybeOwned::Owned(count_by_protocols),
),
(
Cow::Borrowed(&*LISTEN_ADDRESSES_DESCRIPTOR),
MaybeOwned::Owned(count_by_listen_addresses),
),
(
Cow::Borrowed(&*OBSERVED_ADDRESSES_DESCRIPTOR),
MaybeOwned::Owned(count_by_observed_addresses),
),
]
.into_iter(),
)
} }
} }

View File

@ -35,7 +35,7 @@ wasm-timer = "0.2.5"
instant = "0.1.11" instant = "0.1.11"
void = "1.0.2" void = "1.0.2"
# Metrics dependencies # Metrics dependencies
prometheus-client = "0.20.0" prometheus-client = "0.21.0"
[dev-dependencies] [dev-dependencies]
async-std = { version = "1.6.3", features = ["unstable"] } async-std = { version = "1.6.3", features = ["unstable"] }