mirror of https://github.com/fluencelabs/rust-libp2p (synced 2025-06-24 07:11:38 +00:00)
add performance metrics to gossipsub (#2346)
* add performance metrics to gossipsub
* add changelog entry
* Re-export open-client-metrics crate
* Added some extra metrics
* More metrics
* Additional metrics
* Correct topic encoding
* More manual encoding of metric labels
* Clean up metric labelling
* Improve heartbeat duration buckets
* Remove re-export

Co-authored-by: Age Manning <Age@AgeManning.com>
@@ -4,9 +4,12 @@
 - Migrate to Rust edition 2021 (see [PR 2339]).
 
+- Add metrics for network and configuration performance analysis (see [PR 2346]).
+
 - Improve bandwidth performance by tracking IWANTs and reducing duplicate sends
   (see [PR 2327]).
 
+[PR 2346]: https://github.com/libp2p/rust-libp2p/pull/2346
 [PR 2339]: https://github.com/libp2p/rust-libp2p/pull/2339
 [PR 2327]: https://github.com/libp2p/rust-libp2p/pull/2327
 
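The changelog entry above covers the consumer-facing side of this change: the new counters, gauges, and histograms are registered into an open-metrics-client Registry owned by the application. A minimal sketch of wiring that up follows; the constructor name new_with_metrics and the default metrics configuration argument are assumptions about the API introduced by this commit, not text taken from the diff itself.

    use libp2p_core::identity::Keypair;
    use libp2p_gossipsub::{Gossipsub, GossipsubConfig, MessageAuthenticity};
    use open_metrics_client::registry::Registry;

    fn build_gossipsub(keypair: Keypair) -> Result<(Gossipsub, Registry), &'static str> {
        // The registry is what a Prometheus scrape endpoint later encodes.
        let mut registry = Registry::default();
        // Assumed metrics-enabled constructor; the plain Gossipsub::new stays
        // available when metrics are not wanted.
        let gossipsub = Gossipsub::new_with_metrics(
            MessageAuthenticity::Signed(keypair),
            GossipsubConfig::default(),
            &mut registry,
            Default::default(), // assumed metrics config default
        )?;
        Ok((gossipsub, registry))
    }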
@@ -31,7 +31,7 @@ futures-timer = "3.0.2"
 pin-project = "1.0.8"
 instant = "0.1.11"
 # Metrics dependencies
-open-metrics-client = "0.13"
+open-metrics-client = "0.13.0"
 
 [dev-dependencies]
 async-std = "1.6.3"
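The only runtime dependency this feature pulls in is open-metrics-client, pinned here to 0.13.0. Independent of gossipsub, that crate is used by creating a Registry, registering metrics into it, and rendering the registry in the OpenMetrics text format; a small sketch follows, with an illustrative metric name and help string.

    use open_metrics_client::encoding::text::encode;
    use open_metrics_client::metrics::counter::Counter;
    use open_metrics_client::registry::Registry;

    fn render_metrics() -> String {
        let mut registry = Registry::default();
        let example_counter: Counter = Counter::default();
        // The name and help text are arbitrary examples.
        registry.register(
            "example_events",
            "Number of example events",
            Box::new(example_counter.clone()),
        );
        example_counter.inc();

        // Encode the whole registry into the OpenMetrics text exposition format.
        let mut buf = Vec::new();
        encode(&mut buf, &registry).expect("writing to a Vec cannot fail");
        String::from_utf8(buf).expect("the text format is valid UTF-8")
    }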
@@ -51,7 +51,7 @@ use crate::error::{PublishError, SubscriptionError, ValidationError};
 use crate::gossip_promises::GossipPromises;
 use crate::handler::{GossipsubHandler, GossipsubHandlerIn, HandlerEvent};
 use crate::mcache::MessageCache;
-use crate::metrics::{Churn, Config as MetricsConfig, Inclusion, Metrics};
+use crate::metrics::{Churn, Config as MetricsConfig, Inclusion, Metrics, Penalty};
 use crate::peer_score::{PeerScore, PeerScoreParams, PeerScoreThresholds, RejectReason};
 use crate::protocol::SIGNING_PREFIX;
 use crate::subscription_filter::{AllowAllSubscriptionFilter, TopicSubscriptionFilter};
@@ -635,10 +635,9 @@ where
            return Err(PublishError::Duplicate);
        }
 
-        debug!("Publishing message: {:?}", msg_id);
+        trace!("Publishing message: {:?}", msg_id);
 
        let topic_hash = raw_message.topic.clone();
-        let msg_bytes = raw_message.data.len();
 
        // If we are not flood publishing forward the message to mesh peers.
        let mesh_peers_sent = !self.config.flood_publish()
@@ -732,8 +731,9 @@ where
        }
 
        // Send to peers we know are subscribed to the topic.
+        let msg_bytes = event.encoded_len();
        for peer_id in recipient_peers.iter() {
-            debug!("Sending message to peer: {:?}", peer_id);
+            trace!("Sending message to peer: {:?}", peer_id);
            self.send_message(*peer_id, event.clone())?;
 
            if let Some(m) = self.metrics.as_mut() {
@@ -742,6 +742,11 @@
        }
 
        debug!("Published message: {:?}", &msg_id);
 
+        if let Some(metrics) = self.metrics.as_mut() {
+            metrics.register_published_message(&topic_hash);
+        }
+
        Ok(msg_id)
    }
@@ -784,6 +789,11 @@
                    return Ok(false);
                }
            };
+
+        if let Some(metrics) = self.metrics.as_mut() {
+            metrics.register_msg_validation(&raw_message.topic, &acceptance);
+        }
+
        self.forward_msg(
            msg_id,
            raw_message,
@@ -797,6 +807,10 @@
        };
 
        if let Some((raw_message, originating_peers)) = self.mcache.remove(msg_id) {
+            if let Some(metrics) = self.metrics.as_mut() {
+                metrics.register_msg_validation(&raw_message.topic, &acceptance);
+            }
+
            // Tell peer_score about reject
            // Reject the original source, and any duplicates we've seen from other peers.
            if let Some((peer_score, ..)) = &mut self.peer_score {
@@ -960,7 +974,7 @@
 
        let fanaout_added = added_peers.len();
        if let Some(m) = self.metrics.as_mut() {
-            m.peers_included(topic_hash, Inclusion::Fanaout, fanaout_added)
+            m.peers_included(topic_hash, Inclusion::Fanout, fanaout_added)
        }
 
        // check if we need to get more peers, which we randomly select
@@ -1191,7 +1205,7 @@
 
        trace!("Handling IHAVE for peer: {:?}", peer_id);
 
-        // use a hashset to avoid duplicates efficiently
+        // use a hashmap to avoid duplicates efficiently
        let mut iwant_ids = HashMap::new();
 
        for (topic, ids) in ihave_msgs {
@@ -1308,38 +1322,29 @@
            // Send the messages to the peer
            let message_list: Vec<_> = cached_messages.into_iter().map(|entry| entry.1).collect();
 
-            let mut topic_msgs = HashMap::<TopicHash, Vec<usize>>::default();
-            if self.metrics.is_some() {
-                for msg in message_list.iter() {
-                    topic_msgs
-                        .entry(msg.topic.clone())
-                        .or_default()
-                        .push(msg.data.len());
-                }
-            }
+            let topics = message_list
+                .iter()
+                .map(|message| message.topic.clone())
+                .collect::<HashSet<TopicHash>>();
 
-            if self
-                .send_message(
-                    *peer_id,
-                    GossipsubRpc {
+            let message = GossipsubRpc {
                subscriptions: Vec::new(),
                messages: message_list,
                control_msgs: Vec::new(),
            }
-            .into_protobuf(),
-            )
-            .is_err()
-            {
+            .into_protobuf();
+
+            let msg_bytes = message.encoded_len();
+
+            if self.send_message(*peer_id, message).is_err() {
                error!("Failed to send cached messages. Messages too large");
            } else if let Some(m) = self.metrics.as_mut() {
                // Sending of messages succeeded, register them on the internal metrics.
-                for (topic, msg_bytes_vec) in topic_msgs.into_iter() {
-                    for msg_bytes in msg_bytes_vec {
-                        m.msg_sent(&topic, msg_bytes);
-                    }
+                for topic in topics.iter() {
+                    m.msg_sent(&topic, msg_bytes);
                }
            }
        }
-        }
        debug!("Completed IWANT handling for peer: {}", peer_id);
    }
 
@@ -1391,11 +1396,14 @@
                {
                    if backoff_time > now {
                        warn!(
-                            "GRAFT: peer attempted graft within backoff time, penalizing {}",
+                            "[Penalty] Peer attempted graft within backoff time, penalizing {}",
                            peer_id
                        );
                        // add behavioural penalty
                        if let Some((peer_score, ..)) = &mut self.peer_score {
+                            if let Some(metrics) = self.metrics.as_mut() {
+                                metrics.register_score_penalty(Penalty::GraftBackoff);
+                            }
                            peer_score.add_penalty(peer_id, 1);
 
                            // check the flood cutoff
@@ -1668,15 +1676,11 @@
                    "Rejecting message from peer {} because of blacklisted source: {}",
                    propagation_source, source
                );
-                if let Some((peer_score, .., gossip_promises)) = &mut self.peer_score {
-                    peer_score.reject_message(
-                        propagation_source,
-                        msg_id,
-                        &raw_message.topic,
-                        RejectReason::BlackListedSource,
-                    );
-                    gossip_promises.reject_message(msg_id, &RejectReason::BlackListedSource);
-                }
+                self.handle_invalid_message(
+                    propagation_source,
+                    raw_message,
+                    RejectReason::BlackListedSource,
+                );
                return false;
            }
        }
@@ -1702,15 +1706,7 @@
                "Dropping message {} claiming to be from self but forwarded from {}",
                msg_id, propagation_source
            );
-            if let Some((peer_score, _, _, gossip_promises)) = &mut self.peer_score {
-                peer_score.reject_message(
-                    propagation_source,
-                    msg_id,
-                    &raw_message.topic,
-                    RejectReason::SelfOrigin,
-                );
-                gossip_promises.reject_message(msg_id, &RejectReason::SelfOrigin);
-            }
+            self.handle_invalid_message(propagation_source, raw_message, RejectReason::SelfOrigin);
            return false;
        }
 
@@ -1725,6 +1721,11 @@
        mut raw_message: RawGossipsubMessage,
        propagation_source: &PeerId,
    ) {
+        // Record the received metric
+        if let Some(metrics) = self.metrics.as_mut() {
+            metrics.msg_recvd_unfiltered(&raw_message.topic, raw_message.raw_protobuf_len());
+        }
+
        let fast_message_id = self.config.fast_message_id(&raw_message);
 
        if let Some(fast_message_id) = fast_message_id.as_ref() {
@@ -1758,8 +1759,8 @@
                // Reject the message and return
                self.handle_invalid_message(
                    propagation_source,
-                    raw_message,
-                    ValidationError::TransformFailed,
+                    &raw_message,
+                    RejectReason::ValidationError(ValidationError::TransformFailed),
                );
                return;
            }
@@ -1796,6 +1797,11 @@
            msg_id
        );
 
+        // Record the received message with the metrics
+        if let Some(metrics) = self.metrics.as_mut() {
+            metrics.msg_recvd(&message.topic);
+        }
+
        // Tells score that message arrived (but is maybe not fully validated yet).
        // Consider the message as delivered for gossip promises.
        if let Some((peer_score, .., gossip_promises)) = &mut self.peer_score {
@@ -1845,19 +1851,28 @@
    fn handle_invalid_message(
        &mut self,
        propagation_source: &PeerId,
-        raw_message: RawGossipsubMessage,
-        validation_error: ValidationError,
+        raw_message: &RawGossipsubMessage,
+        reject_reason: RejectReason,
    ) {
        if let Some((peer_score, .., gossip_promises)) = &mut self.peer_score {
-            let reason = RejectReason::ValidationError(validation_error);
+            if let Some(metrics) = self.metrics.as_mut() {
+                metrics.register_invalid_message(&raw_message.topic);
+            }
+
            let fast_message_id_cache = &self.fast_message_id_cache;
 
            if let Some(msg_id) = self
                .config
-                .fast_message_id(&raw_message)
+                .fast_message_id(raw_message)
                .and_then(|id| fast_message_id_cache.get(&id))
            {
-                peer_score.reject_message(propagation_source, msg_id, &raw_message.topic, reason);
-                gossip_promises.reject_message(msg_id, &reason);
+                peer_score.reject_message(
+                    propagation_source,
+                    msg_id,
+                    &raw_message.topic,
+                    reject_reason,
+                );
+                gossip_promises.reject_message(msg_id, &reject_reason);
            } else {
                // The message is invalid, we reject it ignoring any gossip promises. If a peer is
                // advertising this message via an IHAVE and it's invalid it will be double
@@ -2067,6 +2082,9 @@
        if let Some((peer_score, .., gossip_promises)) = &mut self.peer_score {
            for (peer, count) in gossip_promises.get_broken_promises() {
                peer_score.add_penalty(&peer, count);
+                if let Some(metrics) = self.metrics.as_mut() {
+                    metrics.register_score_penalty(Penalty::BrokenPromise);
+                }
            }
        }
    }
@@ -2074,6 +2092,7 @@
    /// Heartbeat function which shifts the memcache and updates the mesh.
    fn heartbeat(&mut self) {
        debug!("Starting heartbeat");
+        let start = Instant::now();
 
        self.heartbeat_ticks += 1;
 
@@ -2098,13 +2117,15 @@
            }
        }
 
-        // cache scores throughout the heartbeat
-        let mut scores = HashMap::new();
-        let peer_score = &self.peer_score;
-        let mut score = |p: &PeerId| match peer_score {
-            Some((peer_score, ..)) => *scores.entry(*p).or_insert_with(|| peer_score.score(p)),
-            _ => 0.0,
-        };
+        // Cache the scores of all connected peers, and record metrics for current penalties.
+        let mut scores = HashMap::with_capacity(self.connected_peers.len());
+        if let Some((peer_score, ..)) = &self.peer_score {
+            for peer_id in self.connected_peers.keys() {
+                scores
+                    .entry(peer_id)
+                    .or_insert_with(|| peer_score.metric_score(&peer_id, self.metrics.as_mut()));
+            }
+        }
 
        // maintain the mesh for each topic
        for (topic_hash, peers) in self.mesh.iter_mut() {
@@ -2116,35 +2137,35 @@
            // drop all peers with negative score, without PX
            // if there is at some point a stable retain method for BTreeSet the following can be
            // written more efficiently with retain.
-            let to_remove: Vec<_> = peers
-                .iter()
-                .filter(|&p| {
-                    if score(p) < 0.0 {
-                        debug!(
-                            "HEARTBEAT: Prune peer {:?} with negative score [score = {}, topic = \
-                             {}]",
-                            p,
-                            score(p),
-                            topic_hash
-                        );
-
-                        let current_topic = to_prune.entry(*p).or_insert_with(Vec::new);
-                        current_topic.push(topic_hash.clone());
-                        no_px.insert(*p);
-                        true
-                    } else {
-                        false
-                    }
-                })
-                .cloned()
-                .collect();
+            let mut to_remove_peers = Vec::new();
+            for peer_id in peers.iter() {
+                let peer_score = *scores.get(peer_id).unwrap_or(&0.0);
+
+                // Record the score per mesh
+                if let Some(metrics) = self.metrics.as_mut() {
+                    metrics.observe_mesh_peers_score(topic_hash, peer_score);
+                }
+
+                if peer_score < 0.0 {
+                    debug!(
+                        "HEARTBEAT: Prune peer {:?} with negative score [score = {}, topic = \
+                         {}]",
+                        peer_id, peer_score, topic_hash
+                    );
+
+                    let current_topic = to_prune.entry(*peer_id).or_insert_with(Vec::new);
+                    current_topic.push(topic_hash.clone());
+                    no_px.insert(*peer_id);
+                    to_remove_peers.push(*peer_id);
+                }
+            }
 
            if let Some(m) = self.metrics.as_mut() {
-                m.peers_removed(topic_hash, Churn::BadScore, to_remove.len())
+                m.peers_removed(topic_hash, Churn::BadScore, to_remove_peers.len())
            }
 
-            for peer in to_remove {
-                peers.remove(&peer);
+            for peer_id in to_remove_peers {
+                peers.remove(&peer_id);
            }
 
            // too little peers - add some
@@ -2166,7 +2187,7 @@
                    !peers.contains(peer)
                        && !explicit_peers.contains(peer)
                        && !backoffs.is_backoff_with_slack(topic_hash, peer)
-                        && score(peer) >= 0.0
+                        && *scores.get(peer).unwrap_or(&0.0) >= 0.0
                },
            );
            for peer in &peer_list {
@@ -2195,8 +2216,12 @@
                let mut rng = thread_rng();
                let mut shuffled = peers.iter().cloned().collect::<Vec<_>>();
                shuffled.shuffle(&mut rng);
-                shuffled
-                    .sort_by(|p1, p2| score(p1).partial_cmp(&score(p2)).unwrap_or(Ordering::Equal));
+                shuffled.sort_by(|p1, p2| {
+                    let score_p1 = *scores.get(p1).unwrap_or(&0.0);
+                    let score_p2 = *scores.get(p2).unwrap_or(&0.0);
+
+                    score_p1.partial_cmp(&score_p2).unwrap_or(Ordering::Equal)
+                });
                // shuffle everything except the last retain_scores many peers (the best ones)
                shuffled[..peers.len() - self.config.retain_scores()].shuffle(&mut rng);
 
@@ -2255,7 +2280,7 @@
                    !peers.contains(peer)
                        && !explicit_peers.contains(peer)
                        && !backoffs.is_backoff_with_slack(topic_hash, peer)
-                        && score(peer) >= 0.0
+                        && *scores.get(peer).unwrap_or(&0.0) >= 0.0
                        && outbound_peers.contains(peer)
                },
            );
@@ -2288,19 +2313,27 @@
 
                // now compute the median peer score in the mesh
                let mut peers_by_score: Vec<_> = peers.iter().collect();
-                peers_by_score
-                    .sort_by(|p1, p2| score(p1).partial_cmp(&score(p2)).unwrap_or(Equal));
+                peers_by_score.sort_by(|p1, p2| {
+                    let p1_score = *scores.get(p1).unwrap_or(&0.0);
+                    let p2_score = *scores.get(p2).unwrap_or(&0.0);
+                    p1_score.partial_cmp(&p2_score).unwrap_or(Equal)
+                });
 
                let middle = peers_by_score.len() / 2;
                let median = if peers_by_score.len() % 2 == 0 {
-                    (score(
-                        *peers_by_score.get(middle - 1).expect(
-                            "middle < vector length and middle > 0 since peers.len() > 0",
-                        ),
-                    ) + score(*peers_by_score.get(middle).expect("middle < vector length")))
-                        * 0.5
+                    let sub_middle_peer = *peers_by_score
+                        .get(middle - 1)
+                        .expect("middle < vector length and middle > 0 since peers.len() > 0");
+                    let sub_middle_score = *scores.get(sub_middle_peer).unwrap_or(&0.0);
+                    let middle_peer =
+                        *peers_by_score.get(middle).expect("middle < vector length");
+                    let middle_score = *scores.get(middle_peer).unwrap_or(&0.0);
+
+                    (sub_middle_score + middle_score) * 0.5
                } else {
-                    score(*peers_by_score.get(middle).expect("middle < vector length"))
+                    *scores
+                        .get(*peers_by_score.get(middle).expect("middle < vector length"))
+                        .unwrap_or(&0.0)
                };
 
                // if the median score is below the threshold, select a better peer (if any) and
@@ -2311,11 +2344,11 @@
                        &self.connected_peers,
                        topic_hash,
                        self.config.opportunistic_graft_peers(),
-                        |peer| {
-                            !peers.contains(peer)
-                                && !explicit_peers.contains(peer)
-                                && !backoffs.is_backoff_with_slack(topic_hash, peer)
-                                && score(peer) > median
+                        |peer_id| {
+                            !peers.contains(peer_id)
+                                && !explicit_peers.contains(peer_id)
+                                && !backoffs.is_backoff_with_slack(topic_hash, peer_id)
+                                && *scores.get(peer_id).unwrap_or(&0.0) > median
                        },
                    );
                    for peer in &peer_list {
@@ -2367,9 +2400,10 @@
            };
            for peer in peers.iter() {
                // is the peer still subscribed to the topic?
+                let peer_score = *scores.get(peer).unwrap_or(&0.0);
                match self.peer_topics.get(peer) {
                    Some(topics) => {
-                        if !topics.contains(topic_hash) || score(peer) < publish_threshold {
+                        if !topics.contains(topic_hash) || peer_score < publish_threshold {
                            debug!(
                                "HEARTBEAT: Peer removed from fanout for topic: {:?}",
                                topic_hash
@@ -2401,10 +2435,10 @@
                &self.connected_peers,
                topic_hash,
                needed_peers,
-                |peer| {
-                    !peers.contains(peer)
-                        && !explicit_peers.contains(peer)
-                        && score(peer) < publish_threshold
+                |peer_id| {
+                    !peers.contains(peer_id)
+                        && !explicit_peers.contains(peer_id)
+                        && *scores.get(peer_id).unwrap_or(&0.0) < publish_threshold
                },
            );
            peers.extend(new_peers);
@@ -2412,12 +2446,6 @@
        }
 
        if self.peer_score.is_some() {
-            trace!("Peer_scores: {:?}", {
-                for peer in self.peer_topics.keys() {
-                    score(peer);
-                }
-                scores
-            });
            trace!("Mesh message deliveries: {:?}", {
                self.mesh
                    .iter()
@@ -2429,7 +2457,7 @@
                            .map(|p| {
                                (
                                    *p,
-                                    peer_score
+                                    self.peer_score
                                        .as_ref()
                                        .expect("peer_score.is_some()")
                                        .0
@@ -2458,6 +2486,10 @@
        self.mcache.shift();
 
        debug!("Completed Heartbeat");
+        if let Some(metrics) = self.metrics.as_mut() {
+            let duration = u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX);
+            metrics.observe_heartbeat_duration(duration);
+        }
    }
 
    /// Emits gossip - Send IHAVE messages to a random set of gossip peers. This is applied to mesh
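The heartbeat duration recorded just above is measured from an Instant taken at the start of the heartbeat; Instant::elapsed().as_millis() returns a u128, so the value is narrowed to u64 (saturating to u64::MAX) before being observed. A tiny standalone illustration of that conversion, with an invented sleep standing in for heartbeat work:

    use std::convert::TryFrom;
    use std::time::{Duration, Instant};

    fn elapsed_millis(start: Instant) -> u64 {
        // as_millis() yields u128; saturate instead of panicking on overflow.
        u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX)
    }

    fn main() {
        let start = Instant::now();
        std::thread::sleep(Duration::from_millis(5)); // stand-in for real work
        assert!(elapsed_millis(start) >= 5);
    }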
@@ -2695,11 +2727,12 @@
            }
            .into_protobuf();
 
+            let msg_bytes = event.encoded_len();
            for peer in recipient_peers.iter() {
                debug!("Sending message: {:?} to peer {:?}", msg_id, peer);
                self.send_message(*peer, event.clone())?;
                if let Some(m) = self.metrics.as_mut() {
-                    m.msg_sent(&message.topic, message.data.len());
+                    m.msg_sent(&message.topic, msg_bytes);
                }
            }
            debug!("Completed forwarding message");
@@ -3110,6 +3143,17 @@
        // NOTE: It is possible the peer has already been removed from all mappings if it does not
        // support the protocol.
        self.peer_topics.remove(peer_id);
+
+        // If metrics are enabled, register the disconnection of a peer based on its protocol.
+        if let Some(metrics) = self.metrics.as_mut() {
+            let peer_kind = &self
+                .connected_peers
+                .get(peer_id)
+                .expect("Connected peer must be registered")
+                .kind;
+            metrics.peer_protocol_disconnected(peer_kind.clone());
+        }
+
        self.connected_peers.remove(peer_id);
 
        if let Some((peer_score, ..)) = &mut self.peer_score {
@@ -3257,6 +3301,11 @@
        match handler_event {
            HandlerEvent::PeerKind(kind) => {
                // We have identified the protocol this peer is using
+
+                if let Some(metrics) = self.metrics.as_mut() {
+                    metrics.peer_protocol_connected(kind.clone());
+                }
+
                if let PeerKind::NotSupported = kind {
                    debug!(
                        "Peer does not support gossipsub protocols. {}",
@@ -3304,8 +3353,8 @@
                    for (raw_message, validation_error) in invalid_messages {
                        self.handle_invalid_message(
                            &propagation_source,
-                            raw_message,
-                            validation_error,
+                            &raw_message,
+                            RejectReason::ValidationError(validation_error),
                        )
                    }
                } else {
@@ -25,11 +25,13 @@ use std::collections::HashMap;
 
 use open_metrics_client::encoding::text::Encode;
 use open_metrics_client::metrics::counter::Counter;
-use open_metrics_client::metrics::family::Family;
+use open_metrics_client::metrics::family::{Family, MetricConstructor};
 use open_metrics_client::metrics::gauge::Gauge;
+use open_metrics_client::metrics::histogram::{linear_buckets, Histogram};
 use open_metrics_client::registry::Registry;
 
 use crate::topic::TopicHash;
+use crate::types::{MessageAcceptance, PeerKind};
 
 // Default value that limits for how many topics do we store metrics.
 const DEFAULT_MAX_TOPICS: usize = 300;
@@ -60,48 +62,6 @@ impl Default for Config {
 /// Whether we have ever been subscribed to this topic.
 type EverSubscribed = bool;
 
-/// Reasons why a peer was included in the mesh.
-#[derive(PartialEq, Eq, Hash, Encode, Clone)]
-pub enum Inclusion {
-    /// Peer was a fanaout peer.
-    Fanaout,
-    /// Included from random selection.
-    Random,
-    /// Peer subscribed.
-    Subscribed,
-    /// Peer was included to fill the outbound quota.
-    Outbound,
-}
-
-/// Reasons why a peer was removed from the mesh.
-#[derive(PartialEq, Eq, Hash, Encode, Clone)]
-pub enum Churn {
-    /// Peer disconnected.
-    Dc,
-    /// Peer had a bad score.
-    BadScore,
-    /// Peer sent a PRUNE.
-    Prune,
-    /// Peer unsubscribed.
-    Unsub,
-    /// Too many peers.
-    Excess,
-}
-
-/// Label for the mesh inclusion event metrics.
-#[derive(PartialEq, Eq, Hash, Encode, Clone)]
-struct InclusionLabel {
-    topic: TopicHash,
-    reason: Inclusion,
-}
-
-/// Label for the mesh churn event metrics.
-#[derive(PartialEq, Eq, Hash, Encode, Clone)]
-struct ChurnLabel {
-    topic: TopicHash,
-    reason: Churn,
-}
-
 /// A collection of metrics used throughout the Gossipsub behaviour.
 pub struct Metrics {
     /* Configuration parameters */
@@ -123,6 +83,14 @@ pub struct Metrics {
     /// Number of peers subscribed to each topic. This allows us to analyze a topic's behaviour
     /// regardless of our subscription status.
     topic_peers_count: Family<TopicHash, Gauge>,
+    /// The number of invalid messages received for a given topic.
+    invalid_messages: Family<TopicHash, Counter>,
+    /// The number of messages accepted by the application (validation result).
+    accepted_messages: Family<TopicHash, Counter>,
+    /// The number of messages ignored by the application (validation result).
+    ignored_messages: Family<TopicHash, Counter>,
+    /// The number of messages rejected by the application (validation result).
+    rejected_messages: Family<TopicHash, Counter>,
 
     /* Metrics regarding mesh state */
     /// Number of peers in our mesh. This metric should be updated with the count of peers for a
@@ -133,11 +101,43 @@ pub struct Metrics {
     /// Number of times we remove peers in a topic mesh for different reasons.
     mesh_peer_churn_events: Family<ChurnLabel, Counter>,
 
-    /* Metrics regarding messages sent */
+    /* Metrics regarding messages sent/received */
     /// Number of gossip messages sent to each topic.
     topic_msg_sent_counts: Family<TopicHash, Counter>,
-    /// Bytes from gossip messages sent to each topic .
+    /// Bytes from gossip messages sent to each topic.
     topic_msg_sent_bytes: Family<TopicHash, Counter>,
+    /// Number of gossipsub messages published to each topic.
+    topic_msg_published: Family<TopicHash, Counter>,
+
+    /// Number of gossipsub messages received on each topic (without filtering duplicates).
+    topic_msg_recv_counts_unfiltered: Family<TopicHash, Counter>,
+    /// Number of gossipsub messages received on each topic (after filtering duplicates).
+    topic_msg_recv_counts: Family<TopicHash, Counter>,
+    /// Bytes received from gossip messages for each topic.
+    topic_msg_recv_bytes: Family<TopicHash, Counter>,
+
+    /* Metrics related to scoring */
+    /// Histogram of the scores for each mesh topic.
+    score_per_mesh: Family<TopicHash, Histogram, HistBuilder>,
+    /// A counter of the kind of penalties being applied to peers.
+    scoring_penalties: Family<PenaltyLabel, Counter>,
+
+    /* General Metrics */
+    /// Gossipsub supports floodsub, gossipsub v1.0 and gossipsub v1.1. Peers are classified based
+    /// on which protocol they support. This metric keeps track of the number of peers that are
+    /// connected of each type.
+    peers_per_protocol: Family<ProtocolLabel, Gauge>,
+    /// The time it takes to complete one iteration of the heartbeat.
+    heartbeat_duration: Histogram,
+
+    /* Performance metrics */
+    /// When the user validates a message, it tries to re propagate it to its mesh peers. If the
+    /// message expires from the memcache before it can be validated, we count this a cache miss
+    /// and it is an indicator that the memcache size should be increased.
+    memcache_misses: Counter,
+    /// The number of times we have decided that an IWANT control message is required for this
+    /// topic. A very high metric might indicate an underperforming network.
+    topic_iwant_msgs: Family<TopicHash, Counter>,
 }
 
 impl Metrics {
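Two of the new fields use histograms rather than counters: score_per_mesh gets custom buckets aligned with the scoring thresholds (built further down via HistBuilder), while heartbeat_duration uses linear_buckets(0.0, 50.0, 10), i.e. ten upper bounds spaced 50 ms apart. A short sketch of what that helper produces, using open-metrics-client directly; the observed value is invented:

    use open_metrics_client::metrics::histogram::{linear_buckets, Histogram};

    fn heartbeat_histogram_sketch() {
        // Upper bounds 0.0, 50.0, 100.0, ..., 450.0 (interpreted as milliseconds here).
        let buckets: Vec<f64> = linear_buckets(0.0, 50.0, 10).collect();
        assert_eq!(buckets.len(), 10);
        assert_eq!(buckets[1], 50.0);

        let heartbeat_duration = Histogram::new(buckets.into_iter());
        heartbeat_duration.observe(73.0); // e.g. a heartbeat that took 73 ms
    }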
@@ -165,6 +165,26 @@ impl Metrics {
             "Number of peers subscribed to each topic"
         );
 
+        let invalid_messages = register_family!(
+            "invalid_messages_per_topic",
+            "Number of invalid messages received for each topic"
+        );
+
+        let accepted_messages = register_family!(
+            "accepted_messages_per_topic",
+            "Number of accepted messages received for each topic"
+        );
+
+        let ignored_messages = register_family!(
+            "ignored_messages_per_topic",
+            "Number of ignored messages received for each topic"
+        );
+
+        let rejected_messages = register_family!(
+            "accepted_messages_per_topic",
+            "Number of rejected messages received for each topic"
+        );
+
         let mesh_peer_counts = register_family!(
             "mesh_peer_counts",
             "Number of peers in each topic in our mesh"
@@ -177,27 +197,114 @@ impl Metrics {
             "mesh_peer_churn_events",
             "Number of times a peer gets removed from our mesh for different reasons"
         );
 
         let topic_msg_sent_counts = register_family!(
             "topic_msg_sent_counts",
-            "Number of gossip messages sent to each topic."
+            "Number of gossip messages sent to each topic"
+        );
+        let topic_msg_published = register_family!(
+            "topic_msg_published",
+            "Number of gossip messages published to each topic"
         );
         let topic_msg_sent_bytes = register_family!(
             "topic_msg_sent_bytes",
-            "Bytes from gossip messages sent to each topic."
+            "Bytes from gossip messages sent to each topic"
         );
+
+        let topic_msg_recv_counts_unfiltered = register_family!(
+            "topic_msg_recv_counts_unfiltered",
+            "Number of gossip messages received on each topic (without duplicates being filtered)"
+        );
+
+        let topic_msg_recv_counts = register_family!(
+            "topic_msg_recv_counts",
+            "Number of gossip messages received on each topic (after duplicates have been filtered)"
+        );
+        let topic_msg_recv_bytes = register_family!(
+            "topic_msg_recv_bytes",
+            "Bytes received from gossip messages for each topic"
+        );
+
+        // TODO: Update default variables once a builder pattern is used.
+        let gossip_threshold = -4000.0;
+        let publish_threshold = -8000.0;
+        let greylist_threshold = -16000.0;
+        let histogram_buckets: Vec<f64> = vec![
+            greylist_threshold,
+            publish_threshold,
+            gossip_threshold,
+            gossip_threshold / 2.0,
+            gossip_threshold / 4.0,
+            0.0,
+            1.0,
+            10.0,
+            100.0,
+        ];
+
+        let hist_builder = HistBuilder {
+            buckets: histogram_buckets,
+        };
+
+        let score_per_mesh: Family<_, _, HistBuilder> = Family::new_with_constructor(hist_builder);
+        registry.register(
+            "score_per_mesh",
+            "Histogram of scores per mesh topic",
+            Box::new(score_per_mesh.clone()),
+        );
+
+        let scoring_penalties = register_family!(
+            "scoring_penalties",
+            "Counter of types of scoring penalties given to peers"
+        );
+        let peers_per_protocol = register_family!(
+            "peers_per_protocol",
+            "Number of connected peers by protocol type"
+        );
+
+        let heartbeat_duration = Histogram::new(linear_buckets(0.0, 50.0, 10));
+        registry.register(
+            "heartbeat_duration",
+            "Histogram of observed heartbeat durations",
+            Box::new(heartbeat_duration.clone()),
+        );
+
+        let topic_iwant_msgs = register_family!(
+            "topic_iwant_msgs",
+            "Number of times we have decided an IWANT is required for this topic"
+        );
+        let memcache_misses = {
+            let metric = Counter::default();
+            registry.register(
+                "memcache_misses",
+                "Number of times a message is not found in the duplicate cache when validating",
+                Box::new(metric.clone()),
+            );
+            metric
+        };
+
         Self {
             max_topics,
             max_never_subscribed_topics,
             topic_info: HashMap::default(),
             topic_subscription_status,
             topic_peers_count,
+            invalid_messages,
+            accepted_messages,
+            ignored_messages,
+            rejected_messages,
             mesh_peer_counts,
             mesh_peer_inclusion_events,
             mesh_peer_churn_events,
             topic_msg_sent_counts,
             topic_msg_sent_bytes,
+            topic_msg_published,
+            topic_msg_recv_counts_unfiltered,
+            topic_msg_recv_counts,
+            topic_msg_recv_bytes,
+            score_per_mesh,
+            scoring_penalties,
+            peers_per_protocol,
+            heartbeat_duration,
+            memcache_misses,
+            topic_iwant_msgs,
         }
     }
 
@@ -265,7 +372,7 @@ impl Metrics {
         if self.register_topic(topic).is_ok() {
             self.mesh_peer_inclusion_events
                 .get_or_create(&InclusionLabel {
-                    topic: topic.clone(),
+                    hash: topic.to_string(),
                     reason,
                 })
                 .inc_by(count as u64);
@@ -277,7 +384,7 @@
         if self.register_topic(topic).is_ok() {
             self.mesh_peer_churn_events
                 .get_or_create(&ChurnLabel {
-                    topic: topic.clone(),
+                    hash: topic.to_string(),
                     reason,
                 })
                 .inc_by(count as u64);
@@ -292,6 +399,27 @@
         }
     }
 
+    /// Register that an invalid message was received on a specific topic.
+    pub fn register_invalid_message(&mut self, topic: &TopicHash) {
+        if self.register_topic(topic).is_ok() {
+            self.invalid_messages.get_or_create(topic).inc();
+        }
+    }
+
+    /// Register a score penalty.
+    pub fn register_score_penalty(&mut self, penalty: Penalty) {
+        self.scoring_penalties
+            .get_or_create(&PenaltyLabel { penalty })
+            .inc();
+    }
+
+    /// Registers that a message was published on a specific topic.
+    pub fn register_published_message(&mut self, topic: &TopicHash) {
+        if self.register_topic(topic).is_ok() {
+            self.topic_msg_published.get_or_create(topic).inc();
+        }
+    }
+
     /// Register sending a message over a topic.
     pub fn msg_sent(&mut self, topic: &TopicHash, bytes: usize) {
         if self.register_topic(topic).is_ok() {
@@ -301,4 +429,155 @@ impl Metrics {
                 .inc_by(bytes as u64);
         }
     }
+
+    /// Register that a message was received (and was not a duplicate).
+    pub fn msg_recvd(&mut self, topic: &TopicHash) {
+        if self.register_topic(topic).is_ok() {
+            self.topic_msg_recv_counts.get_or_create(topic).inc();
+        }
+    }
+
+    /// Register that a message was received (could have been a duplicate).
+    pub fn msg_recvd_unfiltered(&mut self, topic: &TopicHash, bytes: usize) {
+        if self.register_topic(topic).is_ok() {
+            self.topic_msg_recv_counts_unfiltered
+                .get_or_create(topic)
+                .inc();
+            self.topic_msg_recv_bytes
+                .get_or_create(topic)
+                .inc_by(bytes as u64);
+        }
+    }
+
+    pub fn register_msg_validation(&mut self, topic: &TopicHash, validation: &MessageAcceptance) {
+        if self.register_topic(topic).is_ok() {
+            match validation {
+                MessageAcceptance::Accept => self.accepted_messages.get_or_create(topic).inc(),
+                MessageAcceptance::Ignore => self.ignored_messages.get_or_create(topic).inc(),
+                MessageAcceptance::Reject => self.rejected_messages.get_or_create(topic).inc(),
+            };
+        }
+    }
+
+    /// Register a memcache miss.
+    pub fn memcache_miss(&mut self) {
+        self.memcache_misses.inc();
+    }
+
+    /// Register sending an IWANT msg for this topic.
+    pub fn register_iwant(&mut self, topic: &TopicHash) {
+        if self.register_topic(topic).is_ok() {
+            self.topic_iwant_msgs.get_or_create(topic).inc();
+        }
+    }
+
+    /// Observes a heartbeat duration.
+    pub fn observe_heartbeat_duration(&mut self, millis: u64) {
+        self.heartbeat_duration.observe(millis as f64);
+    }
+
+    /// Observe a score of a mesh peer.
+    pub fn observe_mesh_peers_score(&mut self, topic: &TopicHash, score: f64) {
+        if self.register_topic(topic).is_ok() {
+            self.score_per_mesh.get_or_create(topic).observe(score);
+        }
+    }
+
+    /// Register a new peers connection based on its protocol.
+    pub fn peer_protocol_connected(&mut self, kind: PeerKind) {
+        self.peers_per_protocol
+            .get_or_create(&ProtocolLabel { protocol: kind })
+            .inc();
+    }
+
+    /// Removes a peer from the counter based on its protocol when it disconnects.
+    pub fn peer_protocol_disconnected(&mut self, kind: PeerKind) {
+        let metric = self
+            .peers_per_protocol
+            .get_or_create(&ProtocolLabel { protocol: kind });
+        if metric.get() == 0 {
+            return;
+        } else {
+            // decrement the counter
+            metric.set(metric.get() - 1);
+        }
+    }
+}
+
+/// Reasons why a peer was included in the mesh.
+#[derive(PartialEq, Eq, Hash, Encode, Clone)]
+pub enum Inclusion {
+    /// Peer was a fanaout peer.
+    Fanout,
+    /// Included from random selection.
+    Random,
+    /// Peer subscribed.
+    Subscribed,
+    /// Peer was included to fill the outbound quota.
+    Outbound,
+}
+
+/// Reasons why a peer was removed from the mesh.
+#[derive(PartialEq, Eq, Hash, Encode, Clone)]
+pub enum Churn {
+    /// Peer disconnected.
+    Dc,
+    /// Peer had a bad score.
+    BadScore,
+    /// Peer sent a PRUNE.
+    Prune,
+    /// Peer unsubscribed.
+    Unsub,
+    /// Too many peers.
+    Excess,
+}
+
+/// Kinds of reasons a peer's score has been penalized
+#[derive(PartialEq, Eq, Hash, Encode, Clone)]
+pub enum Penalty {
+    /// A peer grafted before waiting the back-off time.
+    GraftBackoff,
+    /// A Peer did not respond to an IWANT request in time.
+    BrokenPromise,
+    /// A Peer did not send enough messages as expected.
+    MessageDeficit,
+    /// Too many peers under one IP address.
+    IPColocation,
+}
+
+/// Label for the mesh inclusion event metrics.
+#[derive(PartialEq, Eq, Hash, Encode, Clone)]
+struct InclusionLabel {
+    hash: String,
+    reason: Inclusion,
+}
+
+/// Label for the mesh churn event metrics.
+#[derive(PartialEq, Eq, Hash, Encode, Clone)]
+struct ChurnLabel {
+    hash: String,
+    reason: Churn,
+}
+
+/// Label for the kinds of protocols peers can connect as.
+#[derive(PartialEq, Eq, Hash, Encode, Clone)]
+struct ProtocolLabel {
+    protocol: PeerKind,
+}
+
+/// Label for the kinds of scoring penalties that can occur
+#[derive(PartialEq, Eq, Hash, Encode, Clone)]
+struct PenaltyLabel {
+    penalty: Penalty,
+}
+
+#[derive(Clone)]
+struct HistBuilder {
+    buckets: Vec<f64>,
+}
+
+impl MetricConstructor<Histogram> for HistBuilder {
+    fn new_metric(&self) -> Histogram {
+        Histogram::new(self.buckets.clone().into_iter())
+    }
 }
@@ -21,6 +21,7 @@
 //!
 //! Manages and stores the Scoring logic of a particular peer on the gossipsub behaviour.
 
+use crate::metrics::{Metrics, Penalty};
 use crate::time_cache::TimeCache;
 use crate::{MessageId, TopicHash};
 use instant::Instant;
@@ -212,8 +213,14 @@ impl PeerScore {
         }
     }
 
-    /// Returns the score for a peer.
+    /// Returns the score for a peer
     pub fn score(&self, peer_id: &PeerId) -> f64 {
+        self.metric_score(peer_id, None)
+    }
+
+    /// Returns the score for a peer, logging metrics. This is called from the heartbeat and
+    /// increments the metric counts for penalties.
+    pub fn metric_score(&self, peer_id: &PeerId, mut metrics: Option<&mut Metrics>) -> f64 {
         let peer_stats = match self.peer_stats.get(peer_id) {
             Some(v) => v,
             None => return 0.0,
@@ -264,6 +271,9 @@
                     - topic_stats.mesh_message_deliveries;
                 let p3 = deficit * deficit;
                 topic_score += p3 * topic_params.mesh_message_deliveries_weight;
+                if let Some(metrics) = metrics.as_mut() {
+                    metrics.register_score_penalty(Penalty::MessageDeficit);
+                }
                 debug!(
                     "[Penalty] The peer {} has a mesh message deliveries deficit of {} in topic\
                      {} and will get penalized by {}",
@@ -313,6 +323,9 @@
         if (peers_in_ip as f64) > self.params.ip_colocation_factor_threshold {
             let surplus = (peers_in_ip as f64) - self.params.ip_colocation_factor_threshold;
             let p6 = surplus * surplus;
+            if let Some(metrics) = metrics.as_mut() {
+                metrics.register_score_penalty(Penalty::IPColocation);
+            }
             debug!(
                 "[Penalty] The peer {} gets penalized because of too many peers with the ip {}. \
                  The surplus is {}. ",
@@ -600,6 +613,7 @@
             "[Penalty] Message from {} rejected because of ValidationError or SelfOrigin",
             from
         );
+
         self.mark_invalid_message_delivery(from, topic_hash);
     }
 
@@ -22,6 +22,8 @@
 use crate::rpc_proto;
 use crate::TopicHash;
 use libp2p_core::{connection::ConnectionId, PeerId};
+use open_metrics_client::encoding::text::Encode;
+use prost::Message;
 use std::fmt;
 use std::fmt::Debug;
 
@@ -89,7 +91,7 @@ pub struct PeerConnections {
 }
 
 /// Describes the types of peers that can exist in the gossipsub context.
-#[derive(Debug, Clone, PartialEq)]
+#[derive(Debug, Clone, PartialEq, Hash, Encode, Eq)]
 pub enum PeerKind {
     /// A gossipsub 1.1 peer.
     Gossipsubv1_1,
@@ -126,6 +128,21 @@ pub struct RawGossipsubMessage {
     pub validated: bool,
 }
 
+impl RawGossipsubMessage {
+    /// Calculates the encoded length of this message (used for calculating metrics).
+    pub fn raw_protobuf_len(&self) -> usize {
+        let message = rpc_proto::Message {
+            from: self.source.map(|m| m.to_bytes()),
+            data: Some(self.data.clone()),
+            seqno: self.sequence_number.map(|s| s.to_be_bytes().to_vec()),
+            topic: TopicHash::into_string(self.topic.clone()),
+            signature: self.signature.clone(),
+            key: self.key.clone(),
+        };
+        message.encoded_len()
+    }
+}
+
 /// The message sent to the user after a [`RawGossipsubMessage`] has been transformed by a
 /// [`crate::DataTransform`].
 #[derive(Clone, PartialEq, Eq, Hash)]
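raw_protobuf_len above relies on prost's Message::encoded_len, which computes the protobuf wire size without producing the encoded bytes. A self-contained sketch with a made-up message type (deliberately not the crate's rpc_proto definitions) showing that the reported length matches an actual encoding:

    use prost::Message;

    /// Hypothetical message type, for illustration only.
    #[derive(Clone, PartialEq, ::prost::Message)]
    struct ExampleMessage {
        #[prost(bytes = "vec", tag = "1")]
        data: Vec<u8>,
        #[prost(string, tag = "2")]
        topic: String,
    }

    fn main() {
        let msg = ExampleMessage {
            data: vec![1, 2, 3],
            topic: "example-topic".to_string(),
        };
        let mut buf = Vec::new();
        msg.encode(&mut buf).expect("a Vec<u8> always has enough capacity");
        // The size computed up-front matches the bytes actually produced.
        assert_eq!(msg.encoded_len(), buf.len());
    }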
@@ -37,7 +37,7 @@ use libp2p_gossipsub::{
     ValidationMode,
 };
 use libp2p_plaintext::PlainText2Config;
-use libp2p_swarm::{dial_opts::DialOpts, Swarm, SwarmEvent};
+use libp2p_swarm::{Swarm, SwarmEvent};
 use libp2p_yamux as yamux;
 
 struct Graph {