protocols/kad: Implement S-Kademlia's lookup over disjoint paths v2 (#1473)

The extension paper S-Kademlia includes a proposal for lookups over
disjoint paths. Within vanilla Kademlia, queries keep track of the
closest nodes in a single bucket. Any adversary along the path can thus
influence all future paths, in case they can come up with the
next-closest (not overall closest) hops. S-Kademlia tries to solve the
attack above by querying over disjoint paths using multiple buckets.

To adjust the libp2p Kademlia implementation accordingly this change-set
introduces an additional peers iterator: `ClosestDisjointPeersIter`.
This new iterator wraps around a set of `ClosestPeersIter`
`ClosestDisjointPeersIter` enforces that each of the `ClosestPeersIter`
explore disjoint paths by having each peer instantly return that was
queried by a different iterator before.
This commit is contained in:
Max Inden
2020-06-19 12:22:26 +02:00
committed by GitHub
parent 00fc223487
commit 9dd2d662e9
10 changed files with 1432 additions and 120 deletions

View File

@ -22,14 +22,15 @@
use super::*;
use crate::{ALPHA_VALUE, K_VALUE};
use crate::K_VALUE;
use crate::kbucket::Distance;
use crate::record::store::MemoryStore;
use crate::record::{Key, store::MemoryStore};
use futures::{
prelude::*,
executor::block_on,
future::poll_fn,
};
use futures_timer::Delay;
use libp2p_core::{
PeerId,
Transport,
@ -43,8 +44,8 @@ use libp2p_secio::SecioConfig;
use libp2p_swarm::Swarm;
use libp2p_yamux as yamux;
use quickcheck::*;
use rand::{Rng, random, thread_rng};
use std::{collections::{HashSet, HashMap}, io, num::NonZeroUsize, u64};
use rand::{Rng, random, thread_rng, rngs::StdRng, SeedableRng};
use std::{collections::{HashSet, HashMap}, time::Duration, io, num::NonZeroUsize, u64};
use multihash::{wrap, Code, Multihash};
type TestSwarm = Swarm<Kademlia<MemoryStore>>;
@ -132,21 +133,45 @@ fn random_multihash() -> Multihash {
wrap(Code::Sha2_256, &thread_rng().gen::<[u8; 32]>())
}
#[derive(Clone, Debug)]
struct Seed([u8; 32]);
impl Arbitrary for Seed {
fn arbitrary<G: Gen>(g: &mut G) -> Seed {
Seed(g.gen())
}
}
#[test]
fn bootstrap() {
fn run(rng: &mut impl Rng) {
let num_total = rng.gen_range(2, 20);
// When looking for the closest node to a key, Kademlia considers ALPHA_VALUE nodes to query
// at initialization. If `num_groups` is larger than ALPHA_VALUE the remaining locally known
// nodes will not be considered. Given that no other node is aware of them, they would be
// lost entirely. To prevent the above restrict `num_groups` to be equal or smaller than
// ALPHA_VALUE.
let num_group = rng.gen_range(1, (num_total % ALPHA_VALUE.get()) + 2);
fn prop(seed: Seed) {
let mut rng = StdRng::from_seed(seed.0);
let mut swarms = build_connected_nodes(num_total, num_group).into_iter()
let num_total = rng.gen_range(2, 20);
// When looking for the closest node to a key, Kademlia considers
// K_VALUE nodes to query at initialization. If `num_group` is larger
// than K_VALUE the remaining locally known nodes will not be
// considered. Given that no other node is aware of them, they would be
// lost entirely. To prevent the above restrict `num_group` to be equal
// or smaller than K_VALUE.
let num_group = rng.gen_range(1, (num_total % K_VALUE.get()) + 2);
let mut cfg = KademliaConfig::default();
if rng.gen() {
cfg.disjoint_query_paths(true);
}
let mut swarms = build_connected_nodes_with_config(
num_total,
num_group,
cfg,
).into_iter()
.map(|(_a, s)| s)
.collect::<Vec<_>>();
let swarm_ids: Vec<_> = swarms.iter().map(Swarm::local_peer_id).cloned().collect();
let swarm_ids: Vec<_> = swarms.iter()
.map(Swarm::local_peer_id)
.cloned()
.collect();
let qid = swarms[0].bootstrap().unwrap();
@ -190,10 +215,7 @@ fn bootstrap() {
)
}
let mut rng = thread_rng();
for _ in 0 .. 10 {
run(&mut rng)
}
QuickCheck::new().tests(10).quickcheck(prop as fn(_) -> _)
}
#[test]
@ -415,16 +437,22 @@ fn get_record_not_found() {
)
}
/// A node joining a fully connected network via a single bootnode should be able to put a record to
/// the X closest nodes of the network where X is equal to the configured replication factor.
/// A node joining a fully connected network via three (ALPHA_VALUE) bootnodes
/// should be able to put a record to the X closest nodes of the network where X
/// is equal to the configured replication factor.
#[test]
fn put_record() {
fn prop(replication_factor: usize, records: Vec<Record>) {
let replication_factor = NonZeroUsize::new(replication_factor % (K_VALUE.get() / 2) + 1).unwrap();
let num_total = replication_factor.get() * 2;
fn prop(records: Vec<Record>, seed: Seed) {
let mut rng = StdRng::from_seed(seed.0);
let replication_factor = NonZeroUsize::new(rng.gen_range(1, (K_VALUE.get() / 2) + 1)).unwrap();
// At least 4 nodes, 1 under test + 3 bootnodes.
let num_total = usize::max(4, replication_factor.get() * 2);
let mut config = KademliaConfig::default();
config.set_replication_factor(replication_factor);
if rng.gen() {
config.disjoint_query_paths(true);
}
let mut swarms = {
let mut fully_connected_swarms = build_fully_connected_nodes_with_config(
@ -433,10 +461,13 @@ fn put_record() {
);
let mut single_swarm = build_node_with_config(config);
single_swarm.1.add_address(
Swarm::local_peer_id(&fully_connected_swarms[0].1),
fully_connected_swarms[0].0.clone(),
);
// Connect `single_swarm` to three bootnodes.
for i in 0..3 {
single_swarm.1.add_address(
Swarm::local_peer_id(&fully_connected_swarms[i].1),
fully_connected_swarms[i].0.clone(),
);
}
let mut swarms = vec![single_swarm];
swarms.append(&mut fully_connected_swarms);
@ -618,11 +649,13 @@ fn get_record() {
loop {
match swarm.poll_next_unpin(ctx) {
Poll::Ready(Some(KademliaEvent::QueryResult {
id, result: QueryResult::GetRecord(Ok(ok)), ..
id,
result: QueryResult::GetRecord(Ok(GetRecordOk { records })),
..
})) => {
assert_eq!(id, qid);
assert_eq!(ok.records.len(), 1);
assert_eq!(ok.records.first(), Some(&record));
assert_eq!(records.len(), 1);
assert_eq!(records.first().unwrap().record, record);
return Poll::Ready(());
}
// Ignore any other event.
@ -662,11 +695,13 @@ fn get_record_many() {
loop {
match swarm.poll_next_unpin(ctx) {
Poll::Ready(Some(KademliaEvent::QueryResult {
id, result: QueryResult::GetRecord(Ok(ok)), ..
id,
result: QueryResult::GetRecord(Ok(GetRecordOk { records })),
..
})) => {
assert_eq!(id, qid);
assert_eq!(ok.records.len(), num_results);
assert_eq!(ok.records.first(), Some(&record));
assert_eq!(records.len(), num_results);
assert_eq!(records.first().unwrap().record, record);
return Poll::Ready(());
}
// Ignore any other event.
@ -681,17 +716,22 @@ fn get_record_many() {
)
}
/// A node joining a fully connected network via a single bootnode should be able to add itself as a
/// provider to the X closest nodes of the network where X is equal to the configured replication
/// factor.
/// A node joining a fully connected network via three (ALPHA_VALUE) bootnodes
/// should be able to add itself as a provider to the X closest nodes of the
/// network where X is equal to the configured replication factor.
#[test]
fn add_provider() {
fn prop(replication_factor: usize, keys: Vec<record::Key>) {
let replication_factor = NonZeroUsize::new(replication_factor % (K_VALUE.get() / 2) + 1).unwrap();
let num_total = replication_factor.get() * 2;
fn prop(keys: Vec<record::Key>, seed: Seed) {
let mut rng = StdRng::from_seed(seed.0);
let replication_factor = NonZeroUsize::new(rng.gen_range(1, (K_VALUE.get() / 2) + 1)).unwrap();
// At least 4 nodes, 1 under test + 3 bootnodes.
let num_total = usize::max(4, replication_factor.get() * 2);
let mut config = KademliaConfig::default();
config.set_replication_factor(replication_factor);
if rng.gen() {
config.disjoint_query_paths(true);
}
let mut swarms = {
let mut fully_connected_swarms = build_fully_connected_nodes_with_config(
@ -700,10 +740,13 @@ fn add_provider() {
);
let mut single_swarm = build_node_with_config(config);
single_swarm.1.add_address(
Swarm::local_peer_id(&fully_connected_swarms[0].1),
fully_connected_swarms[0].0.clone(),
);
// Connect `single_swarm` to three bootnodes.
for i in 0..3 {
single_swarm.1.add_address(
Swarm::local_peer_id(&fully_connected_swarms[i].1),
fully_connected_swarms[i].0.clone(),
);
}
let mut swarms = vec![single_swarm];
swarms.append(&mut fully_connected_swarms);
@ -877,3 +920,135 @@ fn exp_decr_expiration_overflow() {
quickcheck(prop_no_panic as fn(_, _))
}
#[test]
fn disjoint_query_does_not_finish_before_all_paths_did() {
let mut config = KademliaConfig::default();
config.disjoint_query_paths(true);
// I.e. setting the amount disjoint paths to be explored to 2.
config.set_parallelism(NonZeroUsize::new(2).unwrap());
let mut alice = build_node_with_config(config);
let mut trudy = build_node(); // Trudy the intrudor, an adversary.
let mut bob = build_node();
let key = Key::new(&multihash::Sha2_256::digest(&thread_rng().gen::<[u8; 32]>()));
let record_bob = Record::new(key.clone(), b"bob".to_vec());
let record_trudy = Record::new(key.clone(), b"trudy".to_vec());
// Make `bob` and `trudy` aware of their version of the record searched by
// `alice`.
bob.1.store.put(record_bob.clone()).unwrap();
trudy.1.store.put(record_trudy.clone()).unwrap();
// Make `trudy` and `bob` known to `alice`.
alice.1.add_address(Swarm::local_peer_id(&trudy.1), trudy.0.clone());
alice.1.add_address(Swarm::local_peer_id(&bob.1), bob.0.clone());
// Drop the swarm addresses.
let (mut alice, mut bob, mut trudy) = (alice.1, bob.1, trudy.1);
// Have `alice` query the Dht for `key` with a quorum of 1.
alice.get_record(&key, Quorum::One);
// The default peer timeout is 10 seconds. Choosing 1 seconds here should
// give enough head room to prevent connections to `bob` to time out.
let mut before_timeout = Delay::new(Duration::from_secs(1));
// Poll only `alice` and `trudy` expecting `alice` not yet to return a query
// result as it is not able to connect to `bob` just yet.
block_on(
poll_fn(|ctx| {
for (i, swarm) in [&mut alice, &mut trudy].iter_mut().enumerate() {
loop {
match swarm.poll_next_unpin(ctx) {
Poll::Ready(Some(KademliaEvent::QueryResult{
result: QueryResult::GetRecord(result),
..
})) => {
if i != 0 {
panic!("Expected `QueryResult` from Alice.")
}
match result {
Ok(_) => panic!(
"Expected query not to finish until all \
disjoint paths have been explored.",
),
Err(e) => panic!("{:?}", e),
}
}
// Ignore any other event.
Poll::Ready(Some(_)) => (),
Poll::Ready(None) => panic!("Expected Kademlia behaviour not to finish."),
Poll::Pending => break,
}
}
}
// Make sure not to wait until connections to `bob` time out.
before_timeout.poll_unpin(ctx)
})
);
// Make sure `alice` has exactly one query with `trudy`'s record only.
assert_eq!(1, alice.queries.iter().count());
alice.queries.iter().for_each(|q| {
match &q.inner.info {
QueryInfo::GetRecord{ records, .. } => {
assert_eq!(
*records,
vec![PeerRecord {
peer: Some(Swarm::local_peer_id(&trudy).clone()),
record: record_trudy.clone(),
}],
);
},
i @ _ => panic!("Unexpected query info: {:?}", i),
}
});
// Poll `alice` and `bob` expecting `alice` to return a successful query
// result as it is now able to explore the second disjoint path.
let records = block_on(
poll_fn(|ctx| {
for (i, swarm) in [&mut alice, &mut bob].iter_mut().enumerate() {
loop {
match swarm.poll_next_unpin(ctx) {
Poll::Ready(Some(KademliaEvent::QueryResult{
result: QueryResult::GetRecord(result),
..
})) => {
if i != 0 {
panic!("Expected `QueryResult` from Alice.")
}
match result {
Ok(ok) => return Poll::Ready(ok.records),
Err(e) => unreachable!("{:?}", e),
}
}
// Ignore any other event.
Poll::Ready(Some(_)) => (),
Poll::Ready(None) => panic!(
"Expected Kademlia behaviour not to finish.",
),
Poll::Pending => break,
}
}
}
Poll::Pending
})
);
assert_eq!(2, records.len());
assert!(records.contains(&PeerRecord {
peer: Some(Swarm::local_peer_id(&bob).clone()),
record: record_bob,
}));
assert!(records.contains(&PeerRecord {
peer: Some(Swarm::local_peer_id(&trudy).clone()),
record: record_trudy,
}));
}