Kademlia Records (#1144)

* initial implementation of the records

* move to multihash keys

* correctly process query results

* comments and formatting

* correctly return closer_peers in query

* Fix checking a wrong peer id in a test

* Apply suggestions from code review

Co-Authored-By: Roman Borschel <romanb@users.noreply.github.com>

* Fix changes from suggestions

* Send responses to PUT_VALUE requests

* Shortcut in get_value

* Update protocols/kad/src/behaviour.rs

Co-Authored-By: Roman Borschel <romanb@users.noreply.github.com>

* Revert "Update protocols/kad/src/behaviour.rs"

This reverts commit 579ce742a7f4c94587f1e1f0866d2a3a37418efb.

* Remove duplicate insertion

* Adds a record to a PUT_VALUE response

* Fix a racy put_value test

* Store value ourselves only if we are in K closest

* Abstract over storage

* Revert "Abstract over storage": bad take

This reverts commit eaebf5b6d915712eaf3b05929577fdf697f204d8.

* Abstract over records storage using hashmap as default

* Constructor for custom records

* New Record type and its traits

* Fix outdated storage name

* Fixes returning an event

* Change FindNodeReq key type to Multihash

* WriteState for a second stage of a PUT_VALUE request

* GET_VALUE should not have a record

* Refactor a match arm

* Add successes and failures counters to PutValueRes

* If value is found no need to return closer peers

* Remove a custom storage from tests

* Rename a test to get_value_not_found

* Adds a TODO to change FindNode request key to Multihash

Co-Authored-By: Roman Borschel <romanb@users.noreply.github.com>

* Move MemoryRecordStorage to record.rs

* Return a Cow-ed Record from get

* Fix incorrect GET_VALUE parsing

* Various fixes from review

* Fixes get_value_not_found

* Fix peer id names in test

* another fix

* PutValue correctly distributes values

* Simplify the test

* Check that results are actually the closest

* Reverts changes to tests

* Fix the test topology and checking the results

* Run put_value test ten times

* Adds a get_value test

* Apply suggestions from code review

Co-Authored-By: Roman Borschel <romanb@users.noreply.github.com>

* Make Record fields public

* Moves WriteState to write.rs

* A couple of minor fixes

* A few more fixes from review

* Simplify the put_value test

* Don't synchronously return an error from put_value

* Formatting fixes and comments

* Collect a bunch of results

* Take exactly as many elements as needed

* Check if the peer is still connected

* Adds a test for receiving multiple GetValueResult results

* Unnecessary mut iterators in put_value

* Ask for num_results in get_value

* Don't allocate twice in get_value

* Don't count the same errored peer multiple times

* Apply suggestions from code review

Co-Authored-By: Roman Borschel <romanb@users.noreply.github.com>

* Fix another review

* Apply suggestions from code review

Co-Authored-By: Pierre Krieger <pierre.krieger1708@gmail.com>

* Bring back FromIterator and improve a panic message

* Update protocols/kad/src/behaviour.rs

Co-Authored-By: Pierre Krieger <pierre.krieger1708@gmail.com>
Author: Fedor Sakharov
Date: 2019-06-04 14:44:24 +03:00
Committed by: Pierre Krieger
Parent: 603fd5744f
Commit: 22527e7eb6
8 changed files with 1037 additions and 29 deletions
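As a quick orientation before the diff, here is a minimal usage sketch of the record API this commit adds. It is not part of the commit: the swarm wiring and poll loop are elided, the crate-root re-exports of GetValueResult/PutValueResult and the multihash::encode helper are assumptions, and the signatures follow the diff below.

use std::num::NonZeroU8;
use multihash::{encode, Hash};
use libp2p_kad::{GetValueResult, Kademlia, KademliaOut, PutValueResult};

// Store a value under the SHA2-256 multihash of a key, then ask the
// network for it again, collecting up to five results.
fn put_then_get<TSubstream>(kad: &mut Kademlia<TSubstream>) {
    let key = encode(Hash::SHA2256, b"my-key").expect("valid multihash");

    // Stores the record locally first; on success this starts an
    // iterative PUT_VALUE query towards the closest peers.
    kad.put_value(key.clone(), b"my-value".to_vec());

    // Starts an iterative GET_VALUE query. The requested number of
    // results is capped at the bucket size (20).
    kad.get_value(&key, NonZeroU8::new(5).expect("non-zero"));
}

// Results arrive later as behaviour events from the swarm poll loop.
fn on_event(event: KademliaOut) {
    match event {
        KademliaOut::GetValueResult(GetValueResult::Found { results }) =>
            println!("found {} record(s)", results.len()),
        KademliaOut::GetValueResult(GetValueResult::NotFound { closest_peers }) =>
            println!("not found; got {} closer peers", closest_peers.len()),
        KademliaOut::PutValueResult(PutValueResult::Ok { successes, failures, .. }) =>
            println!("put finished: {} successes, {} failures", successes, failures),
        KademliaOut::PutValueResult(PutValueResult::Err { cause, .. }) =>
            println!("local store failed: {:?}", cause),
        _ => {}
    }
}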


@@ -23,20 +23,22 @@ use crate::handler::{KademliaHandler, KademliaHandlerEvent, KademliaHandlerIn};
use crate::kbucket::{self, KBucketsTable, NodeStatus};
use crate::protocol::{KadConnectionType, KadPeer};
use crate::query::{QueryConfig, QueryState, QueryStatePollOut};
use crate::write::WriteState;
use crate::record::{MemoryRecordStorage, RecordStore, Record, RecordStorageError};
use fnv::{FnvHashMap, FnvHashSet};
use futures::{prelude::*, stream};
use libp2p_core::swarm::{ConnectedPoint, NetworkBehaviour, NetworkBehaviourAction, PollParameters};
use libp2p_core::{protocols_handler::ProtocolsHandler, Multiaddr, PeerId};
use multihash::Multihash;
use smallvec::SmallVec;
-use std::{borrow::Cow, error, marker::PhantomData, time::Duration};
+use std::{borrow::Cow, error, iter::FromIterator, marker::PhantomData, num::NonZeroU8, time::Duration};
use tokio_io::{AsyncRead, AsyncWrite};
use wasm_timer::{Instant, Interval};
mod test;
/// Network behaviour that handles Kademlia.
-pub struct Kademlia<TSubstream> {
+pub struct Kademlia<TSubstream, TRecordStorage: RecordStore = MemoryRecordStorage> {
/// Storage for the nodes. Contains the known multiaddresses for this node.
kbuckets: KBucketsTable<PeerId, Addresses>,
@@ -47,6 +49,9 @@ pub struct Kademlia<TSubstream> {
/// is the list of accumulated providers for `GET_PROVIDERS` queries.
active_queries: FnvHashMap<QueryId, QueryState<QueryInfo, PeerId>>,
/// All the `PUT_VALUE` actions we are currently performing
active_writes: FnvHashMap<QueryId, WriteState<PeerId, Multihash>>,
/// List of peers the swarm is connected to.
connected_peers: FnvHashSet<PeerId>,
@@ -89,6 +94,9 @@ pub struct Kademlia<TSubstream> {
/// Marker to pin the generics.
marker: PhantomData<TSubstream>,
/// The records that we keep.
records: TRecordStorage,
}
/// Opaque type. Each query that we start gets a unique number.
@@ -131,6 +139,24 @@ enum QueryInfoInner {
/// Which hash we're targeting.
target: Multihash,
},
/// Put the value into the DHT records
PutValue {
/// The key of the record being inserted
key: Multihash,
/// The value of the record being inserted
value: Vec<u8>,
},
/// Get a value from the DHT records
GetValue {
/// The key we're looking for
key: Multihash,
/// The results from peers are stored here
results: Vec<Record>,
/// The number of results to look for.
num_results: usize,
},
}
impl Into<kbucket::Key<QueryInfo>> for QueryInfo {
@@ -146,6 +172,8 @@ impl AsRef<[u8]> for QueryInfo {
QueryInfoInner::FindPeer(peer) => peer.as_ref(),
QueryInfoInner::GetProviders { target, .. } => target.as_bytes(),
QueryInfoInner::AddProvider { target } => target.as_bytes(),
QueryInfoInner::GetValue { key, .. } => key.as_bytes(),
QueryInfoInner::PutValue { key, .. } => key.as_bytes(),
}
}
}
@@ -155,11 +183,12 @@ impl QueryInfo {
fn to_rpc_request<TUserData>(&self, user_data: TUserData) -> KademliaHandlerIn<TUserData> {
match &self.inner {
QueryInfoInner::Initialization { target } => KademliaHandlerIn::FindNodeReq {
-key: target.clone(),
+key: target.clone().into(),
user_data,
},
QueryInfoInner::FindPeer(key) => KademliaHandlerIn::FindNodeReq {
-key: key.clone(),
+// TODO: Change the `key` of `QueryInfoInner::FindPeer` to be a `Multihash`.
+key: key.clone().into(),
user_data,
},
QueryInfoInner::GetProviders { target, .. } => KademliaHandlerIn::GetProvidersReq {
@@ -170,35 +199,62 @@ impl QueryInfo {
key: unimplemented!(), // TODO: target.clone(),
user_data,
},
QueryInfoInner::GetValue { key, .. } => KademliaHandlerIn::GetValue {
key: key.clone(),
user_data,
},
QueryInfoInner::PutValue { key, .. } => KademliaHandlerIn::FindNodeReq {
key: key.clone(),
user_data,
}
}
}
}
-impl<TSubstream> Kademlia<TSubstream> {
+impl<TSubstream, TRecordStorage> Kademlia<TSubstream, TRecordStorage>
+where
+TRecordStorage: RecordStore
+{
/// Creates a `Kademlia`.
#[inline]
-pub fn new(local_peer_id: PeerId) -> Self {
-Self::new_inner(local_peer_id)
+pub fn new(local_peer_id: PeerId) -> Self
+where
+TRecordStorage: Default
+{
+Self::new_inner(local_peer_id, Default::default())
}
/// The same as `new`, but using a custom protocol name.
///
/// Kademlia nodes only communicate with other nodes using the same protocol name. Using a
/// custom name therefore allows segregating the DHT from others, if that is desired.
-pub fn with_protocol_name(local_peer_id: PeerId, name: impl Into<Cow<'static, [u8]>>) -> Self {
-let mut me = Kademlia::new_inner(local_peer_id);
+pub fn with_protocol_name(local_peer_id: PeerId, name: impl Into<Cow<'static, [u8]>>) -> Self
+where
+TRecordStorage: Default
+{
+let mut me = Kademlia::new_inner(local_peer_id, Default::default());
me.protocol_name_override = Some(name.into());
me
}
/// The same as `new`, but with a custom storage.
///
/// The default record storage is in memory; this allows overriding it
/// with user-defined storage behaviour.
pub fn with_storage(local_peer_id: PeerId, records: TRecordStorage) -> Self {
Self::new_inner(local_peer_id, records)
}
/// Creates a `Kademlia`.
///
/// Contrary to `new`, doesn't perform the initialization queries that store our local ID into
/// the DHT and fill our buckets.
#[inline]
#[deprecated(note="this function is now equivalent to new() and will be removed in the future")]
-pub fn without_init(local_peer_id: PeerId) -> Self {
-Self::new_inner(local_peer_id)
+pub fn without_init(local_peer_id: PeerId) -> Self
+where TRecordStorage: Default
+{
+Self::new_inner(local_peer_id, Default::default())
}
/// Adds a known address of a peer participating in the Kademlia DHT to the
@@ -242,7 +298,7 @@ impl<TSubstream> Kademlia<TSubstream> {
}
/// Inner implementation of the constructors.
-fn new_inner(local_peer_id: PeerId) -> Self {
+fn new_inner(local_peer_id: PeerId, records: TRecordStorage) -> Self {
let parallelism = 3;
Kademlia {
@@ -250,6 +306,7 @@ impl<TSubstream> Kademlia<TSubstream> {
protocol_name_override: None,
queued_events: SmallVec::new(),
active_queries: Default::default(),
active_writes: Default::default(),
connected_peers: Default::default(),
pending_rpcs: SmallVec::with_capacity(parallelism),
next_query_id: QueryId(0),
@@ -261,6 +318,7 @@ impl<TSubstream> Kademlia<TSubstream> {
rpc_timeout: Duration::from_secs(8),
add_provider: SmallVec::new(),
marker: PhantomData,
records,
}
}
@@ -283,6 +341,45 @@ impl<TSubstream> Kademlia<TSubstream> {
self.start_query(QueryInfoInner::GetProviders { target, pending_results: Vec::new() });
}
/// Starts an iterative `GET_VALUE` request.
///
/// The number of results collected is in the interval [1, 20];
/// if the user requests more than 20 results, the count is capped at 20.
pub fn get_value(&mut self, key: &Multihash, num_results: NonZeroU8) {
let num_results = usize::min(num_results.get() as usize, kbucket::MAX_NODES_PER_BUCKET);
let mut results = Vec::with_capacity(num_results);
if let Some(record) = self.records.get(key) {
results.push(record.into_owned());
if num_results == 1 {
self.queued_events.push(NetworkBehaviourAction::GenerateEvent(
KademliaOut::GetValueResult(
GetValueResult::Found { results }
)));
return;
}
}
self.start_query(QueryInfoInner::GetValue {
key: key.clone(),
results,
num_results
});
}
/// Starts an iterative `PUT_VALUE` request.
pub fn put_value(&mut self, key: Multihash, value: Vec<u8>) {
if let Err(error) = self.records.put(Record { key: key.clone(), value: value.clone() }) {
self.queued_events.push(NetworkBehaviourAction::GenerateEvent(
KademliaOut::PutValueResult(
PutValueResult::Err { key, cause: error }
)
));
} else {
self.start_query(QueryInfoInner::PutValue { key, value });
}
}
/// Register the local node as the provider for the given key.
///
/// This will periodically send `ADD_PROVIDER` messages to the nodes closest to the key. When
@@ -454,9 +551,10 @@ impl<TSubstream> Kademlia<TSubstream> {
}
}
-impl<TSubstream> NetworkBehaviour for Kademlia<TSubstream>
+impl<TSubstream, TRecordStorage> NetworkBehaviour for Kademlia<TSubstream, TRecordStorage>
where
TSubstream: AsyncRead + AsyncWrite,
TRecordStorage: RecordStore,
{
type ProtocolsHandler = KademliaHandler<TSubstream, QueryId>;
type OutEvent = KademliaOut;
@@ -532,12 +630,18 @@ where
for query in self.active_queries.values_mut() {
query.inject_rpc_error(peer_id);
}
for write in self.active_writes.values_mut() {
write.inject_write_error(peer_id);
}
}
fn inject_disconnected(&mut self, id: &PeerId, _old_endpoint: ConnectedPoint) {
for query in self.active_queries.values_mut() {
query.inject_rpc_error(id);
}
for write in self.active_writes.values_mut() {
write.inject_write_error(id);
}
self.connection_updated(id.clone(), None, NodeStatus::Disconnected);
self.connected_peers.remove(id);
}
@@ -613,6 +717,10 @@ where
if let Some(query) = self.active_queries.get_mut(&user_data) {
query.inject_rpc_error(&source)
}
if let Some(write) = self.active_writes.get_mut(&user_data) {
write.inject_write_error(&source);
}
}
KademliaHandlerEvent::AddProvider { key, provider_peer } => {
self.queued_events.push(NetworkBehaviourAction::GenerateEvent(KademliaOut::Discovered {
@@ -623,6 +731,94 @@ where
self.add_provider.push((key, provider_peer.node_id));
return;
}
KademliaHandlerEvent::GetValue { key, request_id } => {
let (result, closer_peers) = match self.records.get(&key) {
Some(record) => {
(Some(record.into_owned()), Vec::new())
},
None => {
let closer_peers = self.find_closest(&kbucket::Key::from(key), &source);
(None, closer_peers)
}
};
self.queued_events.push(NetworkBehaviourAction::SendEvent {
peer_id: source,
event: KademliaHandlerIn::GetValueRes {
result,
closer_peers,
request_id,
},
});
}
KademliaHandlerEvent::GetValueRes {
result,
closer_peers,
user_data,
} => {
let mut finished_query = None;
if let Some(query) = self.active_queries.get_mut(&user_data) {
if let QueryInfoInner::GetValue {
key: _,
results,
num_results,
} = &mut query.target_mut().inner {
if let Some(result) = result {
results.push(result);
if results.len() == *num_results {
finished_query = Some(user_data);
}
}
}
}
if let Some(finished_query) = finished_query {
let (query_info, _) = self
.active_queries
.remove(&finished_query)
.expect("finished_query was gathered when peeking into active_queries; QED.")
.into_target_and_closest_peers();
match query_info.inner {
QueryInfoInner::GetValue { key: _, results, .. } => {
let result = GetValueResult::Found { results };
let event = KademliaOut::GetValueResult(result);
self.queued_events.push(NetworkBehaviourAction::GenerateEvent(event));
}
// TODO: write a better proof
_ => panic!("unexpected query_info.inner variant for a get_value result; QED.")
}
}
self.discovered(&user_data, &source, closer_peers.iter());
}
KademliaHandlerEvent::PutValue {
key,
value,
request_id
} => {
// TODO: Log errors and immediately reset the stream on error instead of letting the request time out.
if let Ok(()) = self.records.put(Record { key: key.clone(), value: value.clone() }) {
self.queued_events.push(NetworkBehaviourAction::SendEvent {
peer_id: source,
event: KademliaHandlerIn::PutValueRes {
key,
value,
request_id,
},
});
}
}
KademliaHandlerEvent::PutValueRes {
key: _,
user_data,
} => {
if let Some(write) = self.active_writes.get_mut(&user_data) {
write.inject_write_success(&source);
}
}
};
}
@@ -712,6 +908,29 @@ where
}
}
let finished_write = self.active_writes.iter()
.find_map(|(&query_id, write)|
if write.done() {
Some(query_id)
} else {
None
});
if let Some(finished_write) = finished_write {
let (t, successes, failures) = self
.active_writes
.remove(&finished_write)
.expect("finished_write was gathered when iterating active_writes; QED.")
.into_inner();
let event = KademliaOut::PutValueResult(PutValueResult::Ok {
key: t,
successes,
failures,
});
break Async::Ready(NetworkBehaviourAction::GenerateEvent(event));
}
if let Some(finished_query) = finished_query {
let (query_info, closer_peers) = self
.active_queries
@@ -753,6 +972,43 @@ where
self.queued_events.push(event);
}
},
QueryInfoInner::GetValue { key: _, results, .. } => {
let result = match results.len() {
0 => GetValueResult::NotFound{
closest_peers: closer_peers.collect()
},
_ => GetValueResult::Found{ results },
};
let event = KademliaOut::GetValueResult(result);
break Async::Ready(NetworkBehaviourAction::GenerateEvent(event));
},
QueryInfoInner::PutValue { key, value } => {
let closer_peers = Vec::from_iter(closer_peers);
for peer in &closer_peers {
let event = KademliaHandlerIn::PutValue {
key: key.clone(),
value: value.clone(),
user_data: finished_query,
};
if self.connected_peers.contains(peer) {
let event = NetworkBehaviourAction::SendEvent {
peer_id: peer.clone(),
event
};
self.queued_events.push(event);
} else {
self.pending_rpcs.push((peer.clone(), event));
self.queued_events.push(NetworkBehaviourAction::DialPeer {
peer_id: peer.clone(),
});
}
}
self.active_writes.insert(finished_query, WriteState::new(key, closer_peers));
},
}
} else {
break Async::NotReady;
@@ -761,6 +1017,24 @@ where
}
}
/// The result of a `GET_VALUE` query.
#[derive(Debug, Clone, PartialEq)]
pub enum GetValueResult {
/// The results received from peers. Always contains a non-zero number of results.
Found { results: Vec<Record> },
/// The record wasn't found.
NotFound { closest_peers: Vec<PeerId> }
}
/// The result of a `PUT_VALUE` query.
#[derive(Debug, Clone, PartialEq)]
pub enum PutValueResult {
/// The value has been put successfully.
Ok { key: Multihash, successes: usize, failures: usize },
/// The put value failed.
Err { key: Multihash, cause: RecordStorageError }
}
/// Output event of the `Kademlia` behaviour.
#[derive(Debug, Clone)]
pub enum KademliaOut {
@@ -802,6 +1076,12 @@ pub enum KademliaOut {
/// List of peers ordered from closest to furthest away.
closer_peers: Vec<PeerId>,
},
/// Result of a `GET_VALUE` query
GetValueResult(GetValueResult),
/// Result of a `PUT_VALUE` query
PutValueResult(PutValueResult),
}
impl From<kbucket::EntryView<PeerId, Addresses>> for KadPeer {
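Beyond the built-in MemoryRecordStorage, the RecordStore abstraction introduced here makes the backing store pluggable via with_storage. A sketch of a custom capacity-bounded store follows; the trait methods are inferred from the calls in the diff (record.rs itself is among the seven changed files not shown), and the AtCapacity error variant plus the crate-root re-exports are assumptions.

use std::borrow::Cow;
use std::collections::HashMap;
use libp2p_core::PeerId;
use libp2p_kad::{Kademlia, Record, RecordStore, RecordStorageError};
use multihash::Multihash;

// An in-memory store that refuses new records once full.
pub struct BoundedStorage {
    max_records: usize,
    records: HashMap<Multihash, Record>,
}

impl RecordStore for BoundedStorage {
    // Signature inferred from `self.records.get(&key)` yielding a value
    // with `into_owned()` in behaviour.rs above, i.e. a Cow<Record>.
    fn get(&self, key: &Multihash) -> Option<Cow<Record>> {
        self.records.get(key).map(Cow::Borrowed)
    }

    // Signature inferred from `self.records.put(Record { .. })`
    // returning Result<(), RecordStorageError> in behaviour.rs above.
    fn put(&mut self, record: Record) -> Result<(), RecordStorageError> {
        if self.records.len() >= self.max_records
            && !self.records.contains_key(&record.key)
        {
            // Assumed error variant; adjust to the real enum in record.rs.
            return Err(RecordStorageError::AtCapacity);
        }
        self.records.insert(record.key.clone(), record);
        Ok(())
    }
}

// Wire the custom store in through the constructor added by this commit.
fn kad_with_bounded_store<TSubstream>(local_peer_id: PeerId) -> Kademlia<TSubstream, BoundedStorage> {
    let storage = BoundedStorage { max_records: 1024, records: HashMap::new() };
    Kademlia::with_storage(local_peer_id, storage)
}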