// Copyright 2019 Parity Technologies (UK) Ltd.
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.

mod peers;

use peers::PeersIterState;
use peers::closest::{ClosestPeersIterConfig, ClosestPeersIter, disjoint::ClosestDisjointPeersIter};
use peers::fixed::FixedPeersIter;

use crate::{ALPHA_VALUE, K_VALUE};
use crate::kbucket::{Key, KeyBytes};
use either::Either;
use fnv::FnvHashMap;
use libp2p_core::PeerId;
use std::{time::Duration, num::NonZeroUsize};
use wasm_timer::Instant;

/// A `QueryPool` provides an aggregate state machine for driving `Query`s to completion.
///
/// Internally, a `Query` is in turn driven by an underlying `QueryPeerIter`
/// that determines the peer selection strategy, i.e. the order in which the
/// peers involved in the query should be contacted.
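///
/// A minimal usage sketch (not a doctest; `config` stands for some
/// `QueryConfig` value and `inner` for caller-defined per-query state,
/// neither of which is shown here):
///
/// ```ignore
/// let mut pool = QueryPool::new(config);
/// // Start a query that contacts a fixed set of peers in `peers`.
/// let id = pool.add_fixed(peers, inner);
/// // The pool is subsequently driven via `QueryPool::poll`.
/// ```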
pub struct QueryPool<TInner> {
    next_id: usize,
    config: QueryConfig,
    queries: FnvHashMap<QueryId, Query<TInner>>,
}

/// The observable states emitted by [`QueryPool::poll`].
pub enum QueryPoolState<'a, TInner> {
    /// The pool is idle, i.e. there are no queries to process.
    Idle,
    /// At least one query is waiting for results. `Some(request)` indicates
    /// that a new request is now being waited on.
    Waiting(Option<(&'a mut Query<TInner>, PeerId)>),
    /// A query has finished.
    Finished(Query<TInner>),
    /// A query has timed out.
    Timeout(Query<TInner>)
}

impl<TInner> QueryPool<TInner> {
    /// Creates a new `QueryPool` with the given configuration.
    pub fn new(config: QueryConfig) -> Self {
        QueryPool {
            next_id: 0,
            config,
            queries: Default::default()
        }
    }

    /// Gets a reference to the `QueryConfig` used by the pool.
    pub fn config(&self) -> &QueryConfig {
        &self.config
    }

    /// Returns an iterator over the queries in the pool.
    pub fn iter(&self) -> impl Iterator<Item = &Query<TInner>> {
        self.queries.values()
    }

    /// Gets the current size of the pool, i.e. the number of running queries.
    pub fn size(&self) -> usize {
        self.queries.len()
    }

    /// Returns an iterator that allows modifying each query in the pool.
    pub fn iter_mut(&mut self) -> impl Iterator<Item = &mut Query<TInner>> {
        self.queries.values_mut()
    }

    /// Adds a query to the pool that contacts a fixed set of peers.
    pub fn add_fixed<I>(&mut self, peers: I, inner: TInner) -> QueryId
    where
        I: IntoIterator<Item = PeerId>
    {
        let id = self.next_query_id();
        self.continue_fixed(id, peers, inner);
        id
    }

    /// Continues an earlier query with a fixed set of peers, reusing
    /// the given query ID, which must be from a query that finished
    /// earlier.
    pub fn continue_fixed<I>(&mut self, id: QueryId, peers: I, inner: TInner)
    where
        I: IntoIterator<Item = PeerId>
    {
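        // A query ID may only be reused once the query that originally
        // held it is no longer in the pool.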
        assert!(!self.queries.contains_key(&id));
        let parallelism = self.config.replication_factor;
        let peer_iter = QueryPeerIter::Fixed(FixedPeersIter::new(peers, parallelism));
        let query = Query::new(id, peer_iter, inner);
        self.queries.insert(id, query);
    }

    /// Adds a query to the pool that iterates towards the closest peers to the target.
    pub fn add_iter_closest<T, I>(&mut self, target: T, peers: I, inner: TInner) -> QueryId
    where
        T: Into<KeyBytes> + Clone,
        I: IntoIterator<Item = Key<PeerId>>
    {
        let id = self.next_query_id();
        self.continue_iter_closest(id, target, peers, inner);
        id
    }

    /// Continues an earlier query that iterates towards the closest peers
    /// to the target, reusing the given query ID, which must be from a
    /// query that finished earlier.
    pub fn continue_iter_closest<T, I>(&mut self, id: QueryId, target: T, peers: I, inner: TInner)
    where
        T: Into<KeyBytes> + Clone,
        I: IntoIterator<Item = Key<PeerId>>
    {
        let cfg = ClosestPeersIterConfig {
            num_results: self.config.replication_factor,
            parallelism: self.config.parallelism,
            .. ClosestPeersIterConfig::default()
        };
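
        // When disjoint query paths are enabled, the lookup proceeds along
        // multiple independent paths of peers (in the spirit of S/Kademlia
        // lookups); otherwise a single iterator over the closest peers is used.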
        let peer_iter = if self.config.disjoint_query_paths {
            QueryPeerIter::ClosestDisjoint(
                ClosestDisjointPeersIter::with_config(cfg, target, peers),
            )
        } else {
            QueryPeerIter::Closest(ClosestPeersIter::with_config(cfg, target, peers))
        };

        let query = Query::new(id, peer_iter, inner);
        self.queries.insert(id, query);
    }

    fn next_query_id(&mut self) -> QueryId {
        let id = QueryId(self.next_id);
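        // Wrap around on overflow. IDs may thus eventually be reused,
        // assuming the query that previously held the ID has long since
        // been removed from the pool.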
        self.next_id = self.next_id.wrapping_add(1);
        id
    }

    /// Returns a reference to a query with the given ID, if it is in the pool.
    pub fn get(&self, id: &QueryId) -> Option<&Query<TInner>> {
        self.queries.get(id)
    }

    /// Returns a mutable reference to a query with the given ID, if it is in the pool.
    pub fn get_mut(&mut self, id: &QueryId) -> Option<&mut Query<TInner>> {
        self.queries.get_mut(id)
    }

    /// Polls the pool to advance the queries.
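    ///
    /// A sketch of a typical driving loop (not a doctest; error handling
    /// and the actual dispatch of requests are elided):
    ///
    /// ```ignore
    /// loop {
    ///     match pool.poll(Instant::now()) {
    ///         QueryPoolState::Waiting(Some((query, peer))) => {
    ///             // Issue a request to `peer` on behalf of `query`.
    ///         }
    ///         QueryPoolState::Waiting(None) => break, // Responses pending.
    ///         QueryPoolState::Finished(query) => { /* Consume the result. */ }
    ///         QueryPoolState::Timeout(query) => { /* Report the timeout. */ }
    ///         QueryPoolState::Idle => break,
    ///     }
    /// }
    /// ```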
    pub fn poll(&mut self, now: Instant) -> QueryPoolState<'_, TInner> {
        let mut finished = None;
        let mut timeout = None;
        let mut waiting = None;
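
        // Scan the queries and record at most one state transition
        // (finished, waiting on a new peer, or timed out); it is reported
        // to the caller below, one transition per call to `poll`.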
        for (&query_id, query) in self.queries.iter_mut() {
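            // Record the start time on the first poll of this query.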
            query.stats.start = query.stats.start.or(Some(now));
            match query.next(now) {
                PeersIterState::Finished => {
                    finished = Some(query_id);
                    break
                }
                PeersIterState::Waiting(Some(peer_id)) => {
                    let peer = peer_id.into_owned();
                    waiting = Some((query_id, peer));
                    break
                }
                PeersIterState::Waiting(None) | PeersIterState::WaitingAtCapacity => {
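                    // No new peer to contact at the moment; check whether
                    // the query has exceeded the configured timeout.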
                    let elapsed = now - query.stats.start.unwrap_or(now);
                    if elapsed >= self.config.timeout {
                        timeout = Some(query_id);
                        break
                    }
                }
            }
        }

        if let Some((query_id, peer_id)) = waiting {
            let query = self.queries.get_mut(&query_id).expect("s.a.");
            return QueryPoolState::Waiting(Some((query, peer_id)))
        }

        if let Some(query_id) = finished {
            let mut query = self.queries.remove(&query_id).expect("s.a.");
            query.stats.end = Some(now);
            return QueryPoolState::Finished(query)
        }
|
        if let Some(query_id) = timeout {
            let mut query = self.queries.remove(&query_id).expect("s.a.");
            query.stats.end = Some(now);
            return QueryPoolState::Timeout(query)
        }

        if self.queries.is_empty() {
            return QueryPoolState::Idle
        } else {
            return QueryPoolState::Waiting(None)
        }
    }
}
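// A sketch (not part of the original file) of how the owner of a `QueryPool`
// might drive it, where `pool`, `dial` and `report` are hypothetical and only
// the `QueryPoolState` variants matched here come from `poll` above:
//
//     loop {
//         match pool.poll(Instant::now()) {
//             QueryPoolState::Waiting(Some((query, peer))) => dial(query.id(), peer),
//             QueryPoolState::Finished(query) => report(query.into_result()),
//             QueryPoolState::Timeout(query) => report(query.into_result()),
//             QueryPoolState::Waiting(None) | QueryPoolState::Idle => break,
//         }
//     }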
/// Unique identifier for an active query.
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct QueryId(usize);
/// The configuration for queries in a `QueryPool`.
#[derive(Debug, Clone)]
pub struct QueryConfig {
    /// Timeout of a single query.
    ///
    /// See [`crate::behaviour::KademliaConfig::set_query_timeout`] for details.
    pub timeout: Duration,

    /// The replication factor to use.
    ///
    /// See [`crate::behaviour::KademliaConfig::set_replication_factor`] for details.
    pub replication_factor: NonZeroUsize,

    /// Allowed level of parallelism for iterative queries.
    ///
    /// See [`crate::behaviour::KademliaConfig::set_parallelism`] for details.
    pub parallelism: NonZeroUsize,

    /// Whether to use disjoint paths on iterative lookups.
    ///
    /// See [`crate::behaviour::KademliaConfig::disjoint_query_paths`] for details.
    pub disjoint_query_paths: bool,
}
impl Default for QueryConfig {
    fn default() -> Self {
        QueryConfig {
            timeout: Duration::from_secs(60),
            replication_factor: NonZeroUsize::new(K_VALUE.get()).expect("K_VALUE > 0"),
            parallelism: ALPHA_VALUE,
            disjoint_query_paths: false,
        }
    }
}
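// For illustration only: raising the parallelism above the `ALPHA_VALUE`
// default trades extra bandwidth for lookup latency; the number used here is
// arbitrary, not a recommendation.
//
//     let config = QueryConfig {
//         parallelism: NonZeroUsize::new(10).expect("10 > 0"),
//         ..QueryConfig::default()
//     };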
/// A query in a `QueryPool`.
pub struct Query<TInner> {
    /// The unique ID of the query.
    id: QueryId,
    /// The peer iterator that drives the query state.
    peer_iter: QueryPeerIter,
    /// Execution statistics of the query.
    stats: QueryStats,
    /// The opaque inner query state.
    pub inner: TInner,
}
/// The peer selection strategies that can be used by queries.
enum QueryPeerIter {
    Closest(ClosestPeersIter),
    ClosestDisjoint(ClosestDisjointPeersIter),
    Fixed(FixedPeersIter)
}
impl<TInner> Query<TInner> {
    /// Creates a new query without starting it.
    fn new(id: QueryId, peer_iter: QueryPeerIter, inner: TInner) -> Self {
        Query { id, inner, peer_iter, stats: QueryStats::empty() }
    }

    /// Gets the unique ID of the query.
    pub fn id(&self) -> QueryId {
        self.id
    }

    /// Gets the current execution statistics of the query.
    pub fn stats(&self) -> &QueryStats {
        &self.stats
    }

    /// Informs the query that the attempt to contact `peer` failed.
    pub fn on_failure(&mut self, peer: &PeerId) {
        let updated = match &mut self.peer_iter {
            QueryPeerIter::Closest(iter) => iter.on_failure(peer),
            QueryPeerIter::ClosestDisjoint(iter) => iter.on_failure(peer),
            QueryPeerIter::Fixed(iter) => iter.on_failure(peer)
        };
        if updated {
            self.stats.failure += 1;
        }
    }
    /// Informs the query that the attempt to contact `peer` succeeded,
    /// possibly resulting in new peers that should be incorporated into
    /// the query, if applicable.
    pub fn on_success<I>(&mut self, peer: &PeerId, new_peers: I)
    where
        I: IntoIterator<Item = PeerId>
    {
        let updated = match &mut self.peer_iter {
            QueryPeerIter::Closest(iter) => iter.on_success(peer, new_peers),
            QueryPeerIter::ClosestDisjoint(iter) => iter.on_success(peer, new_peers),
            QueryPeerIter::Fixed(iter) => iter.on_success(peer)
        };
        if updated {
            self.stats.success += 1;
        }
    }
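    // Taken together, `on_failure` and `on_success` only bump `stats.failure`
    // and `stats.success` when the underlying iterator still cared about the
    // response; a late reply from a peer the iterator no longer tracks leaves
    // the counters untouched. A hypothetical sequence, for illustration:
    //
    //     query.on_success(&peer, closer_peers); // iterator was waiting on `peer`: success += 1
    //     query.on_failure(&peer);               // `peer` already reported: counters unchanged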
    /// Checks whether the query is currently waiting for a result from `peer`.
    pub fn is_waiting(&self, peer: &PeerId) -> bool {
        match &self.peer_iter {
            QueryPeerIter::Closest(iter) => iter.is_waiting(peer),
            QueryPeerIter::ClosestDisjoint(iter) => iter.is_waiting(peer),
            QueryPeerIter::Fixed(iter) => iter.is_waiting(peer)
        }
    }
    /// Advances the state of the underlying peer iterator.
    fn next(&mut self, now: Instant) -> PeersIterState<'_> {
        let state = match &mut self.peer_iter {
            QueryPeerIter::Closest(iter) => iter.next(now),
            QueryPeerIter::ClosestDisjoint(iter) => iter.next(now),
            QueryPeerIter::Fixed(iter) => iter.next()
        };

        if let PeersIterState::Waiting(Some(_)) = state {
            self.stats.requests += 1;
        }

        state
    }
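    // Note (editorial): `stats.requests` is incremented exactly when the
    // iterator yields a new peer to contact, which keeps `QueryStats::num_pending`
    // consistent with the `on_success`/`on_failure` bookkeeping above.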
    /// Tries to (gracefully) finish the query prematurely, providing the peers
    /// that are no longer of interest for further progress of the query.
    ///
    /// A query may require that in order to finish gracefully a certain subset
    /// of peers must be contacted. E.g. in the case of disjoint query paths a
    /// query may only finish gracefully if every path contacted a peer whose
    /// response permits termination of the query. The given peers are those for
    /// which this is considered to be the case, i.e. for which a termination
    /// condition is satisfied.
    ///
    /// Returns `true` if the query did indeed finish, `false` otherwise. In the
    /// latter case, a new attempt at finishing the query may be made with new
    /// `peers`.
    ///
    /// A finished query immediately stops yielding new peers to contact and
    /// will be reported by [`QueryPool::poll`] via
    /// [`QueryPoolState::Finished`].
    pub fn try_finish<'a, I>(&mut self, peers: I) -> bool
    where
        I: IntoIterator<Item = &'a PeerId>
    {
        match &mut self.peer_iter {
            QueryPeerIter::Closest(iter) => { iter.finish(); true },
            QueryPeerIter::ClosestDisjoint(iter) => iter.finish_paths(peers),
            QueryPeerIter::Fixed(iter) => { iter.finish(); true }
        }
    }
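    // A sketch of the disjoint-path case, assuming `quorum_peers` holds the
    // peers whose responses satisfied the termination condition (e.g. returned
    // the record being looked up); the names are hypothetical:
    //
    //     if query.try_finish(quorum_peers.iter()) {
    //         // Every disjoint path contacted one of `quorum_peers`; the query
    //         // is finished and will be reported via `QueryPoolState::Finished`.
    //     } else {
    //         // At least one path has not yet hit a terminating peer; keep
    //         // driving the query and retry with an updated set later.
    //     }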
    /// Finishes the query prematurely.
    ///
    /// A finished query immediately stops yielding new peers to contact and will be
    /// reported by [`QueryPool::poll`] via [`QueryPoolState::Finished`].
    pub fn finish(&mut self) {
        match &mut self.peer_iter {
            QueryPeerIter::Closest(iter) => iter.finish(),
            QueryPeerIter::ClosestDisjoint(iter) => iter.finish(),
            QueryPeerIter::Fixed(iter) => iter.finish()
        }
    }
    /// Checks whether the query has finished.
    ///
    /// A finished query is eventually reported by [`QueryPool::poll`] and
    /// removed from the pool.
    pub fn is_finished(&self) -> bool {
        match &self.peer_iter {
            QueryPeerIter::Closest(iter) => iter.is_finished(),
            QueryPeerIter::ClosestDisjoint(iter) => iter.is_finished(),
            QueryPeerIter::Fixed(iter) => iter.is_finished()
        }
    }
    /// Consumes the query, producing the final `QueryResult`.
    pub fn into_result(self) -> QueryResult<TInner, impl Iterator<Item = PeerId>> {
        let peers = match self.peer_iter {
            QueryPeerIter::Closest(iter) => Either::Left(Either::Left(iter.into_result())),
            QueryPeerIter::ClosestDisjoint(iter) => Either::Left(Either::Right(iter.into_result())),
            QueryPeerIter::Fixed(iter) => Either::Right(iter.into_result())
        };
        QueryResult { peers, inner: self.inner, stats: self.stats }
    }
}
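// The nested `Either` above merely unifies the three iterator result types
// into one concrete type; callers only see `impl Iterator<Item = PeerId>`.
// A hedged usage sketch:
//
//     let result = query.into_result();
//     let contacted: Vec<PeerId> = result.peers.collect();
//     println!("contacted {} peers in {:?}", contacted.len(), result.stats.duration());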
/// The result of a `Query`.
pub struct QueryResult<TInner, TPeers> {
    /// The opaque inner query state.
    pub inner: TInner,
    /// The successfully contacted peers.
    pub peers: TPeers,
    /// The collected query statistics.
    pub stats: QueryStats
}
/// Execution statistics of a query.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct QueryStats {
    requests: u32,
    success: u32,
    failure: u32,
    start: Option<Instant>,
    end: Option<Instant>
}
impl QueryStats {
    pub fn empty() -> Self {
        QueryStats {
            requests: 0,
            success: 0,
            failure: 0,
            start: None,
            end: None,
        }
    }
    /// Gets the total number of requests initiated by the query.
    pub fn num_requests(&self) -> u32 {
        self.requests
    }
    /// Gets the number of successful requests.
    pub fn num_successes(&self) -> u32 {
        self.success
    }
    /// Gets the number of failed requests.
    pub fn num_failures(&self) -> u32 {
        self.failure
    }
    /// Gets the number of pending requests.
    ///
    /// > **Note**: A query can finish while still having pending
    /// > requests, if the termination conditions are already met.
    pub fn num_pending(&self) -> u32 {
        self.requests - (self.success + self.failure)
    }
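    // For example: a query that issued 10 requests of which 6 succeeded and
    // 2 failed has 10 - (6 + 2) = 2 requests pending, and may nevertheless
    // already be finished if its termination conditions were met.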
    /// Gets the duration of the query.
    ///
    /// If the query has not yet finished, the duration is measured from the
    /// start of the query to the current instant.
    ///
    /// If the query did not yet start (i.e. yield the first peer to contact),
    /// `None` is returned.
    pub fn duration(&self) -> Option<Duration> {
        if let Some(s) = self.start {
            if let Some(e) = self.end {
                Some(e - s)
            } else {
                Some(Instant::now() - s)
            }
        } else {
            None
        }
    }
    /// Merges these stats with the given stats of another query,
    /// e.g. to accumulate statistics from a multi-phase query.
    ///
    /// Counters are merged cumulatively while the instants for
    /// start and end of the queries are taken as the minimum and
    /// maximum, respectively.
    pub fn merge(self, other: QueryStats) -> Self {
        QueryStats {
            requests: self.requests + other.requests,
            success: self.success + other.success,
            failure: self.failure + other.failure,
            start: match (self.start, other.start) {
                (Some(a), Some(b)) => Some(std::cmp::min(a, b)),
                (a, b) => a.or(b)
            },
            end: std::cmp::max(self.end, other.end)
        }
    }
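    // A sketch (hypothetical names) of accumulating the two phases of a
    // `put_record` operation, i.e. a lookup for the closest peers followed by
    // storing the record:
    //
    //     let total = lookup_stats.merge(put_stats);
    //     // total.num_requests() is the sum of both phases' requests;
    //     // total.duration() spans the earliest start to the latest end.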
|
2019-07-03 16:16:25 +02:00
|
|
|
}