Fix races with DialQueue variables

This commit is contained in:
Matt Joiner 2019-01-31 13:08:45 +11:00
parent 0998477db1
commit 7246a3b0f4
3 changed files with 24 additions and 34 deletions

View File

@ -9,7 +9,7 @@ import (
queue "github.com/libp2p/go-libp2p-peerstore/queue" queue "github.com/libp2p/go-libp2p-peerstore/queue"
) )
var ( const (
// DialQueueMinParallelism is the minimum number of worker dial goroutines that will be alive at any time. // DialQueueMinParallelism is the minimum number of worker dial goroutines that will be alive at any time.
DialQueueMinParallelism = 6 DialQueueMinParallelism = 6
// DialQueueMaxParallelism is the maximum number of worker dial goroutines that can be alive at any time. // DialQueueMaxParallelism is the maximum number of worker dial goroutines that can be alive at any time.
@ -27,6 +27,8 @@ type dialQueue struct {
nWorkers int nWorkers int
scalingFactor float64 scalingFactor float64
scalingMutePeriod time.Duration
maxIdle time.Duration
in *queue.ChanQueue in *queue.ChanQueue
out *queue.ChanQueue out *queue.ChanQueue
@ -61,12 +63,16 @@ type waitingCh struct {
// Dialler throttling (e.g. FD limit exceeded) is a concern, as we can easily spin up more workers to compensate, and // Dialler throttling (e.g. FD limit exceeded) is a concern, as we can easily spin up more workers to compensate, and
// end up adding fuel to the fire. Since we have no deterministic way to detect this for now, we hard-limit concurrency // end up adding fuel to the fire. Since we have no deterministic way to detect this for now, we hard-limit concurrency
// to DialQueueMaxParallelism. // to DialQueueMaxParallelism.
func newDialQueue(ctx context.Context, target string, in *queue.ChanQueue, dialFn func(context.Context, peer.ID) error) *dialQueue { func newDialQueue(ctx context.Context, target string, in *queue.ChanQueue, dialFn func(context.Context, peer.ID) error,
maxIdle, scalingMutePeriod time.Duration,
) *dialQueue {
sq := &dialQueue{ sq := &dialQueue{
ctx: ctx, ctx: ctx,
dialFn: dialFn, dialFn: dialFn,
nWorkers: DialQueueMinParallelism, nWorkers: DialQueueMinParallelism,
scalingFactor: 1.5, scalingFactor: 1.5,
scalingMutePeriod: scalingMutePeriod,
maxIdle: maxIdle,
in: in, in: in,
out: queue.NewChanQueue(ctx, queue.NewXORDistancePQ(target)), out: queue.NewChanQueue(ctx, queue.NewXORDistancePQ(target)),
@ -145,13 +151,13 @@ func (dq *dialQueue) control() {
dialled = nil dialled = nil
} }
case <-dq.growCh: case <-dq.growCh:
if time.Since(lastScalingEvt) < DialQueueScalingMutePeriod { if time.Since(lastScalingEvt) < dq.scalingMutePeriod {
continue continue
} }
dq.grow() dq.grow()
lastScalingEvt = time.Now() lastScalingEvt = time.Now()
case <-dq.shrinkCh: case <-dq.shrinkCh:
if time.Since(lastScalingEvt) < DialQueueScalingMutePeriod { if time.Since(lastScalingEvt) < dq.scalingMutePeriod {
continue continue
} }
dq.shrink() dq.shrink()
@ -259,7 +265,7 @@ func (dq *dialQueue) worker() {
case <-idleTimer.C: case <-idleTimer.C:
default: default:
} }
idleTimer.Reset(DialQueueMaxIdle) idleTimer.Reset(dq.maxIdle)
select { select {
case <-dq.dieCh: case <-dq.dieCh:

View File

@ -2,22 +2,16 @@ package dht
import ( import (
"context" "context"
"fmt"
"sync" "sync"
"sync/atomic" "sync/atomic"
"testing" "testing"
"time" "time"
peer "github.com/libp2p/go-libp2p-peer" "github.com/libp2p/go-libp2p-peer"
queue "github.com/libp2p/go-libp2p-peerstore/queue" "github.com/libp2p/go-libp2p-peerstore/queue"
) )
func init() {
DialQueueScalingMutePeriod = 0
}
func TestDialQueueGrowsOnSlowDials(t *testing.T) { func TestDialQueueGrowsOnSlowDials(t *testing.T) {
DialQueueMaxIdle = 10 * time.Minute
in := queue.NewChanQueue(context.Background(), queue.NewXORDistancePQ("test")) in := queue.NewChanQueue(context.Background(), queue.NewXORDistancePQ("test"))
hang := make(chan struct{}) hang := make(chan struct{})
@ -35,7 +29,7 @@ func TestDialQueueGrowsOnSlowDials(t *testing.T) {
} }
// remove the mute period to grow faster. // remove the mute period to grow faster.
dq := newDialQueue(context.Background(), "test", in, dialFn) dq := newDialQueue(context.Background(), "test", in, dialFn, 10*time.Minute, 0)
for i := 0; i < 4; i++ { for i := 0; i < 4; i++ {
_ = dq.Consume() _ = dq.Consume()
@ -55,7 +49,6 @@ func TestDialQueueGrowsOnSlowDials(t *testing.T) {
func TestDialQueueShrinksWithNoConsumers(t *testing.T) { func TestDialQueueShrinksWithNoConsumers(t *testing.T) {
// reduce interference from the other shrink path. // reduce interference from the other shrink path.
DialQueueMaxIdle = 10 * time.Minute
in := queue.NewChanQueue(context.Background(), queue.NewXORDistancePQ("test")) in := queue.NewChanQueue(context.Background(), queue.NewXORDistancePQ("test"))
hang := make(chan struct{}) hang := make(chan struct{})
@ -68,12 +61,7 @@ func TestDialQueueShrinksWithNoConsumers(t *testing.T) {
return nil return nil
} }
dq := newDialQueue(context.Background(), "test", in, dialFn) dq := newDialQueue(context.Background(), "test", in, dialFn, 10*time.Minute, 0)
defer func() {
recover()
fmt.Println(dq.nWorkers)
}()
// acquire 3 consumers, everytime we acquire a consumer, we will grow the pool because no dial job is completed // acquire 3 consumers, everytime we acquire a consumer, we will grow the pool because no dial job is completed
// and immediately returnable. // and immediately returnable.
@ -117,8 +105,6 @@ func TestDialQueueShrinksWithNoConsumers(t *testing.T) {
// Inactivity = workers are idle because the DHT query is progressing slow and is producing too few peers to dial. // Inactivity = workers are idle because the DHT query is progressing slow and is producing too few peers to dial.
func TestDialQueueShrinksWithWhenIdle(t *testing.T) { func TestDialQueueShrinksWithWhenIdle(t *testing.T) {
DialQueueMaxIdle = 1 * time.Second
in := queue.NewChanQueue(context.Background(), queue.NewXORDistancePQ("test")) in := queue.NewChanQueue(context.Background(), queue.NewXORDistancePQ("test"))
hang := make(chan struct{}) hang := make(chan struct{})
@ -135,7 +121,7 @@ func TestDialQueueShrinksWithWhenIdle(t *testing.T) {
in.EnqChan <- peer.ID(i) in.EnqChan <- peer.ID(i)
} }
dq := newDialQueue(context.Background(), "test", in, dialFn) dq := newDialQueue(context.Background(), "test", in, dialFn, time.Second, 0)
// keep up to speed with backlog by releasing the dial function every time we acquire a channel. // keep up to speed with backlog by releasing the dial function every time we acquire a channel.
for i := 0; i < 13; i++ { for i := 0; i < 13; i++ {
@ -161,8 +147,6 @@ func TestDialQueueShrinksWithWhenIdle(t *testing.T) {
} }
func TestDialQueueMutePeriodHonored(t *testing.T) { func TestDialQueueMutePeriodHonored(t *testing.T) {
DialQueueScalingMutePeriod = 2 * time.Second
in := queue.NewChanQueue(context.Background(), queue.NewXORDistancePQ("test")) in := queue.NewChanQueue(context.Background(), queue.NewXORDistancePQ("test"))
hang := make(chan struct{}) hang := make(chan struct{})
var wg sync.WaitGroup var wg sync.WaitGroup
@ -178,7 +162,7 @@ func TestDialQueueMutePeriodHonored(t *testing.T) {
in.EnqChan <- peer.ID(i) in.EnqChan <- peer.ID(i)
} }
dq := newDialQueue(context.Background(), "test", in, dialFn) dq := newDialQueue(context.Background(), "test", in, dialFn, DialQueueMaxIdle, 2*time.Second)
// pick up three consumers. // pick up three consumers.
for i := 0; i < 3; i++ { for i := 0; i < 3; i++ {

View File

@ -103,7 +103,7 @@ func newQueryRunner(q *dhtQuery) *dhtQueryRunner {
peersToQuery: peersToQuery, peersToQuery: peersToQuery,
proc: proc, proc: proc,
} }
r.peersDialed = newDialQueue(ctx, q.key, peersToQuery, r.dialPeer) r.peersDialed = newDialQueue(ctx, q.key, peersToQuery, r.dialPeer, DialQueueMaxIdle, DialQueueScalingMutePeriod)
return r return r
} }