limit number of /subscribe clients and queries per client (#3269)

* limit number of /subscribe clients and queries per client

Add the following config variables (under [rpc] section):
  * max_subscription_clients
  * max_subscriptions_per_client
  * timeout_broadcast_tx_commit

Fixes #2826

new HTTPClient interface for subscriptions

finalize HTTPClient events interface

remove EventSubscriber

fix data race

```
WARNING: DATA RACE
Read at 0x00c000a36060 by goroutine 129:
  github.com/tendermint/tendermint/rpc/client.(*Local).Subscribe.func1()
      /go/src/github.com/tendermint/tendermint/rpc/client/localclient.go:168 +0x1f0

Previous write at 0x00c000a36060 by goroutine 132:
  github.com/tendermint/tendermint/rpc/client.(*Local).Subscribe()
      /go/src/github.com/tendermint/tendermint/rpc/client/localclient.go:191 +0x4e0
  github.com/tendermint/tendermint/rpc/client.WaitForOneEvent()
      /go/src/github.com/tendermint/tendermint/rpc/client/helpers.go:64 +0x178
  github.com/tendermint/tendermint/rpc/client_test.TestTxEventsSentWithBroadcastTxSync.func1()
      /go/src/github.com/tendermint/tendermint/rpc/client/event_test.go:139 +0x298
  testing.tRunner()
      /usr/local/go/src/testing/testing.go:827 +0x162

Goroutine 129 (running) created at:
  github.com/tendermint/tendermint/rpc/client.(*Local).Subscribe()
      /go/src/github.com/tendermint/tendermint/rpc/client/localclient.go:164 +0x4b7
  github.com/tendermint/tendermint/rpc/client.WaitForOneEvent()
      /go/src/github.com/tendermint/tendermint/rpc/client/helpers.go:64 +0x178
  github.com/tendermint/tendermint/rpc/client_test.TestTxEventsSentWithBroadcastTxSync.func1()
      /go/src/github.com/tendermint/tendermint/rpc/client/event_test.go:139 +0x298
  testing.tRunner()
      /usr/local/go/src/testing/testing.go:827 +0x162

Goroutine 132 (running) created at:
  testing.(*T).Run()
      /usr/local/go/src/testing/testing.go:878 +0x659
  github.com/tendermint/tendermint/rpc/client_test.TestTxEventsSentWithBroadcastTxSync()
      /go/src/github.com/tendermint/tendermint/rpc/client/event_test.go:119 +0x186
  testing.tRunner()
      /usr/local/go/src/testing/testing.go:827 +0x162
==================
```

lite client works (tested manually)

godoc comments

httpclient: do not close the out channel

use TimeoutBroadcastTxCommit

no timeout for unsubscribe

but 1s Local (5s HTTP) timeout for resubscribe

format code

change Subscribe#out cap to 1

and replace config vars with RPCConfig

TimeoutBroadcastTxCommit can't be greater than rpcserver.WriteTimeout

rpc: Context as first parameter to all functions

reformat code

fixes after my own review

fixes after Ethan's review

add test stubs

fix config.toml

* fixes after manual testing

- rpc: do not recommend to use BroadcastTxCommit because it's slow and wastes
Tendermint resources (pubsub)
- rpc: better error in Subscribe and BroadcastTxCommit
- HTTPClient: do not resubscribe if err = ErrAlreadySubscribed

* fixes after Ismail's review

* Update rpc/grpc/grpc_test.go

Co-Authored-By: melekes <anton.kalyaev@gmail.com>
This commit is contained in:
Anton Kaliaev
2019-03-11 22:45:58 +04:00
committed by GitHub
parent 15f621141d
commit d741c7b478
36 changed files with 657 additions and 350 deletions

View File

@ -2,11 +2,14 @@ package client
import (
"context"
"strings"
"sync"
"time"
"github.com/pkg/errors"
amino "github.com/tendermint/go-amino"
cmn "github.com/tendermint/tendermint/libs/common"
tmpubsub "github.com/tendermint/tendermint/libs/pubsub"
ctypes "github.com/tendermint/tendermint/rpc/core/types"
@ -15,13 +18,18 @@ import (
)
/*
HTTP is a Client implementation that communicates
with a tendermint node over json rpc and websockets.
HTTP is a Client implementation that communicates with a tendermint node over
json rpc and websockets.
This is the main implementation you probably want to use in
production code. There are other implementations when calling
the tendermint node in-process (local), or when you want to mock
out the server for test code (mock).
This is the main implementation you probably want to use in production code.
There are other implementations when calling the tendermint node in-process
(Local), or when you want to mock out the server for test code (mock).
You can subscribe for any event published by Tendermint using Subscribe method.
Note delivery is best-effort. If you don't read events fast enough or network
is slow, Tendermint might cancel the subscription. The client will attempt to
resubscribe (you don't need to do anything). It will keep trying every second
indefinitely until successful.
*/
type HTTP struct {
remote string
@ -249,28 +257,6 @@ func (c *HTTP) Validators(height *int64) (*ctypes.ResultValidators, error) {
/** websocket event stuff here... **/
type subscription struct {
out chan tmpubsub.Message
cancelled chan struct{}
mtx sync.RWMutex
err error
}
func (s *subscription) Out() <-chan tmpubsub.Message {
return s.out
}
func (s *subscription) Cancelled() <-chan struct{} {
return s.cancelled
}
func (s *subscription) Err() error {
s.mtx.RLock()
defer s.mtx.RUnlock()
return s.err
}
type WSEvents struct {
cmn.BaseService
cdc *amino.Codec
@ -279,8 +265,8 @@ type WSEvents struct {
ws *rpcclient.WSClient
mtx sync.RWMutex
// query -> subscription
subscriptions map[string]*subscription
// query -> chan
subscriptions map[string]chan ctypes.ResultEvent
}
func newWSEvents(cdc *amino.Codec, remote, endpoint string) *WSEvents {
@ -288,16 +274,18 @@ func newWSEvents(cdc *amino.Codec, remote, endpoint string) *WSEvents {
cdc: cdc,
endpoint: endpoint,
remote: remote,
subscriptions: make(map[string]*subscription),
subscriptions: make(map[string]chan ctypes.ResultEvent),
}
wsEvents.BaseService = *cmn.NewBaseService(nil, "WSEvents", wsEvents)
return wsEvents
}
// OnStart implements cmn.Service by starting WSClient and event loop.
func (w *WSEvents) OnStart() error {
w.ws = rpcclient.NewWSClient(w.remote, w.endpoint, rpcclient.OnReconnect(func() {
w.redoSubscriptions()
// resubscribe immediately
w.redoSubscriptionsAfter(0 * time.Second)
}))
w.ws.SetCodec(w.cdc)
@ -310,75 +298,63 @@ func (w *WSEvents) OnStart() error {
return nil
}
// Stop wraps the BaseService/eventSwitch actions as Start does
// OnStop implements cmn.Service by stopping WSClient.
func (w *WSEvents) OnStop() {
err := w.ws.Stop()
if err != nil {
w.Logger.Error("failed to stop WSClient", "err", err)
}
_ = w.ws.Stop()
}
func (w *WSEvents) Subscribe(ctx context.Context, subscriber string, query tmpubsub.Query, outCapacity ...int) (types.Subscription, error) {
q := query.String()
// Subscribe implements EventsClient by using WSClient to subscribe given
// subscriber to query. By default, returns a channel with cap=1. Error is
// returned if it fails to subscribe.
// Channel is never closed to prevent clients from seeing an erroneus event.
func (w *WSEvents) Subscribe(ctx context.Context, subscriber, query string,
outCapacity ...int) (out <-chan ctypes.ResultEvent, err error) {
err := w.ws.Subscribe(ctx, q)
if err != nil {
if err := w.ws.Subscribe(ctx, query); err != nil {
return nil, err
}
outCap := 1
if len(outCapacity) > 0 && outCapacity[0] >= 0 {
if len(outCapacity) > 0 {
outCap = outCapacity[0]
}
outc := make(chan ctypes.ResultEvent, outCap)
w.mtx.Lock()
// subscriber param is ignored because Tendermint will override it with
// remote IP anyway.
w.subscriptions[q] = &subscription{
out: make(chan tmpubsub.Message, outCap),
cancelled: make(chan struct{}),
}
w.subscriptions[query] = outc
w.mtx.Unlock()
return w.subscriptions[q], nil
return outc, nil
}
func (w *WSEvents) Unsubscribe(ctx context.Context, subscriber string, query tmpubsub.Query) error {
q := query.String()
err := w.ws.Unsubscribe(ctx, q)
if err != nil {
// Unsubscribe implements EventsClient by using WSClient to unsubscribe given
// subscriber from query.
func (w *WSEvents) Unsubscribe(ctx context.Context, subscriber, query string) error {
if err := w.ws.Unsubscribe(ctx, query); err != nil {
return err
}
w.mtx.Lock()
sub, ok := w.subscriptions[q]
_, ok := w.subscriptions[query]
if ok {
close(sub.cancelled)
sub.mtx.Lock()
sub.err = errors.New("unsubscribed")
sub.mtx.Unlock()
delete(w.subscriptions, q)
delete(w.subscriptions, query)
}
w.mtx.Unlock()
return nil
}
// UnsubscribeAll implements EventsClient by using WSClient to unsubscribe
// given subscriber from all the queries.
func (w *WSEvents) UnsubscribeAll(ctx context.Context, subscriber string) error {
err := w.ws.UnsubscribeAll(ctx)
if err != nil {
if err := w.ws.UnsubscribeAll(ctx); err != nil {
return err
}
w.mtx.Lock()
for _, sub := range w.subscriptions {
close(sub.cancelled)
sub.mtx.Lock()
sub.err = errors.New("unsubscribed")
sub.mtx.Unlock()
}
w.subscriptions = make(map[string]*subscription)
w.subscriptions = make(map[string]chan ctypes.ResultEvent)
w.mtx.Unlock()
return nil
@ -386,18 +362,21 @@ func (w *WSEvents) UnsubscribeAll(ctx context.Context, subscriber string) error
// After being reconnected, it is necessary to redo subscription to server
// otherwise no data will be automatically received.
func (w *WSEvents) redoSubscriptions() {
func (w *WSEvents) redoSubscriptionsAfter(d time.Duration) {
time.Sleep(d)
for q := range w.subscriptions {
// NOTE: no timeout for resubscribing
// FIXME: better logging/handling of errors??
w.ws.Subscribe(context.Background(), q)
err := w.ws.Subscribe(context.Background(), q)
if err != nil {
w.Logger.Error("Failed to resubscribe", "err", err)
}
}
}
// eventListener is an infinite loop pulling all websocket events
// and pushing them to the EventSwitch.
//
// the goroutine only stops by closing quit
func isErrAlreadySubscribed(err error) bool {
return strings.Contains(err.Error(), tmpubsub.ErrAlreadySubscribed.Error())
}
func (w *WSEvents) eventListener() {
for {
select {
@ -405,21 +384,39 @@ func (w *WSEvents) eventListener() {
if !ok {
return
}
if resp.Error != nil {
w.Logger.Error("WS error", "err", resp.Error.Error())
// Error can be ErrAlreadySubscribed or max client (subscriptions per
// client) reached or Tendermint exited.
// We can ignore ErrAlreadySubscribed, but need to retry in other
// cases.
if !isErrAlreadySubscribed(resp.Error) {
// Resubscribe after 1 second to give Tendermint time to restart (if
// crashed).
w.redoSubscriptionsAfter(1 * time.Second)
}
continue
}
result := new(ctypes.ResultEvent)
err := w.cdc.UnmarshalJSON(resp.Result, result)
if err != nil {
w.Logger.Error("failed to unmarshal response", "err", err)
continue
}
// NOTE: writing also happens inside mutex so we can't close a channel in
// Unsubscribe/UnsubscribeAll.
w.mtx.RLock()
if sub, ok := w.subscriptions[result.Query]; ok {
sub.out <- tmpubsub.NewMessage(result.Data, result.Tags)
if out, ok := w.subscriptions[result.Query]; ok {
if cap(out) == 0 {
out <- *result
} else {
select {
case out <- *result:
default:
w.Logger.Error("wanted to publish ResultEvent, but out channel is full", "result", result, "query", result.Query)
}
}
}
w.mtx.RUnlock()
case <-w.Quit():