mirror of
https://github.com/fluencelabs/tendermint
synced 2025-06-12 12:51:22 +00:00
limit number of /subscribe clients and queries per client (#3269)
* limit number of /subscribe clients and queries per client Add the following config variables (under [rpc] section): * max_subscription_clients * max_subscriptions_per_client * timeout_broadcast_tx_commit Fixes #2826 new HTTPClient interface for subscriptions finalize HTTPClient events interface remove EventSubscriber fix data race ``` WARNING: DATA RACE Read at 0x00c000a36060 by goroutine 129: github.com/tendermint/tendermint/rpc/client.(*Local).Subscribe.func1() /go/src/github.com/tendermint/tendermint/rpc/client/localclient.go:168 +0x1f0 Previous write at 0x00c000a36060 by goroutine 132: github.com/tendermint/tendermint/rpc/client.(*Local).Subscribe() /go/src/github.com/tendermint/tendermint/rpc/client/localclient.go:191 +0x4e0 github.com/tendermint/tendermint/rpc/client.WaitForOneEvent() /go/src/github.com/tendermint/tendermint/rpc/client/helpers.go:64 +0x178 github.com/tendermint/tendermint/rpc/client_test.TestTxEventsSentWithBroadcastTxSync.func1() /go/src/github.com/tendermint/tendermint/rpc/client/event_test.go:139 +0x298 testing.tRunner() /usr/local/go/src/testing/testing.go:827 +0x162 Goroutine 129 (running) created at: github.com/tendermint/tendermint/rpc/client.(*Local).Subscribe() /go/src/github.com/tendermint/tendermint/rpc/client/localclient.go:164 +0x4b7 github.com/tendermint/tendermint/rpc/client.WaitForOneEvent() /go/src/github.com/tendermint/tendermint/rpc/client/helpers.go:64 +0x178 github.com/tendermint/tendermint/rpc/client_test.TestTxEventsSentWithBroadcastTxSync.func1() /go/src/github.com/tendermint/tendermint/rpc/client/event_test.go:139 +0x298 testing.tRunner() /usr/local/go/src/testing/testing.go:827 +0x162 Goroutine 132 (running) created at: testing.(*T).Run() /usr/local/go/src/testing/testing.go:878 +0x659 github.com/tendermint/tendermint/rpc/client_test.TestTxEventsSentWithBroadcastTxSync() /go/src/github.com/tendermint/tendermint/rpc/client/event_test.go:119 +0x186 testing.tRunner() /usr/local/go/src/testing/testing.go:827 +0x162 
================== ``` lite client works (tested manually) godoc comments httpclient: do not close the out channel use TimeoutBroadcastTxCommit no timeout for unsubscribe but 1s Local (5s HTTP) timeout for resubscribe format code change Subscribe#out cap to 1 and replace config vars with RPCConfig TimeoutBroadcastTxCommit can't be greater than rpcserver.WriteTimeout rpc: Context as first parameter to all functions reformat code fixes after my own review fixes after Ethan's review add test stubs fix config.toml * fixes after manual testing - rpc: do not recommend to use BroadcastTxCommit because it's slow and wastes Tendermint resources (pubsub) - rpc: better error in Subscribe and BroadcastTxCommit - HTTPClient: do not resubscribe if err = ErrAlreadySubscribed * fixes after Ismail's review * Update rpc/grpc/grpc_test.go Co-Authored-By: melekes <anton.kalyaev@gmail.com>
This commit is contained in:
@ -2,11 +2,14 @@ package client
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
|
||||
amino "github.com/tendermint/go-amino"
|
||||
|
||||
cmn "github.com/tendermint/tendermint/libs/common"
|
||||
tmpubsub "github.com/tendermint/tendermint/libs/pubsub"
|
||||
ctypes "github.com/tendermint/tendermint/rpc/core/types"
|
||||
@ -15,13 +18,18 @@ import (
|
||||
)
|
||||
|
||||
/*
|
||||
HTTP is a Client implementation that communicates
|
||||
with a tendermint node over json rpc and websockets.
|
||||
HTTP is a Client implementation that communicates with a tendermint node over
|
||||
json rpc and websockets.
|
||||
|
||||
This is the main implementation you probably want to use in
|
||||
production code. There are other implementations when calling
|
||||
the tendermint node in-process (local), or when you want to mock
|
||||
out the server for test code (mock).
|
||||
This is the main implementation you probably want to use in production code.
|
||||
There are other implementations when calling the tendermint node in-process
|
||||
(Local), or when you want to mock out the server for test code (mock).
|
||||
|
||||
You can subscribe to any event published by Tendermint using the Subscribe method.
|
||||
Note delivery is best-effort. If you don't read events fast enough or network
|
||||
is slow, Tendermint might cancel the subscription. The client will attempt to
|
||||
resubscribe (you don't need to do anything). It will keep trying every second
|
||||
indefinitely until successful.
|
||||
*/
|
||||
type HTTP struct {
|
||||
remote string
|
||||
@ -249,28 +257,6 @@ func (c *HTTP) Validators(height *int64) (*ctypes.ResultValidators, error) {
|
||||
|
||||
/** websocket event stuff here... **/
|
||||
|
||||
type subscription struct {
|
||||
out chan tmpubsub.Message
|
||||
cancelled chan struct{}
|
||||
|
||||
mtx sync.RWMutex
|
||||
err error
|
||||
}
|
||||
|
||||
func (s *subscription) Out() <-chan tmpubsub.Message {
|
||||
return s.out
|
||||
}
|
||||
|
||||
func (s *subscription) Cancelled() <-chan struct{} {
|
||||
return s.cancelled
|
||||
}
|
||||
|
||||
func (s *subscription) Err() error {
|
||||
s.mtx.RLock()
|
||||
defer s.mtx.RUnlock()
|
||||
return s.err
|
||||
}
|
||||
|
||||
type WSEvents struct {
|
||||
cmn.BaseService
|
||||
cdc *amino.Codec
|
||||
@ -279,8 +265,8 @@ type WSEvents struct {
|
||||
ws *rpcclient.WSClient
|
||||
|
||||
mtx sync.RWMutex
|
||||
// query -> subscription
|
||||
subscriptions map[string]*subscription
|
||||
// query -> chan
|
||||
subscriptions map[string]chan ctypes.ResultEvent
|
||||
}
|
||||
|
||||
func newWSEvents(cdc *amino.Codec, remote, endpoint string) *WSEvents {
|
||||
@ -288,16 +274,18 @@ func newWSEvents(cdc *amino.Codec, remote, endpoint string) *WSEvents {
|
||||
cdc: cdc,
|
||||
endpoint: endpoint,
|
||||
remote: remote,
|
||||
subscriptions: make(map[string]*subscription),
|
||||
subscriptions: make(map[string]chan ctypes.ResultEvent),
|
||||
}
|
||||
|
||||
wsEvents.BaseService = *cmn.NewBaseService(nil, "WSEvents", wsEvents)
|
||||
return wsEvents
|
||||
}
|
||||
|
||||
// OnStart implements cmn.Service by starting WSClient and event loop.
|
||||
func (w *WSEvents) OnStart() error {
|
||||
w.ws = rpcclient.NewWSClient(w.remote, w.endpoint, rpcclient.OnReconnect(func() {
|
||||
w.redoSubscriptions()
|
||||
// resubscribe immediately
|
||||
w.redoSubscriptionsAfter(0 * time.Second)
|
||||
}))
|
||||
w.ws.SetCodec(w.cdc)
|
||||
|
||||
@ -310,75 +298,63 @@ func (w *WSEvents) OnStart() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Stop wraps the BaseService/eventSwitch actions as Start does
|
||||
// OnStop implements cmn.Service by stopping WSClient.
|
||||
func (w *WSEvents) OnStop() {
|
||||
err := w.ws.Stop()
|
||||
if err != nil {
|
||||
w.Logger.Error("failed to stop WSClient", "err", err)
|
||||
}
|
||||
_ = w.ws.Stop()
|
||||
}
|
||||
|
||||
func (w *WSEvents) Subscribe(ctx context.Context, subscriber string, query tmpubsub.Query, outCapacity ...int) (types.Subscription, error) {
|
||||
q := query.String()
|
||||
// Subscribe implements EventsClient by using WSClient to subscribe given
|
||||
// subscriber to query. By default, returns a channel with cap=1. Error is
|
||||
// returned if it fails to subscribe.
|
||||
// Channel is never closed to prevent clients from seeing an erroneous event.
|
||||
func (w *WSEvents) Subscribe(ctx context.Context, subscriber, query string,
|
||||
outCapacity ...int) (out <-chan ctypes.ResultEvent, err error) {
|
||||
|
||||
err := w.ws.Subscribe(ctx, q)
|
||||
if err != nil {
|
||||
if err := w.ws.Subscribe(ctx, query); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
outCap := 1
|
||||
if len(outCapacity) > 0 && outCapacity[0] >= 0 {
|
||||
if len(outCapacity) > 0 {
|
||||
outCap = outCapacity[0]
|
||||
}
|
||||
|
||||
outc := make(chan ctypes.ResultEvent, outCap)
|
||||
w.mtx.Lock()
|
||||
// subscriber param is ignored because Tendermint will override it with
|
||||
// remote IP anyway.
|
||||
w.subscriptions[q] = &subscription{
|
||||
out: make(chan tmpubsub.Message, outCap),
|
||||
cancelled: make(chan struct{}),
|
||||
}
|
||||
w.subscriptions[query] = outc
|
||||
w.mtx.Unlock()
|
||||
|
||||
return w.subscriptions[q], nil
|
||||
return outc, nil
|
||||
}
|
||||
|
||||
func (w *WSEvents) Unsubscribe(ctx context.Context, subscriber string, query tmpubsub.Query) error {
|
||||
q := query.String()
|
||||
|
||||
err := w.ws.Unsubscribe(ctx, q)
|
||||
if err != nil {
|
||||
// Unsubscribe implements EventsClient by using WSClient to unsubscribe given
|
||||
// subscriber from query.
|
||||
func (w *WSEvents) Unsubscribe(ctx context.Context, subscriber, query string) error {
|
||||
if err := w.ws.Unsubscribe(ctx, query); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
w.mtx.Lock()
|
||||
sub, ok := w.subscriptions[q]
|
||||
_, ok := w.subscriptions[query]
|
||||
if ok {
|
||||
close(sub.cancelled)
|
||||
sub.mtx.Lock()
|
||||
sub.err = errors.New("unsubscribed")
|
||||
sub.mtx.Unlock()
|
||||
delete(w.subscriptions, q)
|
||||
delete(w.subscriptions, query)
|
||||
}
|
||||
w.mtx.Unlock()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// UnsubscribeAll implements EventsClient by using WSClient to unsubscribe
|
||||
// given subscriber from all the queries.
|
||||
func (w *WSEvents) UnsubscribeAll(ctx context.Context, subscriber string) error {
|
||||
err := w.ws.UnsubscribeAll(ctx)
|
||||
if err != nil {
|
||||
if err := w.ws.UnsubscribeAll(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
w.mtx.Lock()
|
||||
for _, sub := range w.subscriptions {
|
||||
close(sub.cancelled)
|
||||
sub.mtx.Lock()
|
||||
sub.err = errors.New("unsubscribed")
|
||||
sub.mtx.Unlock()
|
||||
}
|
||||
w.subscriptions = make(map[string]*subscription)
|
||||
w.subscriptions = make(map[string]chan ctypes.ResultEvent)
|
||||
w.mtx.Unlock()
|
||||
|
||||
return nil
|
||||
@ -386,18 +362,21 @@ func (w *WSEvents) UnsubscribeAll(ctx context.Context, subscriber string) error
|
||||
|
||||
// After being reconnected, it is necessary to redo subscriptions to the server,
|
||||
// otherwise no data will be automatically received.
|
||||
func (w *WSEvents) redoSubscriptions() {
|
||||
func (w *WSEvents) redoSubscriptionsAfter(d time.Duration) {
|
||||
time.Sleep(d)
|
||||
|
||||
for q := range w.subscriptions {
|
||||
// NOTE: no timeout for resubscribing
|
||||
// FIXME: better logging/handling of errors??
|
||||
w.ws.Subscribe(context.Background(), q)
|
||||
err := w.ws.Subscribe(context.Background(), q)
|
||||
if err != nil {
|
||||
w.Logger.Error("Failed to resubscribe", "err", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// eventListener is an infinite loop pulling all websocket events
|
||||
// and pushing them to the EventSwitch.
|
||||
//
|
||||
// the goroutine only stops by closing quit
|
||||
func isErrAlreadySubscribed(err error) bool {
|
||||
return strings.Contains(err.Error(), tmpubsub.ErrAlreadySubscribed.Error())
|
||||
}
|
||||
|
||||
func (w *WSEvents) eventListener() {
|
||||
for {
|
||||
select {
|
||||
@ -405,21 +384,39 @@ func (w *WSEvents) eventListener() {
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
|
||||
if resp.Error != nil {
|
||||
w.Logger.Error("WS error", "err", resp.Error.Error())
|
||||
// Error can be ErrAlreadySubscribed or max client (subscriptions per
|
||||
// client) reached or Tendermint exited.
|
||||
// We can ignore ErrAlreadySubscribed, but need to retry in other
|
||||
// cases.
|
||||
if !isErrAlreadySubscribed(resp.Error) {
|
||||
// Resubscribe after 1 second to give Tendermint time to restart (if
|
||||
// crashed).
|
||||
w.redoSubscriptionsAfter(1 * time.Second)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
result := new(ctypes.ResultEvent)
|
||||
err := w.cdc.UnmarshalJSON(resp.Result, result)
|
||||
if err != nil {
|
||||
w.Logger.Error("failed to unmarshal response", "err", err)
|
||||
continue
|
||||
}
|
||||
// NOTE: writing also happens inside mutex so we can't close a channel in
|
||||
// Unsubscribe/UnsubscribeAll.
|
||||
|
||||
w.mtx.RLock()
|
||||
if sub, ok := w.subscriptions[result.Query]; ok {
|
||||
sub.out <- tmpubsub.NewMessage(result.Data, result.Tags)
|
||||
if out, ok := w.subscriptions[result.Query]; ok {
|
||||
if cap(out) == 0 {
|
||||
out <- *result
|
||||
} else {
|
||||
select {
|
||||
case out <- *result:
|
||||
default:
|
||||
w.Logger.Error("wanted to publish ResultEvent, but out channel is full", "result", result, "query", result.Query)
|
||||
}
|
||||
}
|
||||
}
|
||||
w.mtx.RUnlock()
|
||||
case <-w.Quit():
|
||||
|
Reference in New Issue
Block a user