Merge pull request #1737 from tendermint/986-monitoring

metrics
This commit is contained in:
Alexander Simmerl
2018-06-20 20:43:20 +02:00
committed by GitHub
18 changed files with 696 additions and 37 deletions

View File

@ -1,5 +1,10 @@
# Changelog
## TBD
FEATURES:
- [node] added metrics (served under /metrics using a Prometheus client; disabled by default)
## 0.20.1
BUG FIXES:

56
Gopkg.lock generated
View File

@ -1,6 +1,12 @@
# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'.
[[projects]]
branch = "master"
name = "github.com/beorn7/perks"
packages = ["quantile"]
revision = "3a771d992973f24aa725d07868b467d1ddfceafb"
[[projects]]
branch = "master"
name = "github.com/btcsuite/btcd"
@ -36,7 +42,11 @@
packages = [
"log",
"log/level",
"log/term"
"log/term",
"metrics",
"metrics/discard",
"metrics/internal/lv",
"metrics/prometheus"
]
revision = "4dc7be5d2d12881735283bcab7352178e190fc71"
version = "v0.6.0"
@ -131,6 +141,12 @@
revision = "c2353362d570a7bfa228149c62842019201cfb71"
version = "v1.8.0"
[[projects]]
name = "github.com/matttproud/golang_protobuf_extensions"
packages = ["pbutil"]
revision = "c12348ce28de40eed0136aa2b644d0ee0650e56c"
version = "v1.0.1"
[[projects]]
branch = "master"
name = "github.com/mitchellh/mapstructure"
@ -155,6 +171,42 @@
revision = "792786c7400a136282c1664665ae0a8db921c6c2"
version = "v1.0.0"
[[projects]]
name = "github.com/prometheus/client_golang"
packages = [
"prometheus",
"prometheus/promhttp"
]
revision = "c5b7fccd204277076155f10851dad72b76a49317"
version = "v0.8.0"
[[projects]]
branch = "master"
name = "github.com/prometheus/client_model"
packages = ["go"]
revision = "99fa1f4be8e564e8a6b613da7fa6f46c9edafc6c"
[[projects]]
branch = "master"
name = "github.com/prometheus/common"
packages = [
"expfmt",
"internal/bitbucket.org/ww/goautoneg",
"model"
]
revision = "7600349dcfe1abd18d72d3a1770870d9800a7801"
[[projects]]
branch = "master"
name = "github.com/prometheus/procfs"
packages = [
".",
"internal/util",
"nfs",
"xfs"
]
revision = "94663424ae5ae9856b40a9f170762b4197024661"
[[projects]]
branch = "master"
name = "github.com/rcrowley/go-metrics"
@ -374,6 +426,6 @@
[solve-meta]
analyzer-name = "dep"
analyzer-version = 1
inputs-digest = "62d0626fa963b25411234da915fb3b8bfc7aefffd76824d27ac1647ca2b5d489"
inputs-digest = "3bd388e520a08cd0aa14df2d6f5ecb46449d7c36fd80cf52eb775798e6accbaa"
solver-name = "gps-cdcl"
solver-version = 1

View File

@ -97,3 +97,7 @@
[prune]
go-tests = true
unused-packages = true
[[constraint]]
name = "github.com/prometheus/client_golang"
version = "0.8.0"

View File

@ -45,34 +45,37 @@ type Config struct {
BaseConfig `mapstructure:",squash"`
// Options for services
RPC *RPCConfig `mapstructure:"rpc"`
P2P *P2PConfig `mapstructure:"p2p"`
Mempool *MempoolConfig `mapstructure:"mempool"`
Consensus *ConsensusConfig `mapstructure:"consensus"`
TxIndex *TxIndexConfig `mapstructure:"tx_index"`
RPC *RPCConfig `mapstructure:"rpc"`
P2P *P2PConfig `mapstructure:"p2p"`
Mempool *MempoolConfig `mapstructure:"mempool"`
Consensus *ConsensusConfig `mapstructure:"consensus"`
TxIndex *TxIndexConfig `mapstructure:"tx_index"`
Instrumentation *InstrumentationConfig `mapstructure:"instrumentation"`
}
// DefaultConfig returns a default configuration for a Tendermint node
func DefaultConfig() *Config {
return &Config{
BaseConfig: DefaultBaseConfig(),
RPC: DefaultRPCConfig(),
P2P: DefaultP2PConfig(),
Mempool: DefaultMempoolConfig(),
Consensus: DefaultConsensusConfig(),
TxIndex: DefaultTxIndexConfig(),
BaseConfig: DefaultBaseConfig(),
RPC: DefaultRPCConfig(),
P2P: DefaultP2PConfig(),
Mempool: DefaultMempoolConfig(),
Consensus: DefaultConsensusConfig(),
TxIndex: DefaultTxIndexConfig(),
Instrumentation: DefaultInstrumentationConfig(),
}
}
// TestConfig returns a configuration that can be used for testing
func TestConfig() *Config {
return &Config{
BaseConfig: TestBaseConfig(),
RPC: TestRPCConfig(),
P2P: TestP2PConfig(),
Mempool: TestMempoolConfig(),
Consensus: TestConsensusConfig(),
TxIndex: TestTxIndexConfig(),
BaseConfig: TestBaseConfig(),
RPC: TestRPCConfig(),
P2P: TestP2PConfig(),
Mempool: TestMempoolConfig(),
Consensus: TestConsensusConfig(),
TxIndex: TestTxIndexConfig(),
Instrumentation: TestInstrumentationConfig(),
}
}
@ -411,7 +414,7 @@ func (cfg *MempoolConfig) WalDir() string {
//-----------------------------------------------------------------------------
// ConsensusConfig
// ConsensusConfig defines the confuguration for the Tendermint consensus service,
// ConsensusConfig defines the configuration for the Tendermint consensus service,
// including timeouts and details about the WAL and the block structure.
type ConsensusConfig struct {
RootDir string `mapstructure:"home"`
@ -536,14 +539,14 @@ func (cfg *ConsensusConfig) SetWalFile(walFile string) {
//-----------------------------------------------------------------------------
// TxIndexConfig
// TxIndexConfig defines the confuguration for the transaction
// TxIndexConfig defines the configuration for the transaction
// indexer, including tags to index.
type TxIndexConfig struct {
// What indexer to use for transactions
//
// Options:
// 1) "null" (default)
// 2) "kv" - the simplest possible indexer, backed by key-value storage (defaults to levelDB; see DBBackend).
// 1) "null"
// 2) "kv" (default) - the simplest possible indexer, backed by key-value storage (defaults to levelDB; see DBBackend).
Indexer string `mapstructure:"indexer"`
// Comma-separated list of tags to index (by default the only tag is tx hash)
@ -573,6 +576,35 @@ func TestTxIndexConfig() *TxIndexConfig {
return DefaultTxIndexConfig()
}
//-----------------------------------------------------------------------------
// InstrumentationConfig
// InstrumentationConfig defines the configuration for metrics reporting.
type InstrumentationConfig struct {
// When true, Prometheus metrics are served under /metrics on
// PrometheusListenAddr.
// Check out the documentation for the list of available metrics.
Prometheus bool `mapstructure:"prometheus"`
// Address to listen for Prometheus collector(s) connections.
PrometheusListenAddr string `mapstructure:"prometheus_listen_addr"`
}
// DefaultInstrumentationConfig returns a default configuration for metrics
// reporting.
func DefaultInstrumentationConfig() *InstrumentationConfig {
return &InstrumentationConfig{
Prometheus: false,
PrometheusListenAddr: ":26660",
}
}
// TestInstrumentationConfig returns a default configuration for metrics
// reporting.
func TestInstrumentationConfig() *InstrumentationConfig {
return DefaultInstrumentationConfig()
}
//-----------------------------------------------------------------------------
// Utils

View File

@ -232,6 +232,17 @@ index_tags = "{{ .TxIndex.IndexTags }}"
# desirable (see the comment above). IndexTags has a precedence over
# IndexAllTags (i.e. when given both, IndexTags will be indexed).
index_all_tags = {{ .TxIndex.IndexAllTags }}
##### instrumentation configuration options #####
[instrumentation]
# When true, Prometheus metrics are served under /metrics on
# PrometheusListenAddr.
# Check out the documentation for the list of available metrics.
prometheus = {{ .Instrumentation.Prometheus }}
# Address to listen for Prometheus collector(s) connections
prometheus_listen_addr = "{{ .Instrumentation.PrometheusListenAddr }}"
`
/****** these are for test settings ***********/

133
consensus/metrics.go Normal file
View File

@ -0,0 +1,133 @@
package consensus
import (
"github.com/go-kit/kit/metrics"
"github.com/go-kit/kit/metrics/discard"
prometheus "github.com/go-kit/kit/metrics/prometheus"
stdprometheus "github.com/prometheus/client_golang/prometheus"
)
// Metrics contains metrics exposed by this package.
type Metrics struct {
// Height of the chain.
Height metrics.Gauge
// Number of rounds.
Rounds metrics.Gauge
// Number of validators.
Validators metrics.Gauge
// Total power of all validators.
ValidatorsPower metrics.Gauge
// Number of validators who did not sign.
MissingValidators metrics.Gauge
// Total power of the missing validators.
MissingValidatorsPower metrics.Gauge
// Number of validators who tried to double sign.
ByzantineValidators metrics.Gauge
// Total power of the byzantine validators.
ByzantineValidatorsPower metrics.Gauge
// Time between this and the last block.
BlockIntervalSeconds metrics.Histogram
// Number of transactions.
NumTxs metrics.Gauge
// Size of the block.
BlockSizeBytes metrics.Gauge
// Total number of transactions.
TotalTxs metrics.Gauge
}
// PrometheusMetrics returns Metrics build using Prometheus client library.
func PrometheusMetrics() *Metrics {
return &Metrics{
Height: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Subsystem: "consensus",
Name: "height",
Help: "Height of the chain.",
}, []string{}),
Rounds: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Subsystem: "consensus",
Name: "rounds",
Help: "Number of rounds.",
}, []string{}),
Validators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Subsystem: "consensus",
Name: "validators",
Help: "Number of validators.",
}, []string{}),
ValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Subsystem: "consensus",
Name: "validators_power",
Help: "Total power of all validators.",
}, []string{}),
MissingValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Subsystem: "consensus",
Name: "missing_validators",
Help: "Number of validators who did not sign.",
}, []string{}),
MissingValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Subsystem: "consensus",
Name: "missing_validators_power",
Help: "Total power of the missing validators.",
}, []string{}),
ByzantineValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Subsystem: "consensus",
Name: "byzantine_validators",
Help: "Number of validators who tried to double sign.",
}, []string{}),
ByzantineValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Subsystem: "consensus",
Name: "byzantine_validators_power",
Help: "Total power of the byzantine validators.",
}, []string{}),
BlockIntervalSeconds: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{
Subsystem: "consensus",
Name: "block_interval_seconds",
Help: "Time between this and the last block.",
Buckets: []float64{1, 2.5, 5, 10, 60},
}, []string{}),
NumTxs: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Subsystem: "consensus",
Name: "num_txs",
Help: "Number of transactions.",
}, []string{}),
BlockSizeBytes: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Subsystem: "consensus",
Name: "block_size_bytes",
Help: "Size of the block.",
}, []string{}),
TotalTxs: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Subsystem: "consensus",
Name: "total_txs",
Help: "Total number of transactions.",
}, []string{}),
}
}
// NopMetrics returns no-op Metrics.
func NopMetrics() *Metrics {
return &Metrics{
Height: discard.NewGauge(),
Rounds: discard.NewGauge(),
Validators: discard.NewGauge(),
ValidatorsPower: discard.NewGauge(),
MissingValidators: discard.NewGauge(),
MissingValidatorsPower: discard.NewGauge(),
ByzantineValidators: discard.NewGauge(),
ByzantineValidatorsPower: discard.NewGauge(),
BlockIntervalSeconds: discard.NewHistogram(),
NumTxs: discard.NewGauge(),
BlockSizeBytes: discard.NewGauge(),
TotalTxs: discard.NewGauge(),
}
}

View File

@ -115,10 +115,24 @@ type ConsensusState struct {
// synchronous pubsub between consensus state and reactor.
// state only emits EventNewRoundStep, EventVote and EventProposalHeartbeat
evsw tmevents.EventSwitch
// for reporting metrics
metrics *Metrics
}
// CSOption sets an optional parameter on the ConsensusState.
type CSOption func(*ConsensusState)
// NewConsensusState returns a new ConsensusState.
func NewConsensusState(config *cfg.ConsensusConfig, state sm.State, blockExec *sm.BlockExecutor, blockStore sm.BlockStore, mempool sm.Mempool, evpool sm.EvidencePool) *ConsensusState {
func NewConsensusState(
config *cfg.ConsensusConfig,
state sm.State,
blockExec *sm.BlockExecutor,
blockStore sm.BlockStore,
mempool sm.Mempool,
evpool sm.EvidencePool,
options ...CSOption,
) *ConsensusState {
cs := &ConsensusState{
config: config,
blockExec: blockExec,
@ -132,6 +146,7 @@ func NewConsensusState(config *cfg.ConsensusConfig, state sm.State, blockExec *s
wal: nilWAL{},
evpool: evpool,
evsw: tmevents.NewEventSwitch(),
metrics: NopMetrics(),
}
// set function defaults (may be overwritten before calling Start)
cs.decideProposal = cs.defaultDecideProposal
@ -143,6 +158,9 @@ func NewConsensusState(config *cfg.ConsensusConfig, state sm.State, blockExec *s
// We do that upon Start().
cs.reconstructLastCommit(state)
cs.BaseService = *cmn.NewBaseService(nil, "ConsensusState", cs)
for _, option := range options {
option(cs)
}
return cs
}
@ -161,6 +179,11 @@ func (cs *ConsensusState) SetEventBus(b *types.EventBus) {
cs.blockExec.SetEventBus(b)
}
// WithMetrics sets the metrics.
func WithMetrics(metrics *Metrics) CSOption {
return func(cs *ConsensusState) { cs.metrics = metrics }
}
// String returns a string.
func (cs *ConsensusState) String() string {
// better not to access shared variables
@ -387,6 +410,7 @@ func (cs *ConsensusState) SetProposalAndBlock(proposal *types.Proposal, block *t
// internal functions for managing the state
func (cs *ConsensusState) updateHeight(height int64) {
cs.metrics.Height.Set(float64(height))
cs.Height = height
}
@ -718,6 +742,7 @@ func (cs *ConsensusState) enterNewRound(height int64, round int) {
cs.Votes.SetRound(round + 1) // also track next round (round+1) to allow round-skipping
cs.eventBus.PublishEventNewRound(cs.RoundStateEvent())
cs.metrics.Rounds.Set(float64(round))
// Wait for txs to be available in the mempool
// before we enterPropose in round 0. If the last block changed the app hash,
@ -1276,6 +1301,9 @@ func (cs *ConsensusState) finalizeCommit(height int64) {
fail.Fail() // XXX
// must be called before we update state
cs.recordMetrics(height, block)
// NewHeightStep!
cs.updateToState(stateCopy)
@ -1291,6 +1319,44 @@ func (cs *ConsensusState) finalizeCommit(height int64) {
// * cs.StartTime is set to when we will start round0.
}
func (cs *ConsensusState) recordMetrics(height int64, block *types.Block) {
cs.metrics.Validators.Set(float64(cs.Validators.Size()))
cs.metrics.ValidatorsPower.Set(float64(cs.Validators.TotalVotingPower()))
missingValidators := 0
missingValidatorsPower := int64(0)
for i, val := range cs.Validators.Validators {
var vote *types.Vote
if i < len(block.LastCommit.Precommits) {
vote = block.LastCommit.Precommits[i]
}
if vote == nil {
missingValidators++
missingValidatorsPower += val.VotingPower
}
}
cs.metrics.MissingValidators.Set(float64(missingValidators))
cs.metrics.MissingValidatorsPower.Set(float64(missingValidatorsPower))
cs.metrics.ByzantineValidators.Set(float64(len(block.Evidence.Evidence)))
byzantineValidatorsPower := int64(0)
for _, ev := range block.Evidence.Evidence {
if _, val := cs.Validators.GetByAddress(ev.Address()); val != nil {
byzantineValidatorsPower += val.VotingPower
}
}
cs.metrics.ByzantineValidatorsPower.Set(float64(byzantineValidatorsPower))
if height > 1 {
lastBlockMeta := cs.blockStore.LoadBlockMeta(height - 1)
cs.metrics.BlockIntervalSeconds.Observe(
block.Time.Sub(lastBlockMeta.Header.Time).Seconds(),
)
}
cs.metrics.NumTxs.Set(float64(block.NumTxs))
cs.metrics.BlockSizeBytes.Set(float64(block.Size()))
cs.metrics.TotalTxs.Set(float64(block.TotalTxs))
}
//-----------------------------------------------------------------------------
func (cs *ConsensusState) defaultSetProposal(proposal *types.Proposal) error {

View File

@ -0,0 +1,116 @@
# ADR 011: Monitoring
## Changelog
08-06-2018: Initial draft
11-06-2018: Reorg after @xla comments
13-06-2018: Clarification about usage of labels
## Context
In order to bring more visibility into Tendermint, we would like it to report
metrics and, maybe later, traces of transactions and RPC queries. See
https://github.com/tendermint/tendermint/issues/986.
A few solutions were considered:
1. [Prometheus](https://prometheus.io)
a) Prometheus API
b) [go-kit metrics package](https://github.com/go-kit/kit/tree/master/metrics) as an interface plus Prometheus
c) [telegraf](https://github.com/influxdata/telegraf)
d) new service, which will listen to events emitted by pubsub and report metrics
5. [OpenCensus](https://opencensus.io/go/index.html)
### 1. Prometheus
Prometheus seems to be the most popular product out there for monitoring. It has
a Go client library, powerful queries, alerts.
**a) Prometheus API**
We can commit to using Prometheus in Tendermint, but I think Tendermint users
should be free to choose whatever monitoring tool they feel will better suit
their needs (if they don't have existing one already). So we should try to
abstract interface enough so people can switch between Prometheus and other
similar tools.
**b) go-kit metrics package as an interface**
metrics package provides a set of uniform interfaces for service
instrumentation and offers adapters to popular metrics packages:
https://godoc.org/github.com/go-kit/kit/metrics#pkg-subdirectories
Comparing to Prometheus API, we're losing customisability and control, but gaining
freedom in choosing any instrument from the above list given we will extract
metrics creation into a separate function (see "providers" in node/node.go).
**c) telegraf**
Unlike already discussed options, telegraf does not require modifying Tendermint
source code. You create something called an input plugin, which polls
Tendermint RPC every second and calculates the metrics itself.
While it may sound good, but some metrics we want to report are not exposed via
RPC or pubsub, therefore can't be accessed externally.
**d) service, listening to pubsub**
Same issue as the above.
### 2. opencensus
opencensus provides both metrics and tracing, which may be important in the
future. It's API looks different from go-kit and Prometheus, but looks like it
covers everything we need.
Unfortunately, OpenCensus go client does not define any
interfaces, so if we want to abstract away metrics we
will need to write interfaces ourselves.
### List of metrics
| | Name | Type | Description |
| - | --------------------------------------- | ------- | ----------------------------------------------------------------------------- |
| A | consensus_height | Gauge | |
| A | consensus_validators | Gauge | Number of validators who signed |
| A | consensus_validators_power | Gauge | Total voting power of all validators |
| A | consensus_missing_validators | Gauge | Number of validators who did not sign |
| A | consensus_missing_validators_power | Gauge | Total voting power of the missing validators |
| A | consensus_byzantine_validators | Gauge | Number of validators who tried to double sign |
| A | consensus_byzantine_validators_power | Gauge | Total voting power of the byzantine validators |
| A | consensus_block_interval | Timing | Time between this and last block (Block.Header.Time) |
| | consensus_block_time | Timing | Time to create a block (from creating a proposal to commit) |
| | consensus_time_between_blocks | Timing | Time between committing last block and (receiving proposal creating proposal) |
| A | consensus_rounds | Gauge | Number of rounds |
| | consensus_prevotes | Gauge | |
| | consensus_precommits | Gauge | |
| | consensus_prevotes_total_power | Gauge | |
| | consensus_precommits_total_power | Gauge | |
| A | consensus_num_txs | Gauge | |
| A | mempool_size | Gauge | |
| A | consensus_total_txs | Gauge | |
| A | consensus_block_size | Gauge | In bytes |
| A | p2p_peers | Gauge | Number of peers node's connected to |
`A` - will be implemented in the fist place.
**Proposed solution**
## Status
Proposed.
## Consequences
### Positive
Better visibility, support of variety of monitoring backends
### Negative
One more library to audit, messing metrics reporting code with business domain.
### Neutral
-

View File

@ -52,6 +52,7 @@ ABCI, Apps, Logging, Etc
indexing-transactions.md
how-to-read-logs.md
running-in-production.md
metrics.md
Research & Specification
------------------------

40
docs/metrics.md Normal file
View File

@ -0,0 +1,40 @@
# Metrics
Tendermint can report and serve the Prometheus metrics, which in their turn can
be consumed by Prometheus collector(s).
This functionality is disabled by default.
To enable the Prometheus metrics, set `instrumentation.prometheus=true` if your
config file. Metrics will be served under `/metrics` on 26660 port by default.
Listen address can be changed in the config file (see
`prometheus_listen_addr`).
## List of available metrics
The following metrics are available:
| Name | Type | Since | Description |
| --------------------------------------- | ------- | --------- | ----------------------------------------------------------------------------- |
| consensus_height | Gauge | 0.20.1 | Height of the chain |
| consensus_validators | Gauge | 0.20.1 | Number of validators |
| consensus_validators_power | Gauge | 0.20.1 | Total voting power of all validators |
| consensus_missing_validators | Gauge | 0.20.1 | Number of validators who did not sign |
| consensus_missing_validators_power | Gauge | 0.20.1 | Total voting power of the missing validators |
| consensus_byzantine_validators | Gauge | 0.20.1 | Number of validators who tried to double sign |
| consensus_byzantine_validators_power | Gauge | 0.20.1 | Total voting power of the byzantine validators |
| consensus_block_interval_seconds | Histogram | 0.20.1 | Time between this and last block (Block.Header.Time) in seconds |
| consensus_rounds | Gauge | 0.20.1 | Number of rounds |
| consensus_num_txs | Gauge | 0.20.1 | Number of transactions |
| mempool_size | Gauge | 0.20.1 | Number of uncommitted transactions |
| consensus_total_txs | Gauge | 0.20.1 | Total number of transactions committed |
| consensus_block_size_bytes | Gauge | 0.20.1 | Block size in bytes |
| p2p_peers | Gauge | 0.20.1 | Number of peers node's connected to |
## Useful queries
Percentage of missing + byzantine validators:
```
((consensus_byzantine_validators_power + consensus_missing_validators_power) / consensus_validators_power) * 100
```

View File

@ -171,19 +171,30 @@ peer_query_maj23_sleep_duration = 2000
# What indexer to use for transactions
#
# Options:
# 1) "null" (default)
# 2) "kv" - the simplest possible indexer, backed by key-value storage (defaults to levelDB; see DBBackend).
indexer = "{{ .TxIndex.Indexer }}"
# 1) "null"
# 2) "kv" (default) - the simplest possible indexer, backed by key-value storage (defaults to levelDB; see DBBackend).
indexer = "kv"
# Comma-separated list of tags to index (by default the only tag is tx hash)
#
# It's recommended to index only a subset of tags due to possible memory
# bloat. This is, of course, depends on the indexer's DB and the volume of
# transactions.
index_tags = "{{ .TxIndex.IndexTags }}"
index_tags = ""
# When set to true, tells indexer to index all tags. Note this may be not
# desirable (see the comment above). IndexTags has a precedence over
# IndexAllTags (i.e. when given both, IndexTags will be indexed).
index_all_tags = {{ .TxIndex.IndexAllTags }}
index_all_tags = false
##### instrumentation configuration options #####
[instrumentation]
# When true, Prometheus metrics are served under /metrics on
# PrometheusListenAddr.
# Check out the documentation for the list of available metrics.
prometheus = false
# Address to listen for Prometheus collector(s) connections
prometheus_listen_addr = ":26660"
```

View File

@ -83,10 +83,20 @@ type Mempool struct {
wal *auto.AutoFile
logger log.Logger
metrics *Metrics
}
// MempoolOption sets an optional parameter on the Mempool.
type MempoolOption func(*Mempool)
// NewMempool returns a new Mempool with the given configuration and connection to an application.
func NewMempool(config *cfg.MempoolConfig, proxyAppConn proxy.AppConnMempool, height int64) *Mempool {
func NewMempool(
config *cfg.MempoolConfig,
proxyAppConn proxy.AppConnMempool,
height int64,
options ...MempoolOption,
) *Mempool {
mempool := &Mempool{
config: config,
proxyAppConn: proxyAppConn,
@ -98,8 +108,12 @@ func NewMempool(config *cfg.MempoolConfig, proxyAppConn proxy.AppConnMempool, he
recheckEnd: nil,
logger: log.NewNopLogger(),
cache: newTxCache(config.CacheSize),
metrics: NopMetrics(),
}
proxyAppConn.SetResponseCallback(mempool.resCb)
for _, option := range options {
option(mempool)
}
return mempool
}
@ -115,6 +129,11 @@ func (mem *Mempool) SetLogger(l log.Logger) {
mem.logger = l
}
// WithMetrics sets the metrics.
func WithMetrics(metrics *Metrics) MempoolOption {
return func(mem *Mempool) { mem.metrics = metrics }
}
// CloseWAL closes and discards the underlying WAL file.
// Any further writes will not be relayed to disk.
func (mem *Mempool) CloseWAL() bool {
@ -250,6 +269,7 @@ func (mem *Mempool) resCb(req *abci.Request, res *abci.Response) {
} else {
mem.resCbRecheck(req, res)
}
mem.metrics.Size.Set(float64(mem.Size()))
}
func (mem *Mempool) resCbNormal(req *abci.Request, res *abci.Response) {
@ -393,6 +413,7 @@ func (mem *Mempool) Update(height int64, txs types.Txs) error {
// mem.recheckCursor re-scans mem.txs and possibly removes some txs.
// Before mem.Reap(), we should wait for mem.recheckCursor to be nil.
}
mem.metrics.Size.Set(float64(mem.Size()))
return nil
}

34
mempool/metrics.go Normal file
View File

@ -0,0 +1,34 @@
package mempool
import (
"github.com/go-kit/kit/metrics"
"github.com/go-kit/kit/metrics/discard"
prometheus "github.com/go-kit/kit/metrics/prometheus"
stdprometheus "github.com/prometheus/client_golang/prometheus"
)
// Metrics contains metrics exposed by this package.
// see MetricsProvider for descriptions.
type Metrics struct {
// Size of the mempool.
Size metrics.Gauge
}
// PrometheusMetrics returns Metrics build using Prometheus client library.
func PrometheusMetrics() *Metrics {
return &Metrics{
Size: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Subsystem: "mempool",
Name: "size",
Help: "Size of the mempool (number of uncommitted transactions).",
}, []string{}),
}
}
// NopMetrics returns no-op Metrics.
func NopMetrics() *Metrics {
return &Metrics{
Size: discard.NewGauge(),
}
}

View File

@ -2,11 +2,14 @@ package node
import (
"bytes"
"context"
"errors"
"fmt"
"net"
"net/http"
"github.com/prometheus/client_golang/prometheus/promhttp"
abci "github.com/tendermint/abci/types"
amino "github.com/tendermint/go-amino"
crypto "github.com/tendermint/go-crypto"
@ -81,10 +84,25 @@ func DefaultNewNode(config *cfg.Config, logger log.Logger) (*Node, error) {
proxy.DefaultClientCreator(config.ProxyApp, config.ABCI, config.DBDir()),
DefaultGenesisDocProviderFunc(config),
DefaultDBProvider,
DefaultMetricsProvider,
logger,
)
}
// MetricsProvider returns a consensus, p2p and mempool Metrics.
type MetricsProvider func() (*cs.Metrics, *p2p.Metrics, *mempl.Metrics)
// DefaultMetricsProvider returns consensus, p2p and mempool Metrics build
// using Prometheus client library.
func DefaultMetricsProvider() (*cs.Metrics, *p2p.Metrics, *mempl.Metrics) {
return cs.PrometheusMetrics(), p2p.PrometheusMetrics(), mempl.PrometheusMetrics()
}
// NopMetricsProvider returns consensus, p2p and mempool Metrics as no-op.
func NopMetricsProvider() (*cs.Metrics, *p2p.Metrics, *mempl.Metrics) {
return cs.NopMetrics(), p2p.NopMetrics(), mempl.NopMetrics()
}
//------------------------------------------------------------------------------
// Node is the highest level interface to a full Tendermint node.
@ -114,6 +132,7 @@ type Node struct {
rpcListeners []net.Listener // rpc servers
txIndexer txindex.TxIndexer
indexerService *txindex.IndexerService
prometheusSrv *http.Server
}
// NewNode returns a new, ready to go, Tendermint Node.
@ -122,6 +141,7 @@ func NewNode(config *cfg.Config,
clientCreator proxy.ClientCreator,
genesisDocProvider GenesisDocProvider,
dbProvider DBProvider,
metricsProvider MetricsProvider,
logger log.Logger) (*Node, error) {
// Get BlockStore
@ -208,11 +228,28 @@ func NewNode(config *cfg.Config,
consensusLogger.Info("This node is not a validator", "addr", privValidator.GetAddress(), "pubKey", privValidator.GetPubKey())
}
// metrics
var (
csMetrics *cs.Metrics
p2pMetrics *p2p.Metrics
memplMetrics *mempl.Metrics
)
if config.Instrumentation.Prometheus {
csMetrics, p2pMetrics, memplMetrics = metricsProvider()
} else {
csMetrics, p2pMetrics, memplMetrics = NopMetricsProvider()
}
// Make MempoolReactor
mempoolLogger := logger.With("module", "mempool")
mempool := mempl.NewMempool(config.Mempool, proxyApp.Mempool(), state.LastBlockHeight)
mempool.InitWAL() // no need to have the mempool wal during tests
mempool := mempl.NewMempool(
config.Mempool,
proxyApp.Mempool(),
state.LastBlockHeight,
mempl.WithMetrics(memplMetrics),
)
mempool.SetLogger(mempoolLogger)
mempool.InitWAL() // no need to have the mempool wal during tests
mempoolReactor := mempl.NewMempoolReactor(config.Mempool, mempool)
mempoolReactor.SetLogger(mempoolLogger)
@ -241,8 +278,15 @@ func NewNode(config *cfg.Config,
bcReactor.SetLogger(logger.With("module", "blockchain"))
// Make ConsensusReactor
consensusState := cs.NewConsensusState(config.Consensus, state.Copy(),
blockExec, blockStore, mempool, evidencePool)
consensusState := cs.NewConsensusState(
config.Consensus,
state.Copy(),
blockExec,
blockStore,
mempool,
evidencePool,
cs.WithMetrics(csMetrics),
)
consensusState.SetLogger(consensusLogger)
if privValidator != nil {
consensusState.SetPrivValidator(privValidator)
@ -252,7 +296,7 @@ func NewNode(config *cfg.Config,
p2pLogger := logger.With("module", "p2p")
sw := p2p.NewSwitch(config.P2P)
sw := p2p.NewSwitch(config.P2P, p2p.WithMetrics(p2pMetrics))
sw.SetLogger(p2pLogger)
sw.AddReactor("MEMPOOL", mempoolReactor)
sw.AddReactor("BLOCKCHAIN", bcReactor)
@ -411,6 +455,10 @@ func (n *Node) OnStart() error {
n.rpcListeners = listeners
}
if n.config.Instrumentation.Prometheus {
n.prometheusSrv = n.startPrometheusServer(n.config.Instrumentation.PrometheusListenAddr)
}
// Start the switch (the P2P server).
err = n.sw.Start()
if err != nil {
@ -452,6 +500,13 @@ func (n *Node) OnStop() {
n.Logger.Error("Error stopping priv validator socket client", "err", err)
}
}
if n.prometheusSrv != nil {
if err := n.prometheusSrv.Shutdown(context.Background()); err != nil {
// Error from closing listeners, or context timeout:
n.Logger.Error("Prometheus HTTP server Shutdown", "err", err)
}
}
}
// RunForever waits for an interrupt signal and stops the node.
@ -527,6 +582,22 @@ func (n *Node) startRPC() ([]net.Listener, error) {
return listeners, nil
}
// startPrometheusServer starts a Prometheus HTTP server, listening for metrics
// collectors on addr.
func (n *Node) startPrometheusServer(addr string) *http.Server {
srv := &http.Server{
Addr: addr,
Handler: promhttp.Handler(),
}
go func() {
if err := srv.ListenAndServe(); err != http.ErrServerClosed {
// Error starting or closing listener:
n.Logger.Error("Prometheus HTTP server ListenAndServe", "err", err)
}
}()
return srv
}
// Switch returns the Node's Switch.
func (n *Node) Switch() *p2p.Switch {
return n.sw

33
p2p/metrics.go Normal file
View File

@ -0,0 +1,33 @@
package p2p
import (
"github.com/go-kit/kit/metrics"
"github.com/go-kit/kit/metrics/discard"
prometheus "github.com/go-kit/kit/metrics/prometheus"
stdprometheus "github.com/prometheus/client_golang/prometheus"
)
// Metrics contains metrics exposed by this package.
type Metrics struct {
// Number of peers.
Peers metrics.Gauge
}
// PrometheusMetrics returns Metrics build using Prometheus client library.
func PrometheusMetrics() *Metrics {
return &Metrics{
Peers: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Subsystem: "p2p",
Name: "peers",
Help: "Number of peers.",
}, []string{}),
}
}
// NopMetrics returns no-op Metrics.
func NopMetrics() *Metrics {
return &Metrics{
Peers: discard.NewGauge(),
}
}

View File

@ -73,10 +73,15 @@ type Switch struct {
mConfig conn.MConnConfig
rng *cmn.Rand // seed for randomizing dial times and orders
metrics *Metrics
}
// SwitchOption sets an optional parameter on the Switch.
type SwitchOption func(*Switch)
// NewSwitch creates a new Switch with the given config.
func NewSwitch(cfg *config.P2PConfig) *Switch {
func NewSwitch(cfg *config.P2PConfig, options ...SwitchOption) *Switch {
sw := &Switch{
config: cfg,
reactors: make(map[string]Reactor),
@ -85,6 +90,7 @@ func NewSwitch(cfg *config.P2PConfig) *Switch {
peers: NewPeerSet(),
dialing: cmn.NewCMap(),
reconnecting: cmn.NewCMap(),
metrics: NopMetrics(),
}
// Ensure we have a completely undeterministic PRNG.
@ -99,9 +105,19 @@ func NewSwitch(cfg *config.P2PConfig) *Switch {
sw.mConfig = mConfig
sw.BaseService = *cmn.NewBaseService(nil, "P2P Switch", sw)
for _, option := range options {
option(sw)
}
return sw
}
// WithMetrics sets the metrics.
func WithMetrics(metrics *Metrics) SwitchOption {
return func(sw *Switch) { sw.metrics = metrics }
}
//---------------------------------------------------------------------
// Switch setup
@ -279,6 +295,7 @@ func (sw *Switch) StopPeerGracefully(peer Peer) {
func (sw *Switch) stopAndRemovePeer(peer Peer, reason interface{}) {
sw.peers.Remove(peer)
sw.metrics.Peers.Add(float64(-1))
peer.Stop()
for _, reactor := range sw.reactors {
reactor.RemovePeer(peer, reason)
@ -623,6 +640,7 @@ func (sw *Switch) addPeer(pc peerConn) error {
if err := sw.peers.Add(peer); err != nil {
return err
}
sw.metrics.Peers.Add(float64(1))
sw.Logger.Info("Added peer", "peer", peer)
return nil

View File

@ -122,7 +122,9 @@ func NewTendermint(app abci.Application) *nm.Node {
papp := proxy.NewLocalClientCreator(app)
node, err := nm.NewNode(config, pv, papp,
nm.DefaultGenesisDocProviderFunc(config),
nm.DefaultDBProvider, logger)
nm.DefaultDBProvider,
nm.DefaultMetricsProvider,
logger)
if err != nil {
panic(err)
}

View File

@ -135,6 +135,15 @@ func (b *Block) HashesTo(hash []byte) bool {
return bytes.Equal(b.Hash(), hash)
}
// Size returns size of the block in bytes.
func (b *Block) Size() int {
bz, err := cdc.MarshalBinaryBare(b)
if err != nil {
return 0
}
return len(bz)
}
// String returns a string representation of the block
func (b *Block) String() string {
return b.StringIndented("")