From 03079185d4f90fcf8db188fb28c50f475bbf5041 Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Fri, 8 Jun 2018 17:17:40 +0400 Subject: [PATCH 01/23] metrics ADR Refs #986 --- docs/architecture/adr-010-monitoring.md | 103 ++++++++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 docs/architecture/adr-010-monitoring.md diff --git a/docs/architecture/adr-010-monitoring.md b/docs/architecture/adr-010-monitoring.md new file mode 100644 index 00000000..bac4b194 --- /dev/null +++ b/docs/architecture/adr-010-monitoring.md @@ -0,0 +1,103 @@ +# ADR 010: Monitoring + +## Changelog + +08-06-2018: Initial draft + +## Context + +In order to bring more visibility into Tendermint, we would like it to report +metrics and, maybe later, traces of transactions and RPC queries. See +https://github.com/tendermint/tendermint/issues/986. + +A few solutions were considered: + +1. [Prometheus](https://prometheus.io) + a) Prometheus API + b) [go-kit metrics package](https://github.com/go-kit/kit/tree/master/metrics) as an interface plus Prometheus + c) [telegraf](https://github.com/influxdata/telegraf) + d) new service, which will listen to events emitted by pubsub and report metrics +5. [OpenCensus](https://opencensus.io/go/index.html) + +### 1. Prometheus + +Prometheus seems to be the most popular product out there for monitoring. It has +a Go client library, powerful queries, alerts. + +**a) Prometheus API** + +We can commit to using Prometheus in Tendermint, but I think Tendermint users +should be free to choose whatever monitoring tool they feel will better suit +their needs (if they don't have existing one already). So we should try to +abstract interface enough so people can switch between Prometheus and other +similar tools. + +**b) go-kit metrics package as an interface** + +metrics package provides a set of uniform interfaces for service +instrumentation and offers adapters to popular metrics packages: + +https://godoc.org/github.com/go-kit/kit/metrics#pkg-subdirectories + +Comparing to Prometheus API, we're losing customisability and control, but gaining +freedom in choosing any instrument from the above list given we will extract +metrics creation into a separate function (see "providers" in node/node.go). + +**c) telegraf** + +Unlike already discussed options, telegraf does not require modifying Tendermint +source code. You create something called an input plugin, which polls +Tendermint RPC every second and calculates the metrics itself. + +While it may sound good, but some metrics we want to report are not exposed via +RPC or pubsub, therefore can't be accessed externally. + +**d) service, listening to pubsub** + +Same issue as the above. + +### 2. opencensus + +opencensus provides both metrics and tracing, which may be important in the +future. It's API looks different from go-kit and Prometheus, but looks like it +covers everything we need. + +Unfortunately, OpenCensus go client does not define any +interfaces, so if we want to abstract away metrics we +will need to write interfaces ourselves. + +### List of metrics + +| | Name | Type | | +| - | --------------------------------------- | ------- | ----------------------------------------------------------------------------- | +| A | height | Counter | | +| A | validators: | Gauge | Number of validators who signed | +| A | missing_validators: | Gauge | Number of validators who did not sign | +| A | byzantine_validators: | Gauge | Number of validators who tried to double sign | +| A | block_interval | Timing | Time between this and last block (Block.Header.Time) | +| | block_time | Timing | Time to create a block (from creating a proposal to commit) | +| | time_between_blocks | Timing | Time between committing last block and (receiving proposal creating proposal) | +| A | rounds: | Counter | Number of rounds | +| | prevotes:: | Counter | | +| | precommits:: | Counter | | +| | prevotes_total_power:: | Counter | | +| | precommits_total_power:: | Counter | | +| A | num_txs: | Counter | | +| | total_txs | Counter | | +| | block_size: | Gauge | In bytes | +| | peers | Gauge | Number of peers node's connected to | +| | power | Gauge | | + +`A` - will be implemented in the fist place. + +**Proposed solution** + +## Status + +## Consequences + +### Positive + +### Negative + +### Neutral From 5c7093cc9fd3675235ffc69ec68b0c14e0f92626 Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Mon, 11 Jun 2018 17:14:42 +0400 Subject: [PATCH 02/23] go-kit metrics plus prometheus: one metric --- Gopkg.lock | 53 ++++++++++++++++++++++++++++++++++++-- Gopkg.toml | 4 +++ consensus/common_test.go | 2 +- consensus/metrics.go | 17 ++++++++++++ consensus/replay_file.go | 4 +-- consensus/state.go | 7 ++++- consensus/wal_generator.go | 2 +- node/node.go | 12 ++++++++- 8 files changed, 93 insertions(+), 8 deletions(-) create mode 100644 consensus/metrics.go diff --git a/Gopkg.lock b/Gopkg.lock index 8019acb5..9d1031e9 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -1,6 +1,12 @@ # This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'. +[[projects]] + branch = "master" + name = "github.com/beorn7/perks" + packages = ["quantile"] + revision = "3a771d992973f24aa725d07868b467d1ddfceafb" + [[projects]] branch = "master" name = "github.com/btcsuite/btcd" @@ -36,7 +42,11 @@ packages = [ "log", "log/level", - "log/term" + "log/term", + "metrics", + "metrics/discard", + "metrics/internal/lv", + "metrics/prometheus" ] revision = "4dc7be5d2d12881735283bcab7352178e190fc71" version = "v0.6.0" @@ -131,6 +141,12 @@ revision = "c2353362d570a7bfa228149c62842019201cfb71" version = "v1.8.0" +[[projects]] + name = "github.com/matttproud/golang_protobuf_extensions" + packages = ["pbutil"] + revision = "c12348ce28de40eed0136aa2b644d0ee0650e56c" + version = "v1.0.1" + [[projects]] branch = "master" name = "github.com/mitchellh/mapstructure" @@ -155,6 +171,39 @@ revision = "792786c7400a136282c1664665ae0a8db921c6c2" version = "v1.0.0" +[[projects]] + name = "github.com/prometheus/client_golang" + packages = ["prometheus"] + revision = "c5b7fccd204277076155f10851dad72b76a49317" + version = "v0.8.0" + +[[projects]] + branch = "master" + name = "github.com/prometheus/client_model" + packages = ["go"] + revision = "99fa1f4be8e564e8a6b613da7fa6f46c9edafc6c" + +[[projects]] + branch = "master" + name = "github.com/prometheus/common" + packages = [ + "expfmt", + "internal/bitbucket.org/ww/goautoneg", + "model" + ] + revision = "7600349dcfe1abd18d72d3a1770870d9800a7801" + +[[projects]] + branch = "master" + name = "github.com/prometheus/procfs" + packages = [ + ".", + "internal/util", + "nfs", + "xfs" + ] + revision = "94663424ae5ae9856b40a9f170762b4197024661" + [[projects]] branch = "master" name = "github.com/rcrowley/go-metrics" @@ -374,6 +423,6 @@ [solve-meta] analyzer-name = "dep" analyzer-version = 1 - inputs-digest = "62d0626fa963b25411234da915fb3b8bfc7aefffd76824d27ac1647ca2b5d489" + inputs-digest = "294ac88a5b44228f5ef841b13602d0a8b8fed5fbe9b9b6df77640d636175be16" solver-name = "gps-cdcl" solver-version = 1 diff --git a/Gopkg.toml b/Gopkg.toml index b7393a47..13339e55 100644 --- a/Gopkg.toml +++ b/Gopkg.toml @@ -97,3 +97,7 @@ [prune] go-tests = true unused-packages = true + +[[constraint]] + name = "github.com/prometheus/client_golang" + version = "0.8.0" diff --git a/consensus/common_test.go b/consensus/common_test.go index f50e5769..ea141592 100644 --- a/consensus/common_test.go +++ b/consensus/common_test.go @@ -267,7 +267,7 @@ func newConsensusStateWithConfigAndBlockStore(thisConfig *cfg.Config, state sm.S // Make ConsensusState stateDB := dbm.NewMemDB() blockExec := sm.NewBlockExecutor(stateDB, log.TestingLogger(), proxyAppConnCon, mempool, evpool) - cs := NewConsensusState(thisConfig.Consensus, state, blockExec, blockStore, mempool, evpool) + cs := NewConsensusState(thisConfig.Consensus, state, blockExec, blockStore, mempool, evpool, NopMetrics()) cs.SetLogger(log.TestingLogger().With("module", "consensus")) cs.SetPrivValidator(pv) diff --git a/consensus/metrics.go b/consensus/metrics.go new file mode 100644 index 00000000..06df7e35 --- /dev/null +++ b/consensus/metrics.go @@ -0,0 +1,17 @@ +package consensus + +import "github.com/go-kit/kit/metrics" +import "github.com/go-kit/kit/metrics/discard" + +// Metrics contains metrics exposed by this package. +type Metrics struct { + // height of the chain + Height metrics.Counter +} + +// NopMetrics returns no-op Metrics. +func NopMetrics() *Metrics { + return &Metrics{ + Height: discard.NewCounter(), + } +} diff --git a/consensus/replay_file.go b/consensus/replay_file.go index 57204b01..113c69c8 100644 --- a/consensus/replay_file.go +++ b/consensus/replay_file.go @@ -126,7 +126,7 @@ func (pb *playback) replayReset(count int, newStepCh chan interface{}) error { pb.cs.Wait() newCS := NewConsensusState(pb.cs.config, pb.genesisState.Copy(), pb.cs.blockExec, - pb.cs.blockStore, pb.cs.mempool, pb.cs.evpool) + pb.cs.blockStore, pb.cs.mempool, pb.cs.evpool, pb.cs.metrics) newCS.SetEventBus(pb.cs.eventBus) newCS.startForReplay() @@ -314,7 +314,7 @@ func newConsensusStateForReplay(config cfg.BaseConfig, csConfig *cfg.ConsensusCo blockExec := sm.NewBlockExecutor(stateDB, log.TestingLogger(), proxyApp.Consensus(), mempool, evpool) consensusState := NewConsensusState(csConfig, state.Copy(), blockExec, - blockStore, mempool, evpool) + blockStore, mempool, evpool, NopMetrics()) consensusState.SetEventBus(eventBus) return consensusState diff --git a/consensus/state.go b/consensus/state.go index d46ec583..3b6c029f 100644 --- a/consensus/state.go +++ b/consensus/state.go @@ -115,10 +115,13 @@ type ConsensusState struct { // synchronous pubsub between consensus state and reactor. // state only emits EventNewRoundStep, EventVote and EventProposalHeartbeat evsw tmevents.EventSwitch + + // for reporting metrics + metrics *Metrics } // NewConsensusState returns a new ConsensusState. -func NewConsensusState(config *cfg.ConsensusConfig, state sm.State, blockExec *sm.BlockExecutor, blockStore sm.BlockStore, mempool sm.Mempool, evpool sm.EvidencePool) *ConsensusState { +func NewConsensusState(config *cfg.ConsensusConfig, state sm.State, blockExec *sm.BlockExecutor, blockStore sm.BlockStore, mempool sm.Mempool, evpool sm.EvidencePool, metrics *Metrics) *ConsensusState { cs := &ConsensusState{ config: config, blockExec: blockExec, @@ -132,6 +135,7 @@ func NewConsensusState(config *cfg.ConsensusConfig, state sm.State, blockExec *s wal: nilWAL{}, evpool: evpool, evsw: tmevents.NewEventSwitch(), + metrics: metrics, } // set function defaults (may be overwritten before calling Start) cs.decideProposal = cs.defaultDecideProposal @@ -388,6 +392,7 @@ func (cs *ConsensusState) SetProposalAndBlock(proposal *types.Proposal, block *t func (cs *ConsensusState) updateHeight(height int64) { cs.Height = height + cs.metrics.Height.Add(float64(height - cs.Height)) } func (cs *ConsensusState) updateRoundStep(round int, step cstypes.RoundStepType) { diff --git a/consensus/wal_generator.go b/consensus/wal_generator.go index f61af15f..dab93c34 100644 --- a/consensus/wal_generator.go +++ b/consensus/wal_generator.go @@ -68,7 +68,7 @@ func WALWithNBlocks(numBlocks int) (data []byte, err error) { mempool := sm.MockMempool{} evpool := sm.MockEvidencePool{} blockExec := sm.NewBlockExecutor(stateDB, log.TestingLogger(), proxyApp.Consensus(), mempool, evpool) - consensusState := NewConsensusState(config.Consensus, state.Copy(), blockExec, blockStore, mempool, evpool) + consensusState := NewConsensusState(config.Consensus, state.Copy(), blockExec, blockStore, mempool, evpool, NopMetrics()) consensusState.SetLogger(logger) consensusState.SetEventBus(eventBus) if privValidator != nil { diff --git a/node/node.go b/node/node.go index efeb17ee..538055a8 100644 --- a/node/node.go +++ b/node/node.go @@ -7,6 +7,9 @@ import ( "net" "net/http" + prometheus "github.com/go-kit/kit/metrics/prometheus" + stdprometheus "github.com/prometheus/client_golang/prometheus" + abci "github.com/tendermint/abci/types" amino "github.com/tendermint/go-amino" crypto "github.com/tendermint/go-crypto" @@ -241,8 +244,15 @@ func NewNode(config *cfg.Config, bcReactor.SetLogger(logger.With("module", "blockchain")) // Make ConsensusReactor + // TODO: extract to provider + metrics := &cs.Metrics{ + Height: prometheus.NewCounter(stdprometheus.NewCounterVec(stdprometheus.CounterOpts{ + Name: "height", + }, []string{})), + } + stdprometheus.MustRegister(metrics.Height) consensusState := cs.NewConsensusState(config.Consensus, state.Copy(), - blockExec, blockStore, mempool, evidencePool) + blockExec, blockStore, mempool, evidencePool, metrics) consensusState.SetLogger(consensusLogger) if privValidator != nil { consensusState.SetPrivValidator(privValidator) From 9e14dc21a971aea30b43b60fbd32ba902604cc2b Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Wed, 13 Jun 2018 14:47:30 +0400 Subject: [PATCH 03/23] add labels column --- docs/architecture/adr-010-monitoring.md | 103 ----------------------- docs/architecture/adr-011-monitoring.md | 105 ++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 103 deletions(-) delete mode 100644 docs/architecture/adr-010-monitoring.md create mode 100644 docs/architecture/adr-011-monitoring.md diff --git a/docs/architecture/adr-010-monitoring.md b/docs/architecture/adr-010-monitoring.md deleted file mode 100644 index bac4b194..00000000 --- a/docs/architecture/adr-010-monitoring.md +++ /dev/null @@ -1,103 +0,0 @@ -# ADR 010: Monitoring - -## Changelog - -08-06-2018: Initial draft - -## Context - -In order to bring more visibility into Tendermint, we would like it to report -metrics and, maybe later, traces of transactions and RPC queries. See -https://github.com/tendermint/tendermint/issues/986. - -A few solutions were considered: - -1. [Prometheus](https://prometheus.io) - a) Prometheus API - b) [go-kit metrics package](https://github.com/go-kit/kit/tree/master/metrics) as an interface plus Prometheus - c) [telegraf](https://github.com/influxdata/telegraf) - d) new service, which will listen to events emitted by pubsub and report metrics -5. [OpenCensus](https://opencensus.io/go/index.html) - -### 1. Prometheus - -Prometheus seems to be the most popular product out there for monitoring. It has -a Go client library, powerful queries, alerts. - -**a) Prometheus API** - -We can commit to using Prometheus in Tendermint, but I think Tendermint users -should be free to choose whatever monitoring tool they feel will better suit -their needs (if they don't have existing one already). So we should try to -abstract interface enough so people can switch between Prometheus and other -similar tools. - -**b) go-kit metrics package as an interface** - -metrics package provides a set of uniform interfaces for service -instrumentation and offers adapters to popular metrics packages: - -https://godoc.org/github.com/go-kit/kit/metrics#pkg-subdirectories - -Comparing to Prometheus API, we're losing customisability and control, but gaining -freedom in choosing any instrument from the above list given we will extract -metrics creation into a separate function (see "providers" in node/node.go). - -**c) telegraf** - -Unlike already discussed options, telegraf does not require modifying Tendermint -source code. You create something called an input plugin, which polls -Tendermint RPC every second and calculates the metrics itself. - -While it may sound good, but some metrics we want to report are not exposed via -RPC or pubsub, therefore can't be accessed externally. - -**d) service, listening to pubsub** - -Same issue as the above. - -### 2. opencensus - -opencensus provides both metrics and tracing, which may be important in the -future. It's API looks different from go-kit and Prometheus, but looks like it -covers everything we need. - -Unfortunately, OpenCensus go client does not define any -interfaces, so if we want to abstract away metrics we -will need to write interfaces ourselves. - -### List of metrics - -| | Name | Type | | -| - | --------------------------------------- | ------- | ----------------------------------------------------------------------------- | -| A | height | Counter | | -| A | validators: | Gauge | Number of validators who signed | -| A | missing_validators: | Gauge | Number of validators who did not sign | -| A | byzantine_validators: | Gauge | Number of validators who tried to double sign | -| A | block_interval | Timing | Time between this and last block (Block.Header.Time) | -| | block_time | Timing | Time to create a block (from creating a proposal to commit) | -| | time_between_blocks | Timing | Time between committing last block and (receiving proposal creating proposal) | -| A | rounds: | Counter | Number of rounds | -| | prevotes:: | Counter | | -| | precommits:: | Counter | | -| | prevotes_total_power:: | Counter | | -| | precommits_total_power:: | Counter | | -| A | num_txs: | Counter | | -| | total_txs | Counter | | -| | block_size: | Gauge | In bytes | -| | peers | Gauge | Number of peers node's connected to | -| | power | Gauge | | - -`A` - will be implemented in the fist place. - -**Proposed solution** - -## Status - -## Consequences - -### Positive - -### Negative - -### Neutral diff --git a/docs/architecture/adr-011-monitoring.md b/docs/architecture/adr-011-monitoring.md new file mode 100644 index 00000000..a0eded5e --- /dev/null +++ b/docs/architecture/adr-011-monitoring.md @@ -0,0 +1,105 @@ +# ADR 011: Monitoring + +## Changelog + +08-06-2018: Initial draft +11-06-2018: Reorg after @xla comments +13-06-2018: Clarification about usage of labels + +## Context + +In order to bring more visibility into Tendermint, we would like it to report +metrics and, maybe later, traces of transactions and RPC queries. See +https://github.com/tendermint/tendermint/issues/986. + +A few solutions were considered: + +1. [Prometheus](https://prometheus.io) + a) Prometheus API + b) [go-kit metrics package](https://github.com/go-kit/kit/tree/master/metrics) as an interface plus Prometheus + c) [telegraf](https://github.com/influxdata/telegraf) + d) new service, which will listen to events emitted by pubsub and report metrics +5. [OpenCensus](https://opencensus.io/go/index.html) + +### 1. Prometheus + +Prometheus seems to be the most popular product out there for monitoring. It has +a Go client library, powerful queries, alerts. + +**a) Prometheus API** + +We can commit to using Prometheus in Tendermint, but I think Tendermint users +should be free to choose whatever monitoring tool they feel will better suit +their needs (if they don't have existing one already). So we should try to +abstract interface enough so people can switch between Prometheus and other +similar tools. + +**b) go-kit metrics package as an interface** + +metrics package provides a set of uniform interfaces for service +instrumentation and offers adapters to popular metrics packages: + +https://godoc.org/github.com/go-kit/kit/metrics#pkg-subdirectories + +Comparing to Prometheus API, we're losing customisability and control, but gaining +freedom in choosing any instrument from the above list given we will extract +metrics creation into a separate function (see "providers" in node/node.go). + +**c) telegraf** + +Unlike already discussed options, telegraf does not require modifying Tendermint +source code. You create something called an input plugin, which polls +Tendermint RPC every second and calculates the metrics itself. + +While it may sound good, but some metrics we want to report are not exposed via +RPC or pubsub, therefore can't be accessed externally. + +**d) service, listening to pubsub** + +Same issue as the above. + +### 2. opencensus + +opencensus provides both metrics and tracing, which may be important in the +future. It's API looks different from go-kit and Prometheus, but looks like it +covers everything we need. + +Unfortunately, OpenCensus go client does not define any +interfaces, so if we want to abstract away metrics we +will need to write interfaces ourselves. + +### List of metrics + +| | Name | Type | Labels | Description | +| - | --------------------------------------- | ------- | ------------------------- | ----------------------------------------------------------------------------- | +| A | height | Counter | height-X | | +| A | validators | Gauge | height-X | Number of validators who signed | +| A | missing_validators | Gauge | height-X | umber of validators who did not sign | +| A | byzantine_validators | Gauge | height-X | Number of validators who tried to double sign | +| A | block_interval | Timing | | Time between this and last block (Block.Header.Time) | +| | block_time | Timing | | Time to create a block (from creating a proposal to commit) | +| | time_between_blocks | Timing | | Time between committing last block and (receiving proposal creating proposal) | +| A | rounds | Counter | height-X | Number of rounds | +| | prevotes | Counter | height-X height-X-round-Y | | +| | precommits | Counter | height-X height-X-round-Y | | +| | prevotes_total_power | Counter | height-X height-X-round-Y | | +| | precommits_total_power | Counter | height-X height-X-round-Y | | +| A | num_txs | Counter | height-X | | +| | total_txs | Counter | | | +| | block_size | Gauge | height-X | In bytes | +| | peers | Gauge | | Number of peers node's connected to | +| | power | Gauge | | | + +`A` - will be implemented in the fist place. + +**Proposed solution** + +## Status + +## Consequences + +### Positive + +### Negative + +### Neutral From 5c869b58886aa7b27808e8e16b6dfe3a4b7c116c Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Wed, 13 Jun 2018 20:38:19 +0400 Subject: [PATCH 04/23] validator metrics --- consensus/metrics.go | 11 ++++++++++- consensus/state.go | 22 ++++++++++++++++++++++ node/node.go | 24 ++++++++++++++++++++---- 3 files changed, 52 insertions(+), 5 deletions(-) diff --git a/consensus/metrics.go b/consensus/metrics.go index 06df7e35..c4cd0334 100644 --- a/consensus/metrics.go +++ b/consensus/metrics.go @@ -7,11 +7,20 @@ import "github.com/go-kit/kit/metrics/discard" type Metrics struct { // height of the chain Height metrics.Counter + // number of validators who signed + Validators metrics.Gauge + // number of validators who did not sign + MissingValidators metrics.Gauge + // number of validators who tried to double sign + ByzantineValidators metrics.Gauge } // NopMetrics returns no-op Metrics. func NopMetrics() *Metrics { return &Metrics{ - Height: discard.NewCounter(), + Height: discard.NewCounter(), + Validators: discard.NewGauge(), + MissingValidators: discard.NewGauge(), + ByzantineValidators: discard.NewGauge(), } } diff --git a/consensus/state.go b/consensus/state.go index 3b6c029f..de424718 100644 --- a/consensus/state.go +++ b/consensus/state.go @@ -1281,6 +1281,8 @@ func (cs *ConsensusState) finalizeCommit(height int64) { fail.Fail() // XXX + cs.recordValidatorMetrics(height, block) + // NewHeightStep! cs.updateToState(stateCopy) @@ -1296,6 +1298,26 @@ func (cs *ConsensusState) finalizeCommit(height int64) { // * cs.StartTime is set to when we will start round0. } +func (cs *ConsensusState) recordValidatorMetrics(height int64, block *types.Block) { + heightStr := fmt.Sprintf("%d", height) + + cs.metrics.Validators.With("height", heightStr).Set(float64(cs.Validators.Size())) + + missingValidators := 0 + for i := range cs.Validators.Validators { + var vote *types.Vote + if i < len(block.LastCommit.Precommits) { + vote = block.LastCommit.Precommits[i] + } + if vote == nil { + missingValidators++ + } + } + cs.metrics.MissingValidators.With("height", heightStr).Set(float64(missingValidators)) + + cs.metrics.ByzantineValidators.With("height", heightStr).Set(float64(len(block.Evidence.Evidence))) +} + //----------------------------------------------------------------------------- func (cs *ConsensusState) defaultSetProposal(proposal *types.Proposal) error { diff --git a/node/node.go b/node/node.go index 538055a8..0ca6f777 100644 --- a/node/node.go +++ b/node/node.go @@ -246,11 +246,27 @@ func NewNode(config *cfg.Config, // Make ConsensusReactor // TODO: extract to provider metrics := &cs.Metrics{ - Height: prometheus.NewCounter(stdprometheus.NewCounterVec(stdprometheus.CounterOpts{ - Name: "height", - }, []string{})), + Height: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ + Subsystem: "consensus", + Name: "height", + Help: "Height of the chain.", + }, []string{}), + Validators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "validators", + Help: "Number of validators who signed, partitioned by height.", + }, []string{"height"}), + MissingValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "missing_validators", + Help: "Number of validators who did not sign, partitioned by height.", + }, []string{"height"}), + ByzantineValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "byzantine_validators", + Help: "Number of validators who tried to double sign, partitioned by height.", + }, []string{"height"}), } - stdprometheus.MustRegister(metrics.Height) consensusState := cs.NewConsensusState(config.Consensus, state.Copy(), blockExec, blockStore, mempool, evidencePool, metrics) consensusState.SetLogger(consensusLogger) From 3cdf3b670de3de0264e5ded90dbc1aaecdd4003d Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Wed, 13 Jun 2018 22:40:55 +0400 Subject: [PATCH 05/23] serve metrics under /metrics --- Gopkg.lock | 7 +++++-- consensus/state.go | 2 +- node/node.go | 2 ++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/Gopkg.lock b/Gopkg.lock index 9d1031e9..e45b84d1 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -173,7 +173,10 @@ [[projects]] name = "github.com/prometheus/client_golang" - packages = ["prometheus"] + packages = [ + "prometheus", + "prometheus/promhttp" + ] revision = "c5b7fccd204277076155f10851dad72b76a49317" version = "v0.8.0" @@ -423,6 +426,6 @@ [solve-meta] analyzer-name = "dep" analyzer-version = 1 - inputs-digest = "294ac88a5b44228f5ef841b13602d0a8b8fed5fbe9b9b6df77640d636175be16" + inputs-digest = "3bd388e520a08cd0aa14df2d6f5ecb46449d7c36fd80cf52eb775798e6accbaa" solver-name = "gps-cdcl" solver-version = 1 diff --git a/consensus/state.go b/consensus/state.go index de424718..8c07f9ea 100644 --- a/consensus/state.go +++ b/consensus/state.go @@ -391,8 +391,8 @@ func (cs *ConsensusState) SetProposalAndBlock(proposal *types.Proposal, block *t // internal functions for managing the state func (cs *ConsensusState) updateHeight(height int64) { - cs.Height = height cs.metrics.Height.Add(float64(height - cs.Height)) + cs.Height = height } func (cs *ConsensusState) updateRoundStep(round int, step cstypes.RoundStepType) { diff --git a/node/node.go b/node/node.go index 0ca6f777..89d21d1a 100644 --- a/node/node.go +++ b/node/node.go @@ -9,6 +9,7 @@ import ( prometheus "github.com/go-kit/kit/metrics/prometheus" stdprometheus "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" abci "github.com/tendermint/abci/types" amino "github.com/tendermint/go-amino" @@ -532,6 +533,7 @@ func (n *Node) startRPC() ([]net.Listener, error) { wm := rpcserver.NewWebsocketManager(rpccore.Routes, coreCodec, rpcserver.EventSubscriber(n.eventBus)) wm.SetLogger(rpcLogger.With("protocol", "websocket")) mux.HandleFunc("/websocket", wm.WebsocketHandler) + mux.Handle("/metrics", promhttp.Handler()) rpcserver.RegisterRPCFuncs(mux, rpccore.Routes, coreCodec, rpcLogger) listener, err := rpcserver.StartHTTPServer(listenAddr, mux, rpcLogger) if err != nil { From 489d9b9184f06d1c7e38f5895d0b0e42a9839aa3 Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Thu, 14 Jun 2018 16:09:32 +0400 Subject: [PATCH 06/23] more metrics --- consensus/metrics.go | 9 +++++++++ consensus/state.go | 11 +++++++---- node/node.go | 34 ++++++++++++++++++++++++++++++++++ types/block.go | 9 +++++++++ 4 files changed, 59 insertions(+), 4 deletions(-) diff --git a/consensus/metrics.go b/consensus/metrics.go index c4cd0334..c099eb0b 100644 --- a/consensus/metrics.go +++ b/consensus/metrics.go @@ -13,6 +13,12 @@ type Metrics struct { MissingValidators metrics.Gauge // number of validators who tried to double sign ByzantineValidators metrics.Gauge + // number of transactions + NumTxs metrics.Gauge + // total number of transactions + TotalTxs metrics.Counter + // size of the block + BlockSizeBytes metrics.Gauge } // NopMetrics returns no-op Metrics. @@ -22,5 +28,8 @@ func NopMetrics() *Metrics { Validators: discard.NewGauge(), MissingValidators: discard.NewGauge(), ByzantineValidators: discard.NewGauge(), + NumTxs: discard.NewGauge(), + TotalTxs: discard.NewCounter(), + BlockSizeBytes: discard.NewGauge(), } } diff --git a/consensus/state.go b/consensus/state.go index 8c07f9ea..56d5a9ce 100644 --- a/consensus/state.go +++ b/consensus/state.go @@ -1281,7 +1281,7 @@ func (cs *ConsensusState) finalizeCommit(height int64) { fail.Fail() // XXX - cs.recordValidatorMetrics(height, block) + cs.recordMetrics(height, block) // NewHeightStep! cs.updateToState(stateCopy) @@ -1298,11 +1298,10 @@ func (cs *ConsensusState) finalizeCommit(height int64) { // * cs.StartTime is set to when we will start round0. } -func (cs *ConsensusState) recordValidatorMetrics(height int64, block *types.Block) { +func (cs *ConsensusState) recordMetrics(height int64, block *types.Block) { heightStr := fmt.Sprintf("%d", height) cs.metrics.Validators.With("height", heightStr).Set(float64(cs.Validators.Size())) - missingValidators := 0 for i := range cs.Validators.Validators { var vote *types.Vote @@ -1314,8 +1313,12 @@ func (cs *ConsensusState) recordValidatorMetrics(height int64, block *types.Bloc } } cs.metrics.MissingValidators.With("height", heightStr).Set(float64(missingValidators)) - cs.metrics.ByzantineValidators.With("height", heightStr).Set(float64(len(block.Evidence.Evidence))) + + cs.metrics.NumTxs.With("height", heightStr).Set(float64(block.NumTxs)) + cs.metrics.TotalTxs.Add(float64(block.NumTxs)) + + cs.metrics.BlockSizeBytes.With("height", heightStr).Set(float64(block.Size())) } //----------------------------------------------------------------------------- diff --git a/node/node.go b/node/node.go index 89d21d1a..27ef74f8 100644 --- a/node/node.go +++ b/node/node.go @@ -252,6 +252,7 @@ func NewNode(config *cfg.Config, Name: "height", Help: "Height of the chain.", }, []string{}), + Validators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Subsystem: "consensus", Name: "validators", @@ -267,6 +268,39 @@ func NewNode(config *cfg.Config, Name: "byzantine_validators", Help: "Number of validators who tried to double sign, partitioned by height.", }, []string{"height"}), + + // BlockInterval: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ + // Subsystem: "consensus", + // Name: "block_interval", + // Help: "Time between this and the last block, partitioned by height.", + // }, []string{"height"}), + // BlockTime: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ + // Subsystem: "consensus", + // Name: "time_to_create_block", + // Help: "Time to create a block (from sending a proposal to commit), partitioned by height", + // }, []string{"height"}), + // TimeBetweenBlocks: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ + // Subsystem: "consensus", + // Name: "time_between_blocks", + // Help: "Time between committing the last block and (receiving/sending a proposal), partitioned by height", + // }, []string{"height"}), + + NumTxs: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "num_txs", + Help: "Number of transactions, partitioned by height.", + }, []string{"height"}), + TotalTxs: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ + Subsystem: "consensus", + Name: "total_txs", + Help: "Total number of transactions.", + }, []string{}), + + BlockSizeBytes: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "block_size_bytes", + Help: "Size of the block, partitioned by height.", + }, []string{"height"}), } consensusState := cs.NewConsensusState(config.Consensus, state.Copy(), blockExec, blockStore, mempool, evidencePool, metrics) diff --git a/types/block.go b/types/block.go index 3004672c..6adc0c4c 100644 --- a/types/block.go +++ b/types/block.go @@ -135,6 +135,15 @@ func (b *Block) HashesTo(hash []byte) bool { return bytes.Equal(b.Hash(), hash) } +// Size returns size of the block in bytes. +func (b *Block) Size() int { + bz, err := cdc.MarshalBinaryBare(b) + if err != nil { + return 0 + } + return len(bz) +} + // String returns a string representation of the block func (b *Block) String() string { return b.StringIndented("") From fad76e103baa32db84f1f4f1c6d843c4cac03ab9 Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Fri, 15 Jun 2018 14:23:34 +0400 Subject: [PATCH 07/23] extract metrics to provider, remove height label --- consensus/metrics.go | 17 ++++--- consensus/state.go | 18 ++++--- node/node.go | 117 ++++++++++++++++++++++--------------------- rpc/test/helpers.go | 4 +- 4 files changed, 83 insertions(+), 73 deletions(-) diff --git a/consensus/metrics.go b/consensus/metrics.go index c099eb0b..2c74ceb7 100644 --- a/consensus/metrics.go +++ b/consensus/metrics.go @@ -13,6 +13,8 @@ type Metrics struct { MissingValidators metrics.Gauge // number of validators who tried to double sign ByzantineValidators metrics.Gauge + // time between this and the last block + BlockIntervalSeconds metrics.Histogram // number of transactions NumTxs metrics.Gauge // total number of transactions @@ -24,12 +26,13 @@ type Metrics struct { // NopMetrics returns no-op Metrics. func NopMetrics() *Metrics { return &Metrics{ - Height: discard.NewCounter(), - Validators: discard.NewGauge(), - MissingValidators: discard.NewGauge(), - ByzantineValidators: discard.NewGauge(), - NumTxs: discard.NewGauge(), - TotalTxs: discard.NewCounter(), - BlockSizeBytes: discard.NewGauge(), + Height: discard.NewCounter(), + Validators: discard.NewGauge(), + MissingValidators: discard.NewGauge(), + ByzantineValidators: discard.NewGauge(), + BlockIntervalSeconds: discard.NewHistogram(), + NumTxs: discard.NewGauge(), + TotalTxs: discard.NewCounter(), + BlockSizeBytes: discard.NewGauge(), } } diff --git a/consensus/state.go b/consensus/state.go index 56d5a9ce..2fb1fb69 100644 --- a/consensus/state.go +++ b/consensus/state.go @@ -1299,9 +1299,7 @@ func (cs *ConsensusState) finalizeCommit(height int64) { } func (cs *ConsensusState) recordMetrics(height int64, block *types.Block) { - heightStr := fmt.Sprintf("%d", height) - - cs.metrics.Validators.With("height", heightStr).Set(float64(cs.Validators.Size())) + cs.metrics.Validators.Set(float64(cs.Validators.Size())) missingValidators := 0 for i := range cs.Validators.Validators { var vote *types.Vote @@ -1312,13 +1310,19 @@ func (cs *ConsensusState) recordMetrics(height int64, block *types.Block) { missingValidators++ } } - cs.metrics.MissingValidators.With("height", heightStr).Set(float64(missingValidators)) - cs.metrics.ByzantineValidators.With("height", heightStr).Set(float64(len(block.Evidence.Evidence))) + cs.metrics.MissingValidators.Set(float64(missingValidators)) + cs.metrics.ByzantineValidators.Set(float64(len(block.Evidence.Evidence))) - cs.metrics.NumTxs.With("height", heightStr).Set(float64(block.NumTxs)) + if height > 1 { + lastBlockMeta := cs.blockStore.LoadBlockMeta(height - 1) + cs.metrics.BlockIntervalSeconds. + Observe(block.Time.Sub(lastBlockMeta.Header.Time).Seconds()) + } + + cs.metrics.NumTxs.Set(float64(block.NumTxs)) cs.metrics.TotalTxs.Add(float64(block.NumTxs)) - cs.metrics.BlockSizeBytes.With("height", heightStr).Set(float64(block.Size())) + cs.metrics.BlockSizeBytes.Set(float64(block.Size())) } //----------------------------------------------------------------------------- diff --git a/node/node.go b/node/node.go index 27ef74f8..681dc04b 100644 --- a/node/node.go +++ b/node/node.go @@ -85,10 +85,66 @@ func DefaultNewNode(config *cfg.Config, logger log.Logger) (*Node, error) { proxy.DefaultClientCreator(config.ProxyApp, config.ABCI, config.DBDir()), DefaultGenesisDocProviderFunc(config), DefaultDBProvider, + DefaultMetricsProvider, logger, ) } +// MetricsProvider returns a consensus Metrics. +type MetricsProvider func() *cs.Metrics + +// DefaultMetrics returns a consensus Metrics build using Prometheus client +// library. +func DefaultMetricsProvider() *cs.Metrics { + return &cs.Metrics{ + Height: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ + Subsystem: "consensus", + Name: "height", + Help: "Height of the chain.", + }, []string{}), + + Validators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "validators", + Help: "Number of validators who signed, partitioned by height.", + }, []string{}), + MissingValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "missing_validators", + Help: "Number of validators who did not sign, partitioned by height.", + }, []string{}), + ByzantineValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "byzantine_validators", + Help: "Number of validators who tried to double sign, partitioned by height.", + }, []string{}), + + BlockIntervalSeconds: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ + Subsystem: "consensus", + Name: "block_interval_seconds", + Help: "Time between this and the last block, partitioned by height.", + Buckets: []float64{1, 2.5, 5, 10, 60}, + }, []string{}), + + NumTxs: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "num_txs", + Help: "Number of transactions, partitioned by height.", + }, []string{}), + TotalTxs: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ + Subsystem: "consensus", + Name: "total_txs", + Help: "Total number of transactions.", + }, []string{}), + + BlockSizeBytes: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "block_size_bytes", + Help: "Size of the block, partitioned by height.", + }, []string{}), + } +} + //------------------------------------------------------------------------------ // Node is the highest level interface to a full Tendermint node. @@ -126,6 +182,7 @@ func NewNode(config *cfg.Config, clientCreator proxy.ClientCreator, genesisDocProvider GenesisDocProvider, dbProvider DBProvider, + metricsProvider MetricsProvider, logger log.Logger) (*Node, error) { // Get BlockStore @@ -245,65 +302,9 @@ func NewNode(config *cfg.Config, bcReactor.SetLogger(logger.With("module", "blockchain")) // Make ConsensusReactor - // TODO: extract to provider - metrics := &cs.Metrics{ - Height: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ - Subsystem: "consensus", - Name: "height", - Help: "Height of the chain.", - }, []string{}), - - Validators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "validators", - Help: "Number of validators who signed, partitioned by height.", - }, []string{"height"}), - MissingValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "missing_validators", - Help: "Number of validators who did not sign, partitioned by height.", - }, []string{"height"}), - ByzantineValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "byzantine_validators", - Help: "Number of validators who tried to double sign, partitioned by height.", - }, []string{"height"}), - - // BlockInterval: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ - // Subsystem: "consensus", - // Name: "block_interval", - // Help: "Time between this and the last block, partitioned by height.", - // }, []string{"height"}), - // BlockTime: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ - // Subsystem: "consensus", - // Name: "time_to_create_block", - // Help: "Time to create a block (from sending a proposal to commit), partitioned by height", - // }, []string{"height"}), - // TimeBetweenBlocks: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ - // Subsystem: "consensus", - // Name: "time_between_blocks", - // Help: "Time between committing the last block and (receiving/sending a proposal), partitioned by height", - // }, []string{"height"}), - - NumTxs: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "num_txs", - Help: "Number of transactions, partitioned by height.", - }, []string{"height"}), - TotalTxs: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ - Subsystem: "consensus", - Name: "total_txs", - Help: "Total number of transactions.", - }, []string{}), - - BlockSizeBytes: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "block_size_bytes", - Help: "Size of the block, partitioned by height.", - }, []string{"height"}), - } + csMetrics := metricsProvider() consensusState := cs.NewConsensusState(config.Consensus, state.Copy(), - blockExec, blockStore, mempool, evidencePool, metrics) + blockExec, blockStore, mempool, evidencePool, csMetrics) consensusState.SetLogger(consensusLogger) if privValidator != nil { consensusState.SetPrivValidator(privValidator) diff --git a/rpc/test/helpers.go b/rpc/test/helpers.go index 5d18299b..b434c7d9 100644 --- a/rpc/test/helpers.go +++ b/rpc/test/helpers.go @@ -122,7 +122,9 @@ func NewTendermint(app abci.Application) *nm.Node { papp := proxy.NewLocalClientCreator(app) node, err := nm.NewNode(config, pv, papp, nm.DefaultGenesisDocProviderFunc(config), - nm.DefaultDBProvider, logger) + nm.DefaultDBProvider, + nm.DefaultMetricsProvider, + logger) if err != nil { panic(err) } From e58d674f4ca13ce66eb1f72300003bd7e36d412d Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Fri, 15 Jun 2018 14:35:36 +0400 Subject: [PATCH 08/23] add validators power gauges --- consensus/metrics.go | 24 ++++++++++++------------ consensus/state.go | 13 ++++++++++++- node/node.go | 27 +++++++++++++++++++++------ 3 files changed, 45 insertions(+), 19 deletions(-) diff --git a/consensus/metrics.go b/consensus/metrics.go index 2c74ceb7..064e5be5 100644 --- a/consensus/metrics.go +++ b/consensus/metrics.go @@ -4,22 +4,22 @@ import "github.com/go-kit/kit/metrics" import "github.com/go-kit/kit/metrics/discard" // Metrics contains metrics exposed by this package. +// see MetricsProvider for descriptions. type Metrics struct { - // height of the chain Height metrics.Counter - // number of validators who signed - Validators metrics.Gauge - // number of validators who did not sign - MissingValidators metrics.Gauge - // number of validators who tried to double sign - ByzantineValidators metrics.Gauge - // time between this and the last block + + Validators metrics.Gauge + ValidatorsPower metrics.Gauge + MissingValidators metrics.Gauge + MissingValidatorsPower metrics.Gauge + ByzantineValidators metrics.Gauge + ByzantineValidatorsPower metrics.Gauge + BlockIntervalSeconds metrics.Histogram - // number of transactions - NumTxs metrics.Gauge - // total number of transactions + + NumTxs metrics.Gauge TotalTxs metrics.Counter - // size of the block + BlockSizeBytes metrics.Gauge } diff --git a/consensus/state.go b/consensus/state.go index 2fb1fb69..61e01a9b 100644 --- a/consensus/state.go +++ b/consensus/state.go @@ -1300,18 +1300,29 @@ func (cs *ConsensusState) finalizeCommit(height int64) { func (cs *ConsensusState) recordMetrics(height int64, block *types.Block) { cs.metrics.Validators.Set(float64(cs.Validators.Size())) + cs.metrics.ValidatorsPower.Set(float64(cs.Validators.TotalVotingPower())) missingValidators := 0 - for i := range cs.Validators.Validators { + missingValidatorsPower := int64(0) + for i, val := range cs.Validators.Validators { var vote *types.Vote if i < len(block.LastCommit.Precommits) { vote = block.LastCommit.Precommits[i] } if vote == nil { missingValidators++ + missingValidatorsPower += val.VotingPower } } cs.metrics.MissingValidators.Set(float64(missingValidators)) + cs.metrics.MissingValidatorsPower.Set(float64(missingValidatorsPower)) cs.metrics.ByzantineValidators.Set(float64(len(block.Evidence.Evidence))) + byzantineValidatorsPower := int64(0) + for _, ev := range block.Evidence.Evidence { + if _, val := cs.Validators.GetByAddress(ev.Address()); val != nil { + byzantineValidatorsPower += val.VotingPower + } + } + cs.metrics.ByzantineValidatorsPower.Set(float64(byzantineValidatorsPower)) if height > 1 { lastBlockMeta := cs.blockStore.LoadBlockMeta(height - 1) diff --git a/node/node.go b/node/node.go index 681dc04b..79862088 100644 --- a/node/node.go +++ b/node/node.go @@ -106,30 +106,45 @@ func DefaultMetricsProvider() *cs.Metrics { Validators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Subsystem: "consensus", Name: "validators", - Help: "Number of validators who signed, partitioned by height.", + Help: "Number of validators who signed.", + }, []string{}), + ValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "validators_power", + Help: "Total power of all validators.", }, []string{}), MissingValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Subsystem: "consensus", Name: "missing_validators", - Help: "Number of validators who did not sign, partitioned by height.", + Help: "Number of validators who did not sign.", + }, []string{}), + MissingValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "missing_validators_power", + Help: "Total power of the missing validators.", }, []string{}), ByzantineValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Subsystem: "consensus", Name: "byzantine_validators", - Help: "Number of validators who tried to double sign, partitioned by height.", + Help: "Number of validators who tried to double sign.", + }, []string{}), + ByzantineValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "byzantine_validators_power", + Help: "Total power of the byzantine validators.", }, []string{}), BlockIntervalSeconds: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ Subsystem: "consensus", Name: "block_interval_seconds", - Help: "Time between this and the last block, partitioned by height.", + Help: "Time between this and the last block.", Buckets: []float64{1, 2.5, 5, 10, 60}, }, []string{}), NumTxs: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Subsystem: "consensus", Name: "num_txs", - Help: "Number of transactions, partitioned by height.", + Help: "Number of transactions.", }, []string{}), TotalTxs: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ Subsystem: "consensus", @@ -140,7 +155,7 @@ func DefaultMetricsProvider() *cs.Metrics { BlockSizeBytes: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Subsystem: "consensus", Name: "block_size_bytes", - Help: "Size of the block, partitioned by height.", + Help: "Size of the block.", }, []string{}), } } From 0cb50c05fc4ef24a109be368526a935c4b479894 Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Fri, 15 Jun 2018 14:45:21 +0400 Subject: [PATCH 09/23] add rounds metric --- consensus/metrics.go | 1 + consensus/state.go | 1 + node/node.go | 5 +++++ 3 files changed, 7 insertions(+) diff --git a/consensus/metrics.go b/consensus/metrics.go index 064e5be5..169c2fd7 100644 --- a/consensus/metrics.go +++ b/consensus/metrics.go @@ -7,6 +7,7 @@ import "github.com/go-kit/kit/metrics/discard" // see MetricsProvider for descriptions. type Metrics struct { Height metrics.Counter + Rounds metrics.Gauge Validators metrics.Gauge ValidatorsPower metrics.Gauge diff --git a/consensus/state.go b/consensus/state.go index 61e01a9b..f078663d 100644 --- a/consensus/state.go +++ b/consensus/state.go @@ -697,6 +697,7 @@ func (cs *ConsensusState) enterNewRound(height int64, round int) { } logger.Info(cmn.Fmt("enterNewRound(%v/%v). Current: %v/%v/%v", height, round, cs.Height, cs.Round, cs.Step)) + cs.metrics.Rounds.Set(float64(round)) // Increment validators if necessary validators := cs.Validators diff --git a/node/node.go b/node/node.go index 79862088..5630df6c 100644 --- a/node/node.go +++ b/node/node.go @@ -102,6 +102,11 @@ func DefaultMetricsProvider() *cs.Metrics { Name: "height", Help: "Height of the chain.", }, []string{}), + Rounds: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "rounds", + Help: "Number of rounds.", + }, []string{}), Validators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Subsystem: "consensus", From 19699d644fa9bcede24d5ce8ff153c0945bed395 Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Fri, 15 Jun 2018 15:10:25 +0400 Subject: [PATCH 10/23] p2p metric, make height and totalTxs gauges --- blockchain/reactor_test.go | 2 +- consensus/byzantine_test.go | 2 +- consensus/metrics.go | 30 +++++--- consensus/state.go | 8 +- node/node.go | 146 +++++++++++++++++++----------------- p2p/metrics.go | 17 +++++ p2p/switch.go | 7 +- p2p/test_util.go | 2 +- 8 files changed, 125 insertions(+), 89 deletions(-) create mode 100644 p2p/metrics.go diff --git a/blockchain/reactor_test.go b/blockchain/reactor_test.go index c7f7e9af..672cb83d 100644 --- a/blockchain/reactor_test.go +++ b/blockchain/reactor_test.go @@ -42,7 +42,7 @@ func newBlockchainReactor(logger log.Logger, maxBlockHeight int64) *BlockchainRe bcReactor.SetLogger(logger.With("module", "blockchain")) // Next: we need to set a switch in order for peers to be added in - bcReactor.Switch = p2p.NewSwitch(cfg.DefaultP2PConfig()) + bcReactor.Switch = p2p.NewSwitch(cfg.DefaultP2PConfig(), p2p.NopMetrics()) // Lastly: let's add some blocks in for blockHeight := int64(1); blockHeight <= maxBlockHeight; blockHeight++ { diff --git a/consensus/byzantine_test.go b/consensus/byzantine_test.go index d3be8c35..c0d2e636 100644 --- a/consensus/byzantine_test.go +++ b/consensus/byzantine_test.go @@ -38,7 +38,7 @@ func TestByzantine(t *testing.T) { switches := make([]*p2p.Switch, N) p2pLogger := logger.With("module", "p2p") for i := 0; i < N; i++ { - switches[i] = p2p.NewSwitch(config.P2P) + switches[i] = p2p.NewSwitch(config.P2P, p2p.NopMetrics()) switches[i].SetLogger(p2pLogger.With("validator", i)) } diff --git a/consensus/metrics.go b/consensus/metrics.go index 169c2fd7..cd3bfc9b 100644 --- a/consensus/metrics.go +++ b/consensus/metrics.go @@ -6,7 +6,8 @@ import "github.com/go-kit/kit/metrics/discard" // Metrics contains metrics exposed by this package. // see MetricsProvider for descriptions. type Metrics struct { - Height metrics.Counter + Height metrics.Gauge + Rounds metrics.Gauge Validators metrics.Gauge @@ -18,22 +19,29 @@ type Metrics struct { BlockIntervalSeconds metrics.Histogram - NumTxs metrics.Gauge - TotalTxs metrics.Counter - + NumTxs metrics.Gauge BlockSizeBytes metrics.Gauge + TotalTxs metrics.Gauge } // NopMetrics returns no-op Metrics. func NopMetrics() *Metrics { return &Metrics{ - Height: discard.NewCounter(), - Validators: discard.NewGauge(), - MissingValidators: discard.NewGauge(), - ByzantineValidators: discard.NewGauge(), + Height: discard.NewGauge(), + + Rounds: discard.NewGauge(), + + Validators: discard.NewGauge(), + ValidatorsPower: discard.NewGauge(), + MissingValidators: discard.NewGauge(), + MissingValidatorsPower: discard.NewGauge(), + ByzantineValidators: discard.NewGauge(), + ByzantineValidatorsPower: discard.NewGauge(), + BlockIntervalSeconds: discard.NewHistogram(), - NumTxs: discard.NewGauge(), - TotalTxs: discard.NewCounter(), - BlockSizeBytes: discard.NewGauge(), + + NumTxs: discard.NewGauge(), + BlockSizeBytes: discard.NewGauge(), + TotalTxs: discard.NewGauge(), } } diff --git a/consensus/state.go b/consensus/state.go index f078663d..a9258453 100644 --- a/consensus/state.go +++ b/consensus/state.go @@ -391,7 +391,7 @@ func (cs *ConsensusState) SetProposalAndBlock(proposal *types.Proposal, block *t // internal functions for managing the state func (cs *ConsensusState) updateHeight(height int64) { - cs.metrics.Height.Add(float64(height - cs.Height)) + cs.metrics.Height.Set(float64(height)) cs.Height = height } @@ -697,7 +697,6 @@ func (cs *ConsensusState) enterNewRound(height int64, round int) { } logger.Info(cmn.Fmt("enterNewRound(%v/%v). Current: %v/%v/%v", height, round, cs.Height, cs.Round, cs.Step)) - cs.metrics.Rounds.Set(float64(round)) // Increment validators if necessary validators := cs.Validators @@ -724,6 +723,7 @@ func (cs *ConsensusState) enterNewRound(height int64, round int) { cs.Votes.SetRound(round + 1) // also track next round (round+1) to allow round-skipping cs.eventBus.PublishEventNewRound(cs.RoundStateEvent()) + cs.metrics.Rounds.Set(float64(round)) // Wait for txs to be available in the mempool // before we enterPropose in round 0. If the last block changed the app hash, @@ -1282,6 +1282,7 @@ func (cs *ConsensusState) finalizeCommit(height int64) { fail.Fail() // XXX + // must be called before we update state cs.recordMetrics(height, block) // NewHeightStep! @@ -1332,9 +1333,8 @@ func (cs *ConsensusState) recordMetrics(height int64, block *types.Block) { } cs.metrics.NumTxs.Set(float64(block.NumTxs)) - cs.metrics.TotalTxs.Add(float64(block.NumTxs)) - cs.metrics.BlockSizeBytes.Set(float64(block.Size())) + cs.metrics.TotalTxs.Set(float64(block.TotalTxs)) } //----------------------------------------------------------------------------- diff --git a/node/node.go b/node/node.go index 5630df6c..a8671c9f 100644 --- a/node/node.go +++ b/node/node.go @@ -90,79 +90,84 @@ func DefaultNewNode(config *cfg.Config, logger log.Logger) (*Node, error) { ) } -// MetricsProvider returns a consensus Metrics. -type MetricsProvider func() *cs.Metrics +// MetricsProvider returns a consensus and p2p Metrics. +type MetricsProvider func() (*cs.Metrics, *p2p.Metrics) -// DefaultMetrics returns a consensus Metrics build using Prometheus client -// library. -func DefaultMetricsProvider() *cs.Metrics { +// DefaultMetricsProvider returns a consensus and p2p Metrics build using +// Prometheus client library. +func DefaultMetricsProvider() (*cs.Metrics, *p2p.Metrics) { return &cs.Metrics{ - Height: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ - Subsystem: "consensus", - Name: "height", - Help: "Height of the chain.", - }, []string{}), - Rounds: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "rounds", - Help: "Number of rounds.", - }, []string{}), + Height: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "height", + Help: "Height of the chain.", + }, []string{}), + Rounds: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "rounds", + Help: "Number of rounds.", + }, []string{}), - Validators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "validators", - Help: "Number of validators who signed.", - }, []string{}), - ValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "validators_power", - Help: "Total power of all validators.", - }, []string{}), - MissingValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "missing_validators", - Help: "Number of validators who did not sign.", - }, []string{}), - MissingValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "missing_validators_power", - Help: "Total power of the missing validators.", - }, []string{}), - ByzantineValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "byzantine_validators", - Help: "Number of validators who tried to double sign.", - }, []string{}), - ByzantineValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "byzantine_validators_power", - Help: "Total power of the byzantine validators.", - }, []string{}), + Validators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "validators", + Help: "Number of validators who signed.", + }, []string{}), + ValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "validators_power", + Help: "Total power of all validators.", + }, []string{}), + MissingValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "missing_validators", + Help: "Number of validators who did not sign.", + }, []string{}), + MissingValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "missing_validators_power", + Help: "Total power of the missing validators.", + }, []string{}), + ByzantineValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "byzantine_validators", + Help: "Number of validators who tried to double sign.", + }, []string{}), + ByzantineValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "byzantine_validators_power", + Help: "Total power of the byzantine validators.", + }, []string{}), - BlockIntervalSeconds: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ - Subsystem: "consensus", - Name: "block_interval_seconds", - Help: "Time between this and the last block.", - Buckets: []float64{1, 2.5, 5, 10, 60}, - }, []string{}), + BlockIntervalSeconds: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ + Subsystem: "consensus", + Name: "block_interval_seconds", + Help: "Time between this and the last block.", + Buckets: []float64{1, 2.5, 5, 10, 60}, + }, []string{}), - NumTxs: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "num_txs", - Help: "Number of transactions.", - }, []string{}), - TotalTxs: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ - Subsystem: "consensus", - Name: "total_txs", - Help: "Total number of transactions.", - }, []string{}), - - BlockSizeBytes: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "block_size_bytes", - Help: "Size of the block.", - }, []string{}), - } + NumTxs: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "num_txs", + Help: "Number of transactions.", + }, []string{}), + BlockSizeBytes: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "block_size_bytes", + Help: "Size of the block.", + }, []string{}), + TotalTxs: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "total_txs", + Help: "Total number of transactions.", + }, []string{}), + }, &p2p.Metrics{ + Peers: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "p2p", + Name: "peers", + Help: "Number of peers.", + }, []string{}), + } } //------------------------------------------------------------------------------ @@ -321,8 +326,9 @@ func NewNode(config *cfg.Config, bcReactor := bc.NewBlockchainReactor(state.Copy(), blockExec, blockStore, fastSync) bcReactor.SetLogger(logger.With("module", "blockchain")) + csMetrics, p2pMetrics := metricsProvider() + // Make ConsensusReactor - csMetrics := metricsProvider() consensusState := cs.NewConsensusState(config.Consensus, state.Copy(), blockExec, blockStore, mempool, evidencePool, csMetrics) consensusState.SetLogger(consensusLogger) @@ -334,7 +340,7 @@ func NewNode(config *cfg.Config, p2pLogger := logger.With("module", "p2p") - sw := p2p.NewSwitch(config.P2P) + sw := p2p.NewSwitch(config.P2P, p2pMetrics) sw.SetLogger(p2pLogger) sw.AddReactor("MEMPOOL", mempoolReactor) sw.AddReactor("BLOCKCHAIN", bcReactor) diff --git a/p2p/metrics.go b/p2p/metrics.go new file mode 100644 index 00000000..3de45302 --- /dev/null +++ b/p2p/metrics.go @@ -0,0 +1,17 @@ +package p2p + +import "github.com/go-kit/kit/metrics" +import "github.com/go-kit/kit/metrics/discard" + +// Metrics contains metrics exposed by this package. +// see MetricsProvider for descriptions. +type Metrics struct { + Peers metrics.Gauge +} + +// NopMetrics returns no-op Metrics. +func NopMetrics() *Metrics { + return &Metrics{ + Peers: discard.NewGauge(), + } +} diff --git a/p2p/switch.go b/p2p/switch.go index f1ceee5c..2eb10cf8 100644 --- a/p2p/switch.go +++ b/p2p/switch.go @@ -73,10 +73,12 @@ type Switch struct { mConfig conn.MConnConfig rng *cmn.Rand // seed for randomizing dial times and orders + + metrics *Metrics } // NewSwitch creates a new Switch with the given config. -func NewSwitch(cfg *config.P2PConfig) *Switch { +func NewSwitch(cfg *config.P2PConfig, metrics *Metrics) *Switch { sw := &Switch{ config: cfg, reactors: make(map[string]Reactor), @@ -85,6 +87,7 @@ func NewSwitch(cfg *config.P2PConfig) *Switch { peers: NewPeerSet(), dialing: cmn.NewCMap(), reconnecting: cmn.NewCMap(), + metrics: metrics, } // Ensure we have a completely undeterministic PRNG. @@ -279,6 +282,7 @@ func (sw *Switch) StopPeerGracefully(peer Peer) { func (sw *Switch) stopAndRemovePeer(peer Peer, reason interface{}) { sw.peers.Remove(peer) + sw.metrics.Peers.Add(float64(-1)) peer.Stop() for _, reactor := range sw.reactors { reactor.RemovePeer(peer, reason) @@ -623,6 +627,7 @@ func (sw *Switch) addPeer(pc peerConn) error { if err := sw.peers.Add(peer); err != nil { return err } + sw.metrics.Peers.Add(float64(1)) sw.Logger.Info("Added peer", "peer", peer) return nil diff --git a/p2p/test_util.go b/p2p/test_util.go index 0d2ba6c5..02e4f644 100644 --- a/p2p/test_util.go +++ b/p2p/test_util.go @@ -137,7 +137,7 @@ func MakeSwitch(cfg *config.P2PConfig, i int, network, version string, initSwitc nodeKey := &NodeKey{ PrivKey: crypto.GenPrivKeyEd25519(), } - sw := NewSwitch(cfg) + sw := NewSwitch(cfg, NopMetrics()) sw.SetLogger(log.TestingLogger()) sw = initSwitch(i, sw) ni := NodeInfo{ From 7efb73aa187702529bc32d46d2c1b94bf58d150b Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Fri, 15 Jun 2018 15:57:11 +0400 Subject: [PATCH 11/23] mempool size metric --- consensus/common_test.go | 2 +- mempool/mempool.go | 7 ++++++- mempool/mempool_test.go | 4 ++-- mempool/metrics.go | 17 +++++++++++++++++ node/node.go | 16 +++++++++++----- 5 files changed, 37 insertions(+), 9 deletions(-) create mode 100644 mempool/metrics.go diff --git a/consensus/common_test.go b/consensus/common_test.go index ea141592..55f4d582 100644 --- a/consensus/common_test.go +++ b/consensus/common_test.go @@ -255,7 +255,7 @@ func newConsensusStateWithConfigAndBlockStore(thisConfig *cfg.Config, state sm.S proxyAppConnCon := abcicli.NewLocalClient(mtx, app) // Make Mempool - mempool := mempl.NewMempool(thisConfig.Mempool, proxyAppConnMem, 0) + mempool := mempl.NewMempool(thisConfig.Mempool, proxyAppConnMem, 0, mempl.NopMetrics()) mempool.SetLogger(log.TestingLogger().With("module", "mempool")) if thisConfig.Consensus.WaitForTxs() { mempool.EnableTxsAvailable() diff --git a/mempool/mempool.go b/mempool/mempool.go index 5af16b3c..a1f9cd17 100644 --- a/mempool/mempool.go +++ b/mempool/mempool.go @@ -83,10 +83,12 @@ type Mempool struct { wal *auto.AutoFile logger log.Logger + + metrics *Metrics } // NewMempool returns a new Mempool with the given configuration and connection to an application. -func NewMempool(config *cfg.MempoolConfig, proxyAppConn proxy.AppConnMempool, height int64) *Mempool { +func NewMempool(config *cfg.MempoolConfig, proxyAppConn proxy.AppConnMempool, height int64, metrics *Metrics) *Mempool { mempool := &Mempool{ config: config, proxyAppConn: proxyAppConn, @@ -98,6 +100,7 @@ func NewMempool(config *cfg.MempoolConfig, proxyAppConn proxy.AppConnMempool, he recheckEnd: nil, logger: log.NewNopLogger(), cache: newTxCache(config.CacheSize), + metrics: metrics, } proxyAppConn.SetResponseCallback(mempool.resCb) return mempool @@ -250,6 +253,7 @@ func (mem *Mempool) resCb(req *abci.Request, res *abci.Response) { } else { mem.resCbRecheck(req, res) } + mem.metrics.Size.Set(float64(mem.Size())) } func (mem *Mempool) resCbNormal(req *abci.Request, res *abci.Response) { @@ -393,6 +397,7 @@ func (mem *Mempool) Update(height int64, txs types.Txs) error { // mem.recheckCursor re-scans mem.txs and possibly removes some txs. // Before mem.Reap(), we should wait for mem.recheckCursor to be nil. } + mem.metrics.Size.Set(float64(mem.Size())) return nil } diff --git a/mempool/mempool_test.go b/mempool/mempool_test.go index a67adf6d..45aa27a8 100644 --- a/mempool/mempool_test.go +++ b/mempool/mempool_test.go @@ -33,7 +33,7 @@ func newMempoolWithApp(cc proxy.ClientCreator) *Mempool { if err != nil { panic(err) } - mempool := NewMempool(config.Mempool, appConnMem, 0) + mempool := NewMempool(config.Mempool, appConnMem, 0, NopMetrics()) mempool.SetLogger(log.TestingLogger()) return mempool } @@ -241,7 +241,7 @@ func TestMempoolCloseWAL(t *testing.T) { app := kvstore.NewKVStoreApplication() cc := proxy.NewLocalClientCreator(app) appConnMem, _ := cc.NewABCIClient() - mempool := NewMempool(wcfg, appConnMem, 10) + mempool := NewMempool(wcfg, appConnMem, 10, NopMetrics()) mempool.InitWAL() // 4. Ensure that the directory contains the WAL file diff --git a/mempool/metrics.go b/mempool/metrics.go new file mode 100644 index 00000000..3457285f --- /dev/null +++ b/mempool/metrics.go @@ -0,0 +1,17 @@ +package mempool + +import "github.com/go-kit/kit/metrics" +import "github.com/go-kit/kit/metrics/discard" + +// Metrics contains metrics exposed by this package. +// see MetricsProvider for descriptions. +type Metrics struct { + Size metrics.Gauge +} + +// NopMetrics returns no-op Metrics. +func NopMetrics() *Metrics { + return &Metrics{ + Size: discard.NewGauge(), + } +} diff --git a/node/node.go b/node/node.go index a8671c9f..acb3461b 100644 --- a/node/node.go +++ b/node/node.go @@ -91,11 +91,11 @@ func DefaultNewNode(config *cfg.Config, logger log.Logger) (*Node, error) { } // MetricsProvider returns a consensus and p2p Metrics. -type MetricsProvider func() (*cs.Metrics, *p2p.Metrics) +type MetricsProvider func() (*cs.Metrics, *p2p.Metrics, *mempl.Metrics) // DefaultMetricsProvider returns a consensus and p2p Metrics build using // Prometheus client library. -func DefaultMetricsProvider() (*cs.Metrics, *p2p.Metrics) { +func DefaultMetricsProvider() (*cs.Metrics, *p2p.Metrics, *mempl.Metrics) { return &cs.Metrics{ Height: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Subsystem: "consensus", @@ -167,6 +167,12 @@ func DefaultMetricsProvider() (*cs.Metrics, *p2p.Metrics) { Name: "peers", Help: "Number of peers.", }, []string{}), + }, &mempl.Metrics{ + Size: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "mempool", + Name: "size", + Help: "Size of the mempool (number of uncommitted transactions).", + }, []string{}), } } @@ -294,9 +300,11 @@ func NewNode(config *cfg.Config, consensusLogger.Info("This node is not a validator", "addr", privValidator.GetAddress(), "pubKey", privValidator.GetPubKey()) } + csMetrics, p2pMetrics, memplMetrics := metricsProvider() + // Make MempoolReactor mempoolLogger := logger.With("module", "mempool") - mempool := mempl.NewMempool(config.Mempool, proxyApp.Mempool(), state.LastBlockHeight) + mempool := mempl.NewMempool(config.Mempool, proxyApp.Mempool(), state.LastBlockHeight, memplMetrics) mempool.InitWAL() // no need to have the mempool wal during tests mempool.SetLogger(mempoolLogger) mempoolReactor := mempl.NewMempoolReactor(config.Mempool, mempool) @@ -326,8 +334,6 @@ func NewNode(config *cfg.Config, bcReactor := bc.NewBlockchainReactor(state.Copy(), blockExec, blockStore, fastSync) bcReactor.SetLogger(logger.With("module", "blockchain")) - csMetrics, p2pMetrics := metricsProvider() - // Make ConsensusReactor consensusState := cs.NewConsensusState(config.Consensus, state.Copy(), blockExec, blockStore, mempool, evidencePool, csMetrics) From c958b5319c068380a3b2ffa7efebf404df4d24e7 Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Fri, 15 Jun 2018 17:19:09 +0400 Subject: [PATCH 12/23] update ADR --- docs/architecture/adr-011-monitoring.md | 49 +++++++++++++++---------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/docs/architecture/adr-011-monitoring.md b/docs/architecture/adr-011-monitoring.md index a0eded5e..ca16a9a1 100644 --- a/docs/architecture/adr-011-monitoring.md +++ b/docs/architecture/adr-011-monitoring.md @@ -70,25 +70,28 @@ will need to write interfaces ourselves. ### List of metrics -| | Name | Type | Labels | Description | -| - | --------------------------------------- | ------- | ------------------------- | ----------------------------------------------------------------------------- | -| A | height | Counter | height-X | | -| A | validators | Gauge | height-X | Number of validators who signed | -| A | missing_validators | Gauge | height-X | umber of validators who did not sign | -| A | byzantine_validators | Gauge | height-X | Number of validators who tried to double sign | -| A | block_interval | Timing | | Time between this and last block (Block.Header.Time) | -| | block_time | Timing | | Time to create a block (from creating a proposal to commit) | -| | time_between_blocks | Timing | | Time between committing last block and (receiving proposal creating proposal) | -| A | rounds | Counter | height-X | Number of rounds | -| | prevotes | Counter | height-X height-X-round-Y | | -| | precommits | Counter | height-X height-X-round-Y | | -| | prevotes_total_power | Counter | height-X height-X-round-Y | | -| | precommits_total_power | Counter | height-X height-X-round-Y | | -| A | num_txs | Counter | height-X | | -| | total_txs | Counter | | | -| | block_size | Gauge | height-X | In bytes | -| | peers | Gauge | | Number of peers node's connected to | -| | power | Gauge | | | +| | Name | Type | Description | +| - | --------------------------------------- | ------- | ----------------------------------------------------------------------------- | +| A | consensus_height | Gauge | | +| A | consensus_validators | Gauge | Number of validators who signed | +| A | consensus_validators_power | Gauge | Total voting power of all validators | +| A | consensus_missing_validators | Gauge | Number of validators who did not sign | +| A | consensus_missing_validators_power | Gauge | Total voting power of the missing validators | +| A | consensus_byzantine_validators | Gauge | Number of validators who tried to double sign | +| A | consensus_byzantine_validators_power | Gauge | Total voting power of the byzantine validators | +| A | consensus_block_interval | Timing | Time between this and last block (Block.Header.Time) | +| | consensus_block_time | Timing | Time to create a block (from creating a proposal to commit) | +| | consensus_time_between_blocks | Timing | Time between committing last block and (receiving proposal creating proposal) | +| A | consensus_rounds | Gauge | Number of rounds | +| | consensus_prevotes | Gauge | | +| | consensus_precommits | Gauge | | +| | consensus_prevotes_total_power | Gauge | | +| | consensus_precommits_total_power | Gauge | | +| A | consensus_num_txs | Gauge | | +| A | mempool_size | Gauge | | +| A | consensus_total_txs | Gauge | | +| A | consensus_block_size | Gauge | In bytes | +| A | p2p_peers | Gauge | Number of peers node's connected to | `A` - will be implemented in the fist place. @@ -96,10 +99,18 @@ will need to write interfaces ourselves. ## Status +Proposed. + ## Consequences ### Positive +Better visibility, support of variety of monitoring backends + ### Negative +One more library to audit, messing metrics reporting code with business domain. + ### Neutral + +- From 1bdff076ad903c184c33bcdb70caf4b9f8c26c9c Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Fri, 15 Jun 2018 17:21:04 +0400 Subject: [PATCH 13/23] add config option --- CHANGELOG.md | 5 +++++ config/config.go | 9 +++++++-- config/toml.go | 4 ++++ node/node.go | 25 +++++++++++++++++++++---- 4 files changed, 37 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5afc0221..f69127d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## TBD + +FEATURES: +- [node] added metrics (served under /metrics using a Prometheus client; disabled by default) + ## 0.20.1 BUG FIXES: diff --git a/config/config.go b/config/config.go index 5ba568f2..c6cb6549 100644 --- a/config/config.go +++ b/config/config.go @@ -142,6 +142,10 @@ type BaseConfig struct { // Database directory DBPath string `mapstructure:"db_dir"` + + // When true, metrics are served under `/metrics` using a Prometheus client + // Check out the documentation for the list of available metrics. + Monitoring bool `mapstructure:"monitoring"` } // DefaultBaseConfig returns a default base configuration for a Tendermint node @@ -159,6 +163,7 @@ func DefaultBaseConfig() BaseConfig { FilterPeers: false, DBBackend: "leveldb", DBPath: "data", + Monitoring: false, } } @@ -411,7 +416,7 @@ func (cfg *MempoolConfig) WalDir() string { //----------------------------------------------------------------------------- // ConsensusConfig -// ConsensusConfig defines the confuguration for the Tendermint consensus service, +// ConsensusConfig defines the configuration for the Tendermint consensus service, // including timeouts and details about the WAL and the block structure. type ConsensusConfig struct { RootDir string `mapstructure:"home"` @@ -536,7 +541,7 @@ func (cfg *ConsensusConfig) SetWalFile(walFile string) { //----------------------------------------------------------------------------- // TxIndexConfig -// TxIndexConfig defines the confuguration for the transaction +// TxIndexConfig defines the configuration for the transaction // indexer, including tags to index. type TxIndexConfig struct { // What indexer to use for transactions diff --git a/config/toml.go b/config/toml.go index 7ed3e971..8927f15d 100644 --- a/config/toml.go +++ b/config/toml.go @@ -107,6 +107,10 @@ prof_laddr = "{{ .BaseConfig.ProfListenAddress }}" # so the app can decide if we should keep the connection or not filter_peers = {{ .BaseConfig.FilterPeers }} +# When true, metrics are served under /metrics using a Prometheus client +# Check out the documentation for the list of available metrics. +monitoring = {{ .BaseConfig.Monitoring }} + ##### advanced configuration options ##### ##### rpc server configuration options ##### diff --git a/node/node.go b/node/node.go index acb3461b..00d6cc40 100644 --- a/node/node.go +++ b/node/node.go @@ -93,8 +93,8 @@ func DefaultNewNode(config *cfg.Config, logger log.Logger) (*Node, error) { // MetricsProvider returns a consensus and p2p Metrics. type MetricsProvider func() (*cs.Metrics, *p2p.Metrics, *mempl.Metrics) -// DefaultMetricsProvider returns a consensus and p2p Metrics build using -// Prometheus client library. +// DefaultMetricsProvider returns consensus, p2p and mempool Metrics build +// using Prometheus client library. func DefaultMetricsProvider() (*cs.Metrics, *p2p.Metrics, *mempl.Metrics) { return &cs.Metrics{ Height: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ @@ -176,6 +176,11 @@ func DefaultMetricsProvider() (*cs.Metrics, *p2p.Metrics, *mempl.Metrics) { } } +// NopMetricsProvider returns consensus, p2p and mempool Metrics as no-op. +func NopMetricsProvider() (*cs.Metrics, *p2p.Metrics, *mempl.Metrics) { + return cs.NopMetrics(), p2p.NopMetrics(), mempl.NopMetrics() +} + //------------------------------------------------------------------------------ // Node is the highest level interface to a full Tendermint node. @@ -300,7 +305,17 @@ func NewNode(config *cfg.Config, consensusLogger.Info("This node is not a validator", "addr", privValidator.GetAddress(), "pubKey", privValidator.GetPubKey()) } - csMetrics, p2pMetrics, memplMetrics := metricsProvider() + // metrics + var ( + csMetrics *cs.Metrics + p2pMetrics *p2p.Metrics + memplMetrics *mempl.Metrics + ) + if config.BaseConfig.Monitoring { + csMetrics, p2pMetrics, memplMetrics = metricsProvider() + } else { + csMetrics, p2pMetrics, memplMetrics = NopMetricsProvider() + } // Make MempoolReactor mempoolLogger := logger.With("module", "mempool") @@ -600,7 +615,9 @@ func (n *Node) startRPC() ([]net.Listener, error) { wm := rpcserver.NewWebsocketManager(rpccore.Routes, coreCodec, rpcserver.EventSubscriber(n.eventBus)) wm.SetLogger(rpcLogger.With("protocol", "websocket")) mux.HandleFunc("/websocket", wm.WebsocketHandler) - mux.Handle("/metrics", promhttp.Handler()) + if n.config.BaseConfig.Monitoring { + mux.Handle("/metrics", promhttp.Handler()) + } rpcserver.RegisterRPCFuncs(mux, rpccore.Routes, coreCodec, rpcLogger) listener, err := rpcserver.StartHTTPServer(listenAddr, mux, rpcLogger) if err != nil { From e90cb4f5face47c9ea1449c863f1b0a82c2f3711 Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Fri, 15 Jun 2018 17:33:44 +0400 Subject: [PATCH 14/23] add docs --- docs/index.rst | 1 + docs/metrics.md | 26 ++++++++++++++++++++++++++ node/node.go | 2 +- 3 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 docs/metrics.md diff --git a/docs/index.rst b/docs/index.rst index b40db3bf..e7d86bc2 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -52,6 +52,7 @@ ABCI, Apps, Logging, Etc indexing-transactions.md how-to-read-logs.md running-in-production.md + metrics.md Research & Specification ------------------------ diff --git a/docs/metrics.md b/docs/metrics.md new file mode 100644 index 00000000..3a0a17fa --- /dev/null +++ b/docs/metrics.md @@ -0,0 +1,26 @@ +# Metrics + +Tendermint can serve metrics under `/metrics` RPC endpoint, which can be +consumed by Prometheus server. + +This functionality is disabled by default. Set `monitoring=true` if your config +to enable it. + +## List of available metrics + +| Name | Type | Description | +| --------------------------------------- | ------- | ----------------------------------------------------------------------------- | +| consensus_height | Gauge | Height of the chain | +| consensus_validators | Gauge | Number of validators | +| consensus_validators_power | Gauge | Total voting power of all validators | +| consensus_missing_validators | Gauge | Number of validators who did not sign | +| consensus_missing_validators_power | Gauge | Total voting power of the missing validators | +| consensus_byzantine_validators | Gauge | Number of validators who tried to double sign | +| consensus_byzantine_validators_power | Gauge | Total voting power of the byzantine validators | +| consensus_block_interval | Timing | Time between this and last block (Block.Header.Time) | +| consensus_rounds | Gauge | Number of rounds | +| consensus_num_txs | Gauge | Number of transactions | +| mempool_size | Gauge | Number of uncommitted transactions | +| consensus_total_txs | Gauge | Total number of transactions committed | +| consensus_block_size | Gauge | Block size in bytes | +| p2p_peers | Gauge | Number of peers node's connected to | diff --git a/node/node.go b/node/node.go index 00d6cc40..0d3ecdc1 100644 --- a/node/node.go +++ b/node/node.go @@ -111,7 +111,7 @@ func DefaultMetricsProvider() (*cs.Metrics, *p2p.Metrics, *mempl.Metrics) { Validators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Subsystem: "consensus", Name: "validators", - Help: "Number of validators who signed.", + Help: "Number of validators.", }, []string{}), ValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Subsystem: "consensus", From bba28624984b696c8208fae8a811fcf177a5f77b Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Fri, 15 Jun 2018 17:38:46 +0400 Subject: [PATCH 15/23] add one useful query --- docs/metrics.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/metrics.md b/docs/metrics.md index 3a0a17fa..71a632e0 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -24,3 +24,11 @@ to enable it. | consensus_total_txs | Gauge | Total number of transactions committed | | consensus_block_size | Gauge | Block size in bytes | | p2p_peers | Gauge | Number of peers node's connected to | + +## Useful queries + +Percentage of missing + byzantine validators: + +``` +((consensus_byzantine_validators_power + consensus_missing_validators_power) / consensus_validators_power) * 100 +``` From b10b0da3fd4e14f7c161c2719831cbb8b1c2aaf1 Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Sat, 16 Jun 2018 11:12:55 +0400 Subject: [PATCH 16/23] bundle imports --- consensus/metrics.go | 6 ++++-- mempool/metrics.go | 6 ++++-- p2p/metrics.go | 6 ++++-- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/consensus/metrics.go b/consensus/metrics.go index cd3bfc9b..9a5f701f 100644 --- a/consensus/metrics.go +++ b/consensus/metrics.go @@ -1,7 +1,9 @@ package consensus -import "github.com/go-kit/kit/metrics" -import "github.com/go-kit/kit/metrics/discard" +import ( + "github.com/go-kit/kit/metrics" + "github.com/go-kit/kit/metrics/discard" +) // Metrics contains metrics exposed by this package. // see MetricsProvider for descriptions. diff --git a/mempool/metrics.go b/mempool/metrics.go index 3457285f..9c40a254 100644 --- a/mempool/metrics.go +++ b/mempool/metrics.go @@ -1,7 +1,9 @@ package mempool -import "github.com/go-kit/kit/metrics" -import "github.com/go-kit/kit/metrics/discard" +import ( + "github.com/go-kit/kit/metrics" + "github.com/go-kit/kit/metrics/discard" +) // Metrics contains metrics exposed by this package. // see MetricsProvider for descriptions. diff --git a/p2p/metrics.go b/p2p/metrics.go index 3de45302..37efe443 100644 --- a/p2p/metrics.go +++ b/p2p/metrics.go @@ -1,7 +1,9 @@ package p2p -import "github.com/go-kit/kit/metrics" -import "github.com/go-kit/kit/metrics/discard" +import ( + "github.com/go-kit/kit/metrics" + "github.com/go-kit/kit/metrics/discard" +) // Metrics contains metrics exposed by this package. // see MetricsProvider for descriptions. From d8fea3ec9d96b2be417e891f89cb30f574bdb193 Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Sat, 16 Jun 2018 11:13:27 +0400 Subject: [PATCH 17/23] separate http server for metrics --- config/config.go | 71 +++++++++++++++++++++++++++++++++--------------- config/toml.go | 15 +++++++--- node/node.go | 34 ++++++++++++++++++++--- 3 files changed, 90 insertions(+), 30 deletions(-) diff --git a/config/config.go b/config/config.go index c6cb6549..1a0dfaf3 100644 --- a/config/config.go +++ b/config/config.go @@ -45,34 +45,37 @@ type Config struct { BaseConfig `mapstructure:",squash"` // Options for services - RPC *RPCConfig `mapstructure:"rpc"` - P2P *P2PConfig `mapstructure:"p2p"` - Mempool *MempoolConfig `mapstructure:"mempool"` - Consensus *ConsensusConfig `mapstructure:"consensus"` - TxIndex *TxIndexConfig `mapstructure:"tx_index"` + RPC *RPCConfig `mapstructure:"rpc"` + P2P *P2PConfig `mapstructure:"p2p"` + Mempool *MempoolConfig `mapstructure:"mempool"` + Consensus *ConsensusConfig `mapstructure:"consensus"` + TxIndex *TxIndexConfig `mapstructure:"tx_index"` + Instrumentation *InstrumentationConfig `mapstructure:"instrumentation"` } // DefaultConfig returns a default configuration for a Tendermint node func DefaultConfig() *Config { return &Config{ - BaseConfig: DefaultBaseConfig(), - RPC: DefaultRPCConfig(), - P2P: DefaultP2PConfig(), - Mempool: DefaultMempoolConfig(), - Consensus: DefaultConsensusConfig(), - TxIndex: DefaultTxIndexConfig(), + BaseConfig: DefaultBaseConfig(), + RPC: DefaultRPCConfig(), + P2P: DefaultP2PConfig(), + Mempool: DefaultMempoolConfig(), + Consensus: DefaultConsensusConfig(), + TxIndex: DefaultTxIndexConfig(), + Instrumentation: DefaultInstrumentationConfig(), } } // TestConfig returns a configuration that can be used for testing func TestConfig() *Config { return &Config{ - BaseConfig: TestBaseConfig(), - RPC: TestRPCConfig(), - P2P: TestP2PConfig(), - Mempool: TestMempoolConfig(), - Consensus: TestConsensusConfig(), - TxIndex: TestTxIndexConfig(), + BaseConfig: TestBaseConfig(), + RPC: TestRPCConfig(), + P2P: TestP2PConfig(), + Mempool: TestMempoolConfig(), + Consensus: TestConsensusConfig(), + TxIndex: TestTxIndexConfig(), + Instrumentation: TestInstrumentationConfig(), } } @@ -142,10 +145,6 @@ type BaseConfig struct { // Database directory DBPath string `mapstructure:"db_dir"` - - // When true, metrics are served under `/metrics` using a Prometheus client - // Check out the documentation for the list of available metrics. - Monitoring bool `mapstructure:"monitoring"` } // DefaultBaseConfig returns a default base configuration for a Tendermint node @@ -163,7 +162,6 @@ func DefaultBaseConfig() BaseConfig { FilterPeers: false, DBBackend: "leveldb", DBPath: "data", - Monitoring: false, } } @@ -578,6 +576,35 @@ func TestTxIndexConfig() *TxIndexConfig { return DefaultTxIndexConfig() } +//----------------------------------------------------------------------------- +// InstrumentationConfig + +// InstrumentationConfig defines the configuration for metrics reporting. +type InstrumentationConfig struct { + // When true, Prometheus metrics are served under /metrics on + // PrometheusListenAddr. + // Check out the documentation for the list of available metrics. + Prometheus bool `mapstructure:"prometheus"` + + // Address to listen for Prometheus collector(s) connections. + PrometheusListenAddr string `mapstructure:"prometheus_listen_addr"` +} + +// DefaultInstrumentationConfig returns a default configuration for metrics +// reporting. +func DefaultInstrumentationConfig() *InstrumentationConfig { + return &InstrumentationConfig{ + Prometheus: false, + PrometheusListenAddr: ":26660", + } +} + +// TestInstrumentationConfig returns a default configuration for metrics +// reporting. +func TestInstrumentationConfig() *InstrumentationConfig { + return DefaultInstrumentationConfig() +} + //----------------------------------------------------------------------------- // Utils diff --git a/config/toml.go b/config/toml.go index 8927f15d..c3d41a9b 100644 --- a/config/toml.go +++ b/config/toml.go @@ -107,10 +107,6 @@ prof_laddr = "{{ .BaseConfig.ProfListenAddress }}" # so the app can decide if we should keep the connection or not filter_peers = {{ .BaseConfig.FilterPeers }} -# When true, metrics are served under /metrics using a Prometheus client -# Check out the documentation for the list of available metrics. -monitoring = {{ .BaseConfig.Monitoring }} - ##### advanced configuration options ##### ##### rpc server configuration options ##### @@ -236,6 +232,17 @@ index_tags = "{{ .TxIndex.IndexTags }}" # desirable (see the comment above). IndexTags has a precedence over # IndexAllTags (i.e. when given both, IndexTags will be indexed). index_all_tags = {{ .TxIndex.IndexAllTags }} + +##### instrumentation configuration options ##### +[instrumentation] + +# When true, Prometheus metrics are served under /metrics on +# PrometheusListenAddr. +# Check out the documentation for the list of available metrics. +prometheus = {{ .Instrumentation.Prometheus }} + +# Address to listen for Prometheus collector(s) connections +prometheus_listen_addr = "{{ .Instrumentation.PrometheusListenAddr }}" ` /****** these are for test settings ***********/ diff --git a/node/node.go b/node/node.go index 0d3ecdc1..df1d10cb 100644 --- a/node/node.go +++ b/node/node.go @@ -2,6 +2,7 @@ package node import ( "bytes" + "context" "errors" "fmt" "net" @@ -210,6 +211,7 @@ type Node struct { rpcListeners []net.Listener // rpc servers txIndexer txindex.TxIndexer indexerService *txindex.IndexerService + prometheusSrv *http.Server } // NewNode returns a new, ready to go, Tendermint Node. @@ -311,7 +313,7 @@ func NewNode(config *cfg.Config, p2pMetrics *p2p.Metrics memplMetrics *mempl.Metrics ) - if config.BaseConfig.Monitoring { + if config.Instrumentation.Prometheus { csMetrics, p2pMetrics, memplMetrics = metricsProvider() } else { csMetrics, p2pMetrics, memplMetrics = NopMetricsProvider() @@ -520,6 +522,10 @@ func (n *Node) OnStart() error { n.rpcListeners = listeners } + if n.config.Instrumentation.Prometheus { + n.prometheusSrv = n.StartPrometheusServer(n.config.Instrumentation.PrometheusListenAddr) + } + // Start the switch (the P2P server). err = n.sw.Start() if err != nil { @@ -561,6 +567,13 @@ func (n *Node) OnStop() { n.Logger.Error("Error stopping priv validator socket client", "err", err) } } + + if n.prometheusSrv != nil { + if err := n.prometheusSrv.Shutdown(context.Background()); err != nil { + // Error from closing listeners, or context timeout: + n.Logger.Error("Prometheus HTTP server Shutdown", "err", err) + } + } } // RunForever waits for an interrupt signal and stops the node. @@ -615,9 +628,6 @@ func (n *Node) startRPC() ([]net.Listener, error) { wm := rpcserver.NewWebsocketManager(rpccore.Routes, coreCodec, rpcserver.EventSubscriber(n.eventBus)) wm.SetLogger(rpcLogger.With("protocol", "websocket")) mux.HandleFunc("/websocket", wm.WebsocketHandler) - if n.config.BaseConfig.Monitoring { - mux.Handle("/metrics", promhttp.Handler()) - } rpcserver.RegisterRPCFuncs(mux, rpccore.Routes, coreCodec, rpcLogger) listener, err := rpcserver.StartHTTPServer(listenAddr, mux, rpcLogger) if err != nil { @@ -639,6 +649,22 @@ func (n *Node) startRPC() ([]net.Listener, error) { return listeners, nil } +// StartPrometheusServer starts a Prometheus HTTP server, listening for metrics +// collectors on addr. +func (n *Node) StartPrometheusServer(addr string) *http.Server { + srv := &http.Server{ + Addr: addr, + Handler: promhttp.Handler(), + } + go func(s *http.Server, logger log.Logger) { + if err := s.ListenAndServe(); err != http.ErrServerClosed { + // Error starting or closing listener: + logger.Error("Prometheus HTTP server ListenAndServe", "err", err) + } + }(srv, n.Logger) + return srv +} + // Switch returns the Node's Switch. func (n *Node) Switch() *p2p.Switch { return n.sw From cd11a54f7a9d4987d03873e795f89bae270558f8 Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Sat, 16 Jun 2018 11:13:41 +0400 Subject: [PATCH 18/23] add since column to list of available metrics table --- docs/metrics.md | 46 ++++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/docs/metrics.md b/docs/metrics.md index 71a632e0..1cd758b9 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -1,29 +1,35 @@ # Metrics -Tendermint can serve metrics under `/metrics` RPC endpoint, which can be -consumed by Prometheus server. +Tendermint can report and serve the Prometheus metrics, which in their turn can +be consumed by Prometheus collector(s). -This functionality is disabled by default. Set `monitoring=true` if your config -to enable it. +This functionality is disabled by default. + +To enable the Prometheus metrics, set `instrumentation.prometheus=true` if your +config file. Metrics will be served under `/metrics` on 26660 port by default. +Listen address can be changed in the config file (see +`prometheus_listen_addr`). ## List of available metrics -| Name | Type | Description | -| --------------------------------------- | ------- | ----------------------------------------------------------------------------- | -| consensus_height | Gauge | Height of the chain | -| consensus_validators | Gauge | Number of validators | -| consensus_validators_power | Gauge | Total voting power of all validators | -| consensus_missing_validators | Gauge | Number of validators who did not sign | -| consensus_missing_validators_power | Gauge | Total voting power of the missing validators | -| consensus_byzantine_validators | Gauge | Number of validators who tried to double sign | -| consensus_byzantine_validators_power | Gauge | Total voting power of the byzantine validators | -| consensus_block_interval | Timing | Time between this and last block (Block.Header.Time) | -| consensus_rounds | Gauge | Number of rounds | -| consensus_num_txs | Gauge | Number of transactions | -| mempool_size | Gauge | Number of uncommitted transactions | -| consensus_total_txs | Gauge | Total number of transactions committed | -| consensus_block_size | Gauge | Block size in bytes | -| p2p_peers | Gauge | Number of peers node's connected to | +The following metrics are available: + +| Name | Type | Since | Description | +| --------------------------------------- | ------- | --------- | ----------------------------------------------------------------------------- | +| consensus_height | Gauge | 0.20.1 | Height of the chain | +| consensus_validators | Gauge | 0.20.1 | Number of validators | +| consensus_validators_power | Gauge | 0.20.1 | Total voting power of all validators | +| consensus_missing_validators | Gauge | 0.20.1 | Number of validators who did not sign | +| consensus_missing_validators_power | Gauge | 0.20.1 | Total voting power of the missing validators | +| consensus_byzantine_validators | Gauge | 0.20.1 | Number of validators who tried to double sign | +| consensus_byzantine_validators_power | Gauge | 0.20.1 | Total voting power of the byzantine validators | +| consensus_block_interval_seconds | Histogram | 0.20.1 | Time between this and last block (Block.Header.Time) in seconds | +| consensus_rounds | Gauge | 0.20.1 | Number of rounds | +| consensus_num_txs | Gauge | 0.20.1 | Number of transactions | +| mempool_size | Gauge | 0.20.1 | Number of uncommitted transactions | +| consensus_total_txs | Gauge | 0.20.1 | Total number of transactions committed | +| consensus_block_size_bytes | Gauge | 0.20.1 | Block size in bytes | +| p2p_peers | Gauge | 0.20.1 | Number of peers node's connected to | ## Useful queries From 84812145cb8bc7c706b3fb6cc5be70b3ee72a8a2 Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Sat, 16 Jun 2018 11:44:03 +0400 Subject: [PATCH 19/23] friendly apis for constructors --- blockchain/reactor_test.go | 2 +- consensus/byzantine_test.go | 2 +- consensus/common_test.go | 4 ++-- consensus/replay_file.go | 4 ++-- consensus/state.go | 14 ++++++++++++-- consensus/wal_generator.go | 2 +- mempool/mempool.go | 14 ++++++++++++-- mempool/mempool_test.go | 4 ++-- node/node.go | 9 +++++---- p2p/switch.go | 16 ++++++++++++++-- p2p/test_util.go | 2 +- 11 files changed, 53 insertions(+), 20 deletions(-) diff --git a/blockchain/reactor_test.go b/blockchain/reactor_test.go index 672cb83d..c7f7e9af 100644 --- a/blockchain/reactor_test.go +++ b/blockchain/reactor_test.go @@ -42,7 +42,7 @@ func newBlockchainReactor(logger log.Logger, maxBlockHeight int64) *BlockchainRe bcReactor.SetLogger(logger.With("module", "blockchain")) // Next: we need to set a switch in order for peers to be added in - bcReactor.Switch = p2p.NewSwitch(cfg.DefaultP2PConfig(), p2p.NopMetrics()) + bcReactor.Switch = p2p.NewSwitch(cfg.DefaultP2PConfig()) // Lastly: let's add some blocks in for blockHeight := int64(1); blockHeight <= maxBlockHeight; blockHeight++ { diff --git a/consensus/byzantine_test.go b/consensus/byzantine_test.go index c0d2e636..d3be8c35 100644 --- a/consensus/byzantine_test.go +++ b/consensus/byzantine_test.go @@ -38,7 +38,7 @@ func TestByzantine(t *testing.T) { switches := make([]*p2p.Switch, N) p2pLogger := logger.With("module", "p2p") for i := 0; i < N; i++ { - switches[i] = p2p.NewSwitch(config.P2P, p2p.NopMetrics()) + switches[i] = p2p.NewSwitch(config.P2P) switches[i].SetLogger(p2pLogger.With("validator", i)) } diff --git a/consensus/common_test.go b/consensus/common_test.go index 55f4d582..f50e5769 100644 --- a/consensus/common_test.go +++ b/consensus/common_test.go @@ -255,7 +255,7 @@ func newConsensusStateWithConfigAndBlockStore(thisConfig *cfg.Config, state sm.S proxyAppConnCon := abcicli.NewLocalClient(mtx, app) // Make Mempool - mempool := mempl.NewMempool(thisConfig.Mempool, proxyAppConnMem, 0, mempl.NopMetrics()) + mempool := mempl.NewMempool(thisConfig.Mempool, proxyAppConnMem, 0) mempool.SetLogger(log.TestingLogger().With("module", "mempool")) if thisConfig.Consensus.WaitForTxs() { mempool.EnableTxsAvailable() @@ -267,7 +267,7 @@ func newConsensusStateWithConfigAndBlockStore(thisConfig *cfg.Config, state sm.S // Make ConsensusState stateDB := dbm.NewMemDB() blockExec := sm.NewBlockExecutor(stateDB, log.TestingLogger(), proxyAppConnCon, mempool, evpool) - cs := NewConsensusState(thisConfig.Consensus, state, blockExec, blockStore, mempool, evpool, NopMetrics()) + cs := NewConsensusState(thisConfig.Consensus, state, blockExec, blockStore, mempool, evpool) cs.SetLogger(log.TestingLogger().With("module", "consensus")) cs.SetPrivValidator(pv) diff --git a/consensus/replay_file.go b/consensus/replay_file.go index 113c69c8..57204b01 100644 --- a/consensus/replay_file.go +++ b/consensus/replay_file.go @@ -126,7 +126,7 @@ func (pb *playback) replayReset(count int, newStepCh chan interface{}) error { pb.cs.Wait() newCS := NewConsensusState(pb.cs.config, pb.genesisState.Copy(), pb.cs.blockExec, - pb.cs.blockStore, pb.cs.mempool, pb.cs.evpool, pb.cs.metrics) + pb.cs.blockStore, pb.cs.mempool, pb.cs.evpool) newCS.SetEventBus(pb.cs.eventBus) newCS.startForReplay() @@ -314,7 +314,7 @@ func newConsensusStateForReplay(config cfg.BaseConfig, csConfig *cfg.ConsensusCo blockExec := sm.NewBlockExecutor(stateDB, log.TestingLogger(), proxyApp.Consensus(), mempool, evpool) consensusState := NewConsensusState(csConfig, state.Copy(), blockExec, - blockStore, mempool, evpool, NopMetrics()) + blockStore, mempool, evpool) consensusState.SetEventBus(eventBus) return consensusState diff --git a/consensus/state.go b/consensus/state.go index a9258453..686d2e7d 100644 --- a/consensus/state.go +++ b/consensus/state.go @@ -121,7 +121,7 @@ type ConsensusState struct { } // NewConsensusState returns a new ConsensusState. -func NewConsensusState(config *cfg.ConsensusConfig, state sm.State, blockExec *sm.BlockExecutor, blockStore sm.BlockStore, mempool sm.Mempool, evpool sm.EvidencePool, metrics *Metrics) *ConsensusState { +func NewConsensusState(config *cfg.ConsensusConfig, state sm.State, blockExec *sm.BlockExecutor, blockStore sm.BlockStore, mempool sm.Mempool, evpool sm.EvidencePool, options ...func(*ConsensusState)) *ConsensusState { cs := &ConsensusState{ config: config, blockExec: blockExec, @@ -135,7 +135,7 @@ func NewConsensusState(config *cfg.ConsensusConfig, state sm.State, blockExec *s wal: nilWAL{}, evpool: evpool, evsw: tmevents.NewEventSwitch(), - metrics: metrics, + metrics: NopMetrics(), } // set function defaults (may be overwritten before calling Start) cs.decideProposal = cs.defaultDecideProposal @@ -147,6 +147,9 @@ func NewConsensusState(config *cfg.ConsensusConfig, state sm.State, blockExec *s // We do that upon Start(). cs.reconstructLastCommit(state) cs.BaseService = *cmn.NewBaseService(nil, "ConsensusState", cs) + for _, option := range options { + option(cs) + } return cs } @@ -165,6 +168,13 @@ func (cs *ConsensusState) SetEventBus(b *types.EventBus) { cs.blockExec.SetEventBus(b) } +// WithMetrics sets the metrics. +func WithMetrics(metrics *Metrics) func(*ConsensusState) { + return func(cs *ConsensusState) { + cs.metrics = metrics + } +} + // String returns a string. func (cs *ConsensusState) String() string { // better not to access shared variables diff --git a/consensus/wal_generator.go b/consensus/wal_generator.go index dab93c34..f61af15f 100644 --- a/consensus/wal_generator.go +++ b/consensus/wal_generator.go @@ -68,7 +68,7 @@ func WALWithNBlocks(numBlocks int) (data []byte, err error) { mempool := sm.MockMempool{} evpool := sm.MockEvidencePool{} blockExec := sm.NewBlockExecutor(stateDB, log.TestingLogger(), proxyApp.Consensus(), mempool, evpool) - consensusState := NewConsensusState(config.Consensus, state.Copy(), blockExec, blockStore, mempool, evpool, NopMetrics()) + consensusState := NewConsensusState(config.Consensus, state.Copy(), blockExec, blockStore, mempool, evpool) consensusState.SetLogger(logger) consensusState.SetEventBus(eventBus) if privValidator != nil { diff --git a/mempool/mempool.go b/mempool/mempool.go index a1f9cd17..1df1651e 100644 --- a/mempool/mempool.go +++ b/mempool/mempool.go @@ -88,7 +88,7 @@ type Mempool struct { } // NewMempool returns a new Mempool with the given configuration and connection to an application. -func NewMempool(config *cfg.MempoolConfig, proxyAppConn proxy.AppConnMempool, height int64, metrics *Metrics) *Mempool { +func NewMempool(config *cfg.MempoolConfig, proxyAppConn proxy.AppConnMempool, height int64, options ...func(*Mempool)) *Mempool { mempool := &Mempool{ config: config, proxyAppConn: proxyAppConn, @@ -100,9 +100,12 @@ func NewMempool(config *cfg.MempoolConfig, proxyAppConn proxy.AppConnMempool, he recheckEnd: nil, logger: log.NewNopLogger(), cache: newTxCache(config.CacheSize), - metrics: metrics, + metrics: NopMetrics(), } proxyAppConn.SetResponseCallback(mempool.resCb) + for _, option := range options { + option(mempool) + } return mempool } @@ -118,6 +121,13 @@ func (mem *Mempool) SetLogger(l log.Logger) { mem.logger = l } +// WithMetrics sets the metrics. +func WithMetrics(metrics *Metrics) func(*Mempool) { + return func(mem *Mempool) { + mem.metrics = metrics + } +} + // CloseWAL closes and discards the underlying WAL file. // Any further writes will not be relayed to disk. func (mem *Mempool) CloseWAL() bool { diff --git a/mempool/mempool_test.go b/mempool/mempool_test.go index 45aa27a8..a67adf6d 100644 --- a/mempool/mempool_test.go +++ b/mempool/mempool_test.go @@ -33,7 +33,7 @@ func newMempoolWithApp(cc proxy.ClientCreator) *Mempool { if err != nil { panic(err) } - mempool := NewMempool(config.Mempool, appConnMem, 0, NopMetrics()) + mempool := NewMempool(config.Mempool, appConnMem, 0) mempool.SetLogger(log.TestingLogger()) return mempool } @@ -241,7 +241,7 @@ func TestMempoolCloseWAL(t *testing.T) { app := kvstore.NewKVStoreApplication() cc := proxy.NewLocalClientCreator(app) appConnMem, _ := cc.NewABCIClient() - mempool := NewMempool(wcfg, appConnMem, 10, NopMetrics()) + mempool := NewMempool(wcfg, appConnMem, 10) mempool.InitWAL() // 4. Ensure that the directory contains the WAL file diff --git a/node/node.go b/node/node.go index df1d10cb..67f99247 100644 --- a/node/node.go +++ b/node/node.go @@ -321,9 +321,10 @@ func NewNode(config *cfg.Config, // Make MempoolReactor mempoolLogger := logger.With("module", "mempool") - mempool := mempl.NewMempool(config.Mempool, proxyApp.Mempool(), state.LastBlockHeight, memplMetrics) - mempool.InitWAL() // no need to have the mempool wal during tests + mempool := mempl.NewMempool(config.Mempool, proxyApp.Mempool(), state.LastBlockHeight, + mempl.WithMetrics(memplMetrics)) mempool.SetLogger(mempoolLogger) + mempool.InitWAL() // no need to have the mempool wal during tests mempoolReactor := mempl.NewMempoolReactor(config.Mempool, mempool) mempoolReactor.SetLogger(mempoolLogger) @@ -353,7 +354,7 @@ func NewNode(config *cfg.Config, // Make ConsensusReactor consensusState := cs.NewConsensusState(config.Consensus, state.Copy(), - blockExec, blockStore, mempool, evidencePool, csMetrics) + blockExec, blockStore, mempool, evidencePool, cs.WithMetrics(csMetrics)) consensusState.SetLogger(consensusLogger) if privValidator != nil { consensusState.SetPrivValidator(privValidator) @@ -363,7 +364,7 @@ func NewNode(config *cfg.Config, p2pLogger := logger.With("module", "p2p") - sw := p2p.NewSwitch(config.P2P, p2pMetrics) + sw := p2p.NewSwitch(config.P2P, p2p.WithMetrics(p2pMetrics)) sw.SetLogger(p2pLogger) sw.AddReactor("MEMPOOL", mempoolReactor) sw.AddReactor("BLOCKCHAIN", bcReactor) diff --git a/p2p/switch.go b/p2p/switch.go index 2eb10cf8..ed7a8098 100644 --- a/p2p/switch.go +++ b/p2p/switch.go @@ -78,7 +78,7 @@ type Switch struct { } // NewSwitch creates a new Switch with the given config. -func NewSwitch(cfg *config.P2PConfig, metrics *Metrics) *Switch { +func NewSwitch(cfg *config.P2PConfig, options ...func(*Switch)) *Switch { sw := &Switch{ config: cfg, reactors: make(map[string]Reactor), @@ -87,7 +87,7 @@ func NewSwitch(cfg *config.P2PConfig, metrics *Metrics) *Switch { peers: NewPeerSet(), dialing: cmn.NewCMap(), reconnecting: cmn.NewCMap(), - metrics: metrics, + metrics: NopMetrics(), } // Ensure we have a completely undeterministic PRNG. @@ -102,9 +102,21 @@ func NewSwitch(cfg *config.P2PConfig, metrics *Metrics) *Switch { sw.mConfig = mConfig sw.BaseService = *cmn.NewBaseService(nil, "P2P Switch", sw) + + for _, option := range options { + option(sw) + } + return sw } +// WithMetrics sets the metrics. +func WithMetrics(metrics *Metrics) func(*Switch) { + return func(sw *Switch) { + sw.metrics = metrics + } +} + //--------------------------------------------------------------------- // Switch setup diff --git a/p2p/test_util.go b/p2p/test_util.go index 02e4f644..0d2ba6c5 100644 --- a/p2p/test_util.go +++ b/p2p/test_util.go @@ -137,7 +137,7 @@ func MakeSwitch(cfg *config.P2PConfig, i int, network, version string, initSwitc nodeKey := &NodeKey{ PrivKey: crypto.GenPrivKeyEd25519(), } - sw := NewSwitch(cfg, NopMetrics()) + sw := NewSwitch(cfg) sw.SetLogger(log.TestingLogger()) sw = initSwitch(i, sw) ni := NodeInfo{ From 829342a82d8266861a0013943ac723a1f3df9933 Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Tue, 19 Jun 2018 10:36:10 +0400 Subject: [PATCH 20/23] make StartPrometheusServer func private we don't really need to export it --- node/node.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/node/node.go b/node/node.go index 67f99247..768570a2 100644 --- a/node/node.go +++ b/node/node.go @@ -524,7 +524,7 @@ func (n *Node) OnStart() error { } if n.config.Instrumentation.Prometheus { - n.prometheusSrv = n.StartPrometheusServer(n.config.Instrumentation.PrometheusListenAddr) + n.prometheusSrv = n.startPrometheusServer(n.config.Instrumentation.PrometheusListenAddr) } // Start the switch (the P2P server). @@ -650,19 +650,19 @@ func (n *Node) startRPC() ([]net.Listener, error) { return listeners, nil } -// StartPrometheusServer starts a Prometheus HTTP server, listening for metrics +// startPrometheusServer starts a Prometheus HTTP server, listening for metrics // collectors on addr. -func (n *Node) StartPrometheusServer(addr string) *http.Server { +func (n *Node) startPrometheusServer(addr string) *http.Server { srv := &http.Server{ Addr: addr, Handler: promhttp.Handler(), } - go func(s *http.Server, logger log.Logger) { - if err := s.ListenAndServe(); err != http.ErrServerClosed { + go func() { + if err := srv.ListenAndServe(); err != http.ErrServerClosed { // Error starting or closing listener: - logger.Error("Prometheus HTTP server ListenAndServe", "err", err) + n.Logger.Error("Prometheus HTTP server ListenAndServe", "err", err) } - }(srv, n.Logger) + }() return srv } From e4bb3566a0101c978f8930086355b1edf64f619e Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Tue, 19 Jun 2018 11:27:58 +0400 Subject: [PATCH 21/23] move metrics constructors to a separate package --- consensus/metrics.go | 27 +++++--- mempool/metrics.go | 1 + metrics/prometheus/prometheus.go | 102 +++++++++++++++++++++++++++++++ node/node.go | 84 +------------------------ p2p/metrics.go | 2 +- 5 files changed, 126 insertions(+), 90 deletions(-) create mode 100644 metrics/prometheus/prometheus.go diff --git a/consensus/metrics.go b/consensus/metrics.go index 9a5f701f..7e1e2337 100644 --- a/consensus/metrics.go +++ b/consensus/metrics.go @@ -6,24 +6,35 @@ import ( ) // Metrics contains metrics exposed by this package. -// see MetricsProvider for descriptions. type Metrics struct { + // Height of the chain. Height metrics.Gauge + // Number of rounds. Rounds metrics.Gauge - Validators metrics.Gauge - ValidatorsPower metrics.Gauge - MissingValidators metrics.Gauge - MissingValidatorsPower metrics.Gauge - ByzantineValidators metrics.Gauge + // Number of validators. + Validators metrics.Gauge + // Total power of all validators. + ValidatorsPower metrics.Gauge + // Number of validators who did not sign. + MissingValidators metrics.Gauge + // Total power of the missing validators. + MissingValidatorsPower metrics.Gauge + // Number of validators who tried to double sign. + ByzantineValidators metrics.Gauge + // Total power of the byzantine validators. ByzantineValidatorsPower metrics.Gauge + // Time between this and the last block. BlockIntervalSeconds metrics.Histogram - NumTxs metrics.Gauge + // Number of transactions. + NumTxs metrics.Gauge + // Size of the block. BlockSizeBytes metrics.Gauge - TotalTxs metrics.Gauge + // Total number of transactions. + TotalTxs metrics.Gauge } // NopMetrics returns no-op Metrics. diff --git a/mempool/metrics.go b/mempool/metrics.go index 9c40a254..7c3cc967 100644 --- a/mempool/metrics.go +++ b/mempool/metrics.go @@ -8,6 +8,7 @@ import ( // Metrics contains metrics exposed by this package. // see MetricsProvider for descriptions. type Metrics struct { + // Size of the mempool. Size metrics.Gauge } diff --git a/metrics/prometheus/prometheus.go b/metrics/prometheus/prometheus.go new file mode 100644 index 00000000..7691efae --- /dev/null +++ b/metrics/prometheus/prometheus.go @@ -0,0 +1,102 @@ +package prometheus + +import ( + prometheus "github.com/go-kit/kit/metrics/prometheus" + stdprometheus "github.com/prometheus/client_golang/prometheus" + + cs "github.com/tendermint/tendermint/consensus" + mempl "github.com/tendermint/tendermint/mempool" + "github.com/tendermint/tendermint/p2p" +) + +// Consensus returns consensus Metrics build using Prometheus client library. +func Consensus() *cs.Metrics { + return &cs.Metrics{ + Height: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "height", + Help: "Height of the chain.", + }, []string{}), + Rounds: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "rounds", + Help: "Number of rounds.", + }, []string{}), + + Validators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "validators", + Help: "Number of validators.", + }, []string{}), + ValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "validators_power", + Help: "Total power of all validators.", + }, []string{}), + MissingValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "missing_validators", + Help: "Number of validators who did not sign.", + }, []string{}), + MissingValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "missing_validators_power", + Help: "Total power of the missing validators.", + }, []string{}), + ByzantineValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "byzantine_validators", + Help: "Number of validators who tried to double sign.", + }, []string{}), + ByzantineValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "byzantine_validators_power", + Help: "Total power of the byzantine validators.", + }, []string{}), + + BlockIntervalSeconds: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ + Subsystem: "consensus", + Name: "block_interval_seconds", + Help: "Time between this and the last block.", + Buckets: []float64{1, 2.5, 5, 10, 60}, + }, []string{}), + + NumTxs: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "num_txs", + Help: "Number of transactions.", + }, []string{}), + BlockSizeBytes: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "block_size_bytes", + Help: "Size of the block.", + }, []string{}), + TotalTxs: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "total_txs", + Help: "Total number of transactions.", + }, []string{}), + } +} + +// P2P returns p2p Metrics build using Prometheus client library. +func P2P() *p2p.Metrics { + return &p2p.Metrics{ + Peers: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "p2p", + Name: "peers", + Help: "Number of peers.", + }, []string{}), + } +} + +// Mempool returns mempool Metrics build using Prometheus client library. +func Mempool() *mempl.Metrics { + return &mempl.Metrics{ + Size: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "mempool", + Name: "size", + Help: "Size of the mempool (number of uncommitted transactions).", + }, []string{}), + } +} diff --git a/node/node.go b/node/node.go index 768570a2..0f55683b 100644 --- a/node/node.go +++ b/node/node.go @@ -8,8 +8,6 @@ import ( "net" "net/http" - prometheus "github.com/go-kit/kit/metrics/prometheus" - stdprometheus "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" abci "github.com/tendermint/abci/types" @@ -24,6 +22,7 @@ import ( cs "github.com/tendermint/tendermint/consensus" "github.com/tendermint/tendermint/evidence" mempl "github.com/tendermint/tendermint/mempool" + prometrics "github.com/tendermint/tendermint/metrics/prometheus" "github.com/tendermint/tendermint/p2p" "github.com/tendermint/tendermint/p2p/pex" "github.com/tendermint/tendermint/privval" @@ -91,90 +90,13 @@ func DefaultNewNode(config *cfg.Config, logger log.Logger) (*Node, error) { ) } -// MetricsProvider returns a consensus and p2p Metrics. +// MetricsProvider returns a consensus, p2p and mempool Metrics. type MetricsProvider func() (*cs.Metrics, *p2p.Metrics, *mempl.Metrics) // DefaultMetricsProvider returns consensus, p2p and mempool Metrics build // using Prometheus client library. func DefaultMetricsProvider() (*cs.Metrics, *p2p.Metrics, *mempl.Metrics) { - return &cs.Metrics{ - Height: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "height", - Help: "Height of the chain.", - }, []string{}), - Rounds: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "rounds", - Help: "Number of rounds.", - }, []string{}), - - Validators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "validators", - Help: "Number of validators.", - }, []string{}), - ValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "validators_power", - Help: "Total power of all validators.", - }, []string{}), - MissingValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "missing_validators", - Help: "Number of validators who did not sign.", - }, []string{}), - MissingValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "missing_validators_power", - Help: "Total power of the missing validators.", - }, []string{}), - ByzantineValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "byzantine_validators", - Help: "Number of validators who tried to double sign.", - }, []string{}), - ByzantineValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "byzantine_validators_power", - Help: "Total power of the byzantine validators.", - }, []string{}), - - BlockIntervalSeconds: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ - Subsystem: "consensus", - Name: "block_interval_seconds", - Help: "Time between this and the last block.", - Buckets: []float64{1, 2.5, 5, 10, 60}, - }, []string{}), - - NumTxs: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "num_txs", - Help: "Number of transactions.", - }, []string{}), - BlockSizeBytes: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "block_size_bytes", - Help: "Size of the block.", - }, []string{}), - TotalTxs: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "total_txs", - Help: "Total number of transactions.", - }, []string{}), - }, &p2p.Metrics{ - Peers: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "p2p", - Name: "peers", - Help: "Number of peers.", - }, []string{}), - }, &mempl.Metrics{ - Size: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "mempool", - Name: "size", - Help: "Size of the mempool (number of uncommitted transactions).", - }, []string{}), - } + return prometrics.Consensus(), prometrics.P2P(), prometrics.Mempool() } // NopMetricsProvider returns consensus, p2p and mempool Metrics as no-op. diff --git a/p2p/metrics.go b/p2p/metrics.go index 37efe443..2e3eff14 100644 --- a/p2p/metrics.go +++ b/p2p/metrics.go @@ -6,8 +6,8 @@ import ( ) // Metrics contains metrics exposed by this package. -// see MetricsProvider for descriptions. type Metrics struct { + // Number of peers. Peers metrics.Gauge } From 205d8b8062d3f17b396ff7741a20e9c9e70f4fb8 Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Wed, 20 Jun 2018 10:39:19 +0400 Subject: [PATCH 22/23] fixes after @xla review - move prometheus metrics into internal packages - *Option structs - misc. format changes --- consensus/metrics.go | 73 ++++++++++++++++++++++ consensus/state.go | 24 +++++--- mempool/mempool.go | 16 +++-- mempool/metrics.go | 14 +++++ metrics/prometheus/prometheus.go | 102 ------------------------------- node/node.go | 22 +++++-- p2p/metrics.go | 14 +++++ p2p/switch.go | 11 ++-- 8 files changed, 151 insertions(+), 125 deletions(-) delete mode 100644 metrics/prometheus/prometheus.go diff --git a/consensus/metrics.go b/consensus/metrics.go index 7e1e2337..253880e8 100644 --- a/consensus/metrics.go +++ b/consensus/metrics.go @@ -3,6 +3,9 @@ package consensus import ( "github.com/go-kit/kit/metrics" "github.com/go-kit/kit/metrics/discard" + + prometheus "github.com/go-kit/kit/metrics/prometheus" + stdprometheus "github.com/prometheus/client_golang/prometheus" ) // Metrics contains metrics exposed by this package. @@ -37,6 +40,76 @@ type Metrics struct { TotalTxs metrics.Gauge } +// PrometheusMetrics returns Metrics build using Prometheus client library. +func PrometheusMetrics() *Metrics { + return &Metrics{ + Height: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "height", + Help: "Height of the chain.", + }, []string{}), + Rounds: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "rounds", + Help: "Number of rounds.", + }, []string{}), + + Validators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "validators", + Help: "Number of validators.", + }, []string{}), + ValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "validators_power", + Help: "Total power of all validators.", + }, []string{}), + MissingValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "missing_validators", + Help: "Number of validators who did not sign.", + }, []string{}), + MissingValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "missing_validators_power", + Help: "Total power of the missing validators.", + }, []string{}), + ByzantineValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "byzantine_validators", + Help: "Number of validators who tried to double sign.", + }, []string{}), + ByzantineValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "byzantine_validators_power", + Help: "Total power of the byzantine validators.", + }, []string{}), + + BlockIntervalSeconds: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ + Subsystem: "consensus", + Name: "block_interval_seconds", + Help: "Time between this and the last block.", + Buckets: []float64{1, 2.5, 5, 10, 60}, + }, []string{}), + + NumTxs: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "num_txs", + Help: "Number of transactions.", + }, []string{}), + BlockSizeBytes: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "block_size_bytes", + Help: "Size of the block.", + }, []string{}), + TotalTxs: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "consensus", + Name: "total_txs", + Help: "Total number of transactions.", + }, []string{}), + } +} + // NopMetrics returns no-op Metrics. func NopMetrics() *Metrics { return &Metrics{ diff --git a/consensus/state.go b/consensus/state.go index 686d2e7d..aab82296 100644 --- a/consensus/state.go +++ b/consensus/state.go @@ -120,8 +120,19 @@ type ConsensusState struct { metrics *Metrics } +// CSOption sets an optional parameter on the ConsensusState. +type CSOption func(*ConsensusState) + // NewConsensusState returns a new ConsensusState. -func NewConsensusState(config *cfg.ConsensusConfig, state sm.State, blockExec *sm.BlockExecutor, blockStore sm.BlockStore, mempool sm.Mempool, evpool sm.EvidencePool, options ...func(*ConsensusState)) *ConsensusState { +func NewConsensusState( + config *cfg.ConsensusConfig, + state sm.State, + blockExec *sm.BlockExecutor, + blockStore sm.BlockStore, + mempool sm.Mempool, + evpool sm.EvidencePool, + options ...CSOption, +) *ConsensusState { cs := &ConsensusState{ config: config, blockExec: blockExec, @@ -169,10 +180,8 @@ func (cs *ConsensusState) SetEventBus(b *types.EventBus) { } // WithMetrics sets the metrics. -func WithMetrics(metrics *Metrics) func(*ConsensusState) { - return func(cs *ConsensusState) { - cs.metrics = metrics - } +func WithMetrics(metrics *Metrics) CSOption { + return func(cs *ConsensusState) { cs.metrics = metrics } } // String returns a string. @@ -1338,8 +1347,9 @@ func (cs *ConsensusState) recordMetrics(height int64, block *types.Block) { if height > 1 { lastBlockMeta := cs.blockStore.LoadBlockMeta(height - 1) - cs.metrics.BlockIntervalSeconds. - Observe(block.Time.Sub(lastBlockMeta.Header.Time).Seconds()) + cs.metrics.BlockIntervalSeconds.Observe( + block.Time.Sub(lastBlockMeta.Header.Time).Seconds(), + ) } cs.metrics.NumTxs.Set(float64(block.NumTxs)) diff --git a/mempool/mempool.go b/mempool/mempool.go index 1df1651e..418470a7 100644 --- a/mempool/mempool.go +++ b/mempool/mempool.go @@ -87,8 +87,16 @@ type Mempool struct { metrics *Metrics } +// MempoolOption sets an optional parameter on the Mempool. +type MempoolOption func(*Mempool) + // NewMempool returns a new Mempool with the given configuration and connection to an application. -func NewMempool(config *cfg.MempoolConfig, proxyAppConn proxy.AppConnMempool, height int64, options ...func(*Mempool)) *Mempool { +func NewMempool( + config *cfg.MempoolConfig, + proxyAppConn proxy.AppConnMempool, + height int64, + options ...MempoolOption, +) *Mempool { mempool := &Mempool{ config: config, proxyAppConn: proxyAppConn, @@ -122,10 +130,8 @@ func (mem *Mempool) SetLogger(l log.Logger) { } // WithMetrics sets the metrics. -func WithMetrics(metrics *Metrics) func(*Mempool) { - return func(mem *Mempool) { - mem.metrics = metrics - } +func WithMetrics(metrics *Metrics) MempoolOption { + return func(mem *Mempool) { mem.metrics = metrics } } // CloseWAL closes and discards the underlying WAL file. diff --git a/mempool/metrics.go b/mempool/metrics.go index 7c3cc967..f381678c 100644 --- a/mempool/metrics.go +++ b/mempool/metrics.go @@ -3,6 +3,9 @@ package mempool import ( "github.com/go-kit/kit/metrics" "github.com/go-kit/kit/metrics/discard" + + prometheus "github.com/go-kit/kit/metrics/prometheus" + stdprometheus "github.com/prometheus/client_golang/prometheus" ) // Metrics contains metrics exposed by this package. @@ -12,6 +15,17 @@ type Metrics struct { Size metrics.Gauge } +// PrometheusMetrics returns Metrics build using Prometheus client library. +func PrometheusMetrics() *Metrics { + return &Metrics{ + Size: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "mempool", + Name: "size", + Help: "Size of the mempool (number of uncommitted transactions).", + }, []string{}), + } +} + // NopMetrics returns no-op Metrics. func NopMetrics() *Metrics { return &Metrics{ diff --git a/metrics/prometheus/prometheus.go b/metrics/prometheus/prometheus.go deleted file mode 100644 index 7691efae..00000000 --- a/metrics/prometheus/prometheus.go +++ /dev/null @@ -1,102 +0,0 @@ -package prometheus - -import ( - prometheus "github.com/go-kit/kit/metrics/prometheus" - stdprometheus "github.com/prometheus/client_golang/prometheus" - - cs "github.com/tendermint/tendermint/consensus" - mempl "github.com/tendermint/tendermint/mempool" - "github.com/tendermint/tendermint/p2p" -) - -// Consensus returns consensus Metrics build using Prometheus client library. -func Consensus() *cs.Metrics { - return &cs.Metrics{ - Height: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "height", - Help: "Height of the chain.", - }, []string{}), - Rounds: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "rounds", - Help: "Number of rounds.", - }, []string{}), - - Validators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "validators", - Help: "Number of validators.", - }, []string{}), - ValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "validators_power", - Help: "Total power of all validators.", - }, []string{}), - MissingValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "missing_validators", - Help: "Number of validators who did not sign.", - }, []string{}), - MissingValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "missing_validators_power", - Help: "Total power of the missing validators.", - }, []string{}), - ByzantineValidators: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "byzantine_validators", - Help: "Number of validators who tried to double sign.", - }, []string{}), - ByzantineValidatorsPower: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "byzantine_validators_power", - Help: "Total power of the byzantine validators.", - }, []string{}), - - BlockIntervalSeconds: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ - Subsystem: "consensus", - Name: "block_interval_seconds", - Help: "Time between this and the last block.", - Buckets: []float64{1, 2.5, 5, 10, 60}, - }, []string{}), - - NumTxs: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "num_txs", - Help: "Number of transactions.", - }, []string{}), - BlockSizeBytes: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "block_size_bytes", - Help: "Size of the block.", - }, []string{}), - TotalTxs: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "consensus", - Name: "total_txs", - Help: "Total number of transactions.", - }, []string{}), - } -} - -// P2P returns p2p Metrics build using Prometheus client library. -func P2P() *p2p.Metrics { - return &p2p.Metrics{ - Peers: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "p2p", - Name: "peers", - Help: "Number of peers.", - }, []string{}), - } -} - -// Mempool returns mempool Metrics build using Prometheus client library. -func Mempool() *mempl.Metrics { - return &mempl.Metrics{ - Size: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ - Subsystem: "mempool", - Name: "size", - Help: "Size of the mempool (number of uncommitted transactions).", - }, []string{}), - } -} diff --git a/node/node.go b/node/node.go index 0f55683b..dc79cff7 100644 --- a/node/node.go +++ b/node/node.go @@ -22,7 +22,6 @@ import ( cs "github.com/tendermint/tendermint/consensus" "github.com/tendermint/tendermint/evidence" mempl "github.com/tendermint/tendermint/mempool" - prometrics "github.com/tendermint/tendermint/metrics/prometheus" "github.com/tendermint/tendermint/p2p" "github.com/tendermint/tendermint/p2p/pex" "github.com/tendermint/tendermint/privval" @@ -96,7 +95,7 @@ type MetricsProvider func() (*cs.Metrics, *p2p.Metrics, *mempl.Metrics) // DefaultMetricsProvider returns consensus, p2p and mempool Metrics build // using Prometheus client library. func DefaultMetricsProvider() (*cs.Metrics, *p2p.Metrics, *mempl.Metrics) { - return prometrics.Consensus(), prometrics.P2P(), prometrics.Mempool() + return cs.PrometheusMetrics(), p2p.PrometheusMetrics(), mempl.PrometheusMetrics() } // NopMetricsProvider returns consensus, p2p and mempool Metrics as no-op. @@ -243,8 +242,12 @@ func NewNode(config *cfg.Config, // Make MempoolReactor mempoolLogger := logger.With("module", "mempool") - mempool := mempl.NewMempool(config.Mempool, proxyApp.Mempool(), state.LastBlockHeight, - mempl.WithMetrics(memplMetrics)) + mempool := mempl.NewMempool( + config.Mempool, + proxyApp.Mempool(), + state.LastBlockHeight, + mempl.WithMetrics(memplMetrics), + ) mempool.SetLogger(mempoolLogger) mempool.InitWAL() // no need to have the mempool wal during tests mempoolReactor := mempl.NewMempoolReactor(config.Mempool, mempool) @@ -275,8 +278,15 @@ func NewNode(config *cfg.Config, bcReactor.SetLogger(logger.With("module", "blockchain")) // Make ConsensusReactor - consensusState := cs.NewConsensusState(config.Consensus, state.Copy(), - blockExec, blockStore, mempool, evidencePool, cs.WithMetrics(csMetrics)) + consensusState := cs.NewConsensusState( + config.Consensus, + state.Copy(), + blockExec, + blockStore, + mempool, + evidencePool, + cs.WithMetrics(csMetrics), + ) consensusState.SetLogger(consensusLogger) if privValidator != nil { consensusState.SetPrivValidator(privValidator) diff --git a/p2p/metrics.go b/p2p/metrics.go index 2e3eff14..ab876ee7 100644 --- a/p2p/metrics.go +++ b/p2p/metrics.go @@ -3,6 +3,9 @@ package p2p import ( "github.com/go-kit/kit/metrics" "github.com/go-kit/kit/metrics/discard" + + prometheus "github.com/go-kit/kit/metrics/prometheus" + stdprometheus "github.com/prometheus/client_golang/prometheus" ) // Metrics contains metrics exposed by this package. @@ -11,6 +14,17 @@ type Metrics struct { Peers metrics.Gauge } +// PrometheusMetrics returns Metrics build using Prometheus client library. +func PrometheusMetrics() *Metrics { + return &Metrics{ + Peers: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Subsystem: "p2p", + Name: "peers", + Help: "Number of peers.", + }, []string{}), + } +} + // NopMetrics returns no-op Metrics. func NopMetrics() *Metrics { return &Metrics{ diff --git a/p2p/switch.go b/p2p/switch.go index ed7a8098..bf5f9747 100644 --- a/p2p/switch.go +++ b/p2p/switch.go @@ -77,8 +77,11 @@ type Switch struct { metrics *Metrics } +// SwitchOption sets an optional parameter on the Switch. +type SwitchOption func(*Switch) + // NewSwitch creates a new Switch with the given config. -func NewSwitch(cfg *config.P2PConfig, options ...func(*Switch)) *Switch { +func NewSwitch(cfg *config.P2PConfig, options ...SwitchOption) *Switch { sw := &Switch{ config: cfg, reactors: make(map[string]Reactor), @@ -111,10 +114,8 @@ func NewSwitch(cfg *config.P2PConfig, options ...func(*Switch)) *Switch { } // WithMetrics sets the metrics. -func WithMetrics(metrics *Metrics) func(*Switch) { - return func(sw *Switch) { - sw.metrics = metrics - } +func WithMetrics(metrics *Metrics) SwitchOption { + return func(sw *Switch) { sw.metrics = metrics } } //--------------------------------------------------------------------- From 8c1ca9d64a0886a04915f2e12d51965e659b6705 Mon Sep 17 00:00:00 2001 From: Anton Kaliaev Date: Wed, 20 Jun 2018 12:42:43 +0400 Subject: [PATCH 23/23] updates to docs/specification/configuration.md after rebase --- config/config.go | 4 ++-- docs/specification/configuration.md | 21 ++++++++++++++++----- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/config/config.go b/config/config.go index 1a0dfaf3..6a283a82 100644 --- a/config/config.go +++ b/config/config.go @@ -545,8 +545,8 @@ type TxIndexConfig struct { // What indexer to use for transactions // // Options: - // 1) "null" (default) - // 2) "kv" - the simplest possible indexer, backed by key-value storage (defaults to levelDB; see DBBackend). + // 1) "null" + // 2) "kv" (default) - the simplest possible indexer, backed by key-value storage (defaults to levelDB; see DBBackend). Indexer string `mapstructure:"indexer"` // Comma-separated list of tags to index (by default the only tag is tx hash) diff --git a/docs/specification/configuration.md b/docs/specification/configuration.md index d39aa3cb..08981c06 100644 --- a/docs/specification/configuration.md +++ b/docs/specification/configuration.md @@ -171,19 +171,30 @@ peer_query_maj23_sleep_duration = 2000 # What indexer to use for transactions # # Options: -# 1) "null" (default) -# 2) "kv" - the simplest possible indexer, backed by key-value storage (defaults to levelDB; see DBBackend). -indexer = "{{ .TxIndex.Indexer }}" +# 1) "null" +# 2) "kv" (default) - the simplest possible indexer, backed by key-value storage (defaults to levelDB; see DBBackend). +indexer = "kv" # Comma-separated list of tags to index (by default the only tag is tx hash) # # It's recommended to index only a subset of tags due to possible memory # bloat. This is, of course, depends on the indexer's DB and the volume of # transactions. -index_tags = "{{ .TxIndex.IndexTags }}" +index_tags = "" # When set to true, tells indexer to index all tags. Note this may be not # desirable (see the comment above). IndexTags has a precedence over # IndexAllTags (i.e. when given both, IndexTags will be indexed). -index_all_tags = {{ .TxIndex.IndexAllTags }} +index_all_tags = false + +##### instrumentation configuration options ##### +[instrumentation] + +# When true, Prometheus metrics are served under /metrics on +# PrometheusListenAddr. +# Check out the documentation for the list of available metrics. +prometheus = false + +# Address to listen for Prometheus collector(s) connections +prometheus_listen_addr = ":26660" ```