2016-01-15 16:55:24 -05:00
|
|
|
package types
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
2016-02-27 00:31:28 -05:00
|
|
|
"os/exec"
|
2016-01-15 16:55:24 -05:00
|
|
|
"sync"
|
2016-02-02 13:10:43 -05:00
|
|
|
"time"
|
2016-01-15 16:55:24 -05:00
|
|
|
|
2016-02-02 13:10:43 -05:00
|
|
|
"github.com/rcrowley/go-metrics"
|
|
|
|
. "github.com/tendermint/go-common"
|
|
|
|
tmtypes "github.com/tendermint/tendermint/types"
|
2016-01-15 16:55:24 -05:00
|
|
|
)
|
|
|
|
|
2016-02-02 13:10:43 -05:00
|
|
|
// newBlockTimeoutSeconds is how many seconds we wait for a new block before
// we consider ourselves unhealthy.
const newBlockTimeoutSeconds = 5
|
|
|
|
|
2016-01-15 16:55:24 -05:00
|
|
|
//------------------------------------------------
|
|
|
|
// blockchain types
|
2016-02-07 12:11:12 -08:00
|
|
|
// NOTE: mintnet duplicates some types from here and val.go
|
2016-01-27 00:27:24 -05:00
|
|
|
//------------------------------------------------
|
2016-01-15 16:55:24 -05:00
|
|
|
|
2016-01-15 23:31:57 -05:00
|
|
|
// ChainAndValidatorSetIDs lists the IDs of the known chains and validator
// sets (from which anything else can be found).
// It is returned by the Status RPC.
type ChainAndValidatorSetIDs struct {
	ChainIDs        []string `json:"chain_ids"`
	ValidatorSetIDs []string `json:"validator_set_ids"`
}
|
|
|
|
|
2016-01-27 00:27:24 -05:00
|
|
|
//------------------------------------------------
|
|
|
|
// chain state
|
2016-01-15 23:31:57 -05:00
|
|
|
|
|
|
|
// Main chain state
|
2016-01-27 00:27:24 -05:00
|
|
|
// Returned over RPC; also used to manage state
|
2016-01-15 16:55:24 -05:00
|
|
|
type ChainState struct {
|
|
|
|
Config *BlockchainConfig `json:"config"`
|
|
|
|
Status *BlockchainStatus `json:"status"`
|
|
|
|
}
|
|
|
|
|
2016-01-27 00:27:24 -05:00
|
|
|
func (cs *ChainState) NewBlock(block *tmtypes.Block) {
|
|
|
|
cs.Status.NewBlock(block)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (cs *ChainState) UpdateLatency(oldLatency, newLatency float64) {
|
|
|
|
cs.Status.UpdateLatency(oldLatency, newLatency)
|
2016-01-21 22:57:24 -05:00
|
|
|
}
|
|
|
|
|
2016-02-02 13:10:43 -05:00
|
|
|
func (cs *ChainState) SetOnline(val *ValidatorState, isOnline bool) {
|
|
|
|
cs.Status.SetOnline(val, isOnline)
|
|
|
|
}
|
|
|
|
|
2016-01-27 00:27:24 -05:00
|
|
|
//------------------------------------------------
|
|
|
|
// Blockchain Config: id, validator config
|
|
|
|
|
|
|
|
// Chain Config
|
2016-01-15 16:55:24 -05:00
|
|
|
type BlockchainConfig struct {
|
2016-01-27 00:27:24 -05:00
|
|
|
// should be fixed for life of chain
|
2016-01-15 23:31:57 -05:00
|
|
|
ID string `json:"id"`
|
2016-01-27 00:27:24 -05:00
|
|
|
ValSetID string `json:"val_set_id"` // NOTE: do we really commit to one val set per chain?
|
2016-01-15 16:55:24 -05:00
|
|
|
|
2016-01-27 00:27:24 -05:00
|
|
|
// handles live validator states (latency, last block, etc)
|
|
|
|
// and validator set changes
|
2016-01-15 23:31:57 -05:00
|
|
|
mtx sync.Mutex
|
2016-01-21 22:57:24 -05:00
|
|
|
Validators []*ValidatorState `json:"validators"` // TODO: this should be ValidatorConfig and the state in BlockchainStatus
|
2016-01-15 16:55:24 -05:00
|
|
|
valIDMap map[string]int // map IDs to indices
|
|
|
|
}
|
|
|
|
|
2016-01-27 00:27:24 -05:00
|
|
|
// So we can fetch validator by id rather than index
|
2016-01-15 16:55:24 -05:00
|
|
|
func (bc *BlockchainConfig) PopulateValIDMap() {
|
|
|
|
bc.mtx.Lock()
|
|
|
|
defer bc.mtx.Unlock()
|
|
|
|
bc.valIDMap = make(map[string]int)
|
|
|
|
for i, v := range bc.Validators {
|
2016-01-21 22:57:24 -05:00
|
|
|
bc.valIDMap[v.Config.Validator.ID] = i
|
2016-01-15 16:55:24 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-01-21 22:57:24 -05:00
|
|
|
func (bc *BlockchainConfig) GetValidatorByID(valID string) (*ValidatorState, error) {
|
2016-01-15 16:55:24 -05:00
|
|
|
bc.mtx.Lock()
|
|
|
|
defer bc.mtx.Unlock()
|
|
|
|
valIndex, ok := bc.valIDMap[valID]
|
|
|
|
if !ok {
|
|
|
|
return nil, fmt.Errorf("Unknown validator %s", valID)
|
|
|
|
}
|
|
|
|
return bc.Validators[valIndex], nil
|
|
|
|
}
|
|
|
|
|
2016-01-27 00:27:24 -05:00
|
|
|
//------------------------------------------------
|
|
|
|
// BlockchainStatus
|
|
|
|
|
|
|
|
// Basic blockchain metrics
|
|
|
|
type BlockchainStatus struct {
|
|
|
|
mtx sync.Mutex
|
|
|
|
|
|
|
|
// Blockchain Info
|
2016-02-02 13:10:43 -05:00
|
|
|
Height int `json:"height"` // latest height we've got
|
2016-01-27 00:27:24 -05:00
|
|
|
BlockchainSize int64 `json:"blockchain_size"`
|
2016-02-02 13:10:43 -05:00
|
|
|
MeanBlockTime float64 `json:"mean_block_time" wire:"unsafe"` // ms (avg over last minute)
|
|
|
|
TxThroughput float64 `json:"tx_throughput" wire:"unsafe"` // tx/s (avg over last minute)
|
2016-01-27 00:27:24 -05:00
|
|
|
|
|
|
|
blockTimeMeter metrics.Meter
|
|
|
|
txThroughputMeter metrics.Meter
|
|
|
|
|
|
|
|
// Network Info
|
2016-02-02 13:10:43 -05:00
|
|
|
NumValidators int `json:"num_validators"`
|
|
|
|
ActiveValidators int `json:"active_validators"`
|
|
|
|
//ActiveNodes int `json:"active_nodes"`
|
|
|
|
MeanLatency float64 `json:"mean_latency" wire:"unsafe"` // ms
|
|
|
|
|
|
|
|
// Health
|
|
|
|
FullHealth bool `json:"full_health"` // all validators online, synced, making blocks
|
|
|
|
Healthy bool `json:"healthy"` // we're making blocks
|
|
|
|
|
|
|
|
// Uptime
|
|
|
|
UptimeData *UptimeData `json:"uptime_data"`
|
2016-01-27 00:27:24 -05:00
|
|
|
|
|
|
|
// What else can we get / do we want?
|
|
|
|
// TODO: charts for block time, latency (websockets/event-meter ?)
|
2016-02-27 00:31:28 -05:00
|
|
|
|
|
|
|
// for benchmark runs
|
|
|
|
benchResults *BenchmarkResults
|
|
|
|
}
|
|
|
|
|
|
|
|
func (bc *BlockchainStatus) Benchmark(done chan *BenchmarkResults, nTxs int, args []string) {
|
|
|
|
bc.benchResults = &BenchmarkResults{
|
|
|
|
StartTime: time.Now(),
|
|
|
|
NumTxs: nTxs,
|
|
|
|
done: done,
|
|
|
|
}
|
|
|
|
|
|
|
|
// TODO: capture output to file
|
|
|
|
cmd := exec.Command(args[0], args[1:]...)
|
|
|
|
go cmd.Run()
|
|
|
|
}
|
|
|
|
|
|
|
|
// Block is the record kept for one block during a benchmark run.
type Block struct {
	Time   time.Time `json:"time"`
	Height int       `json:"height"`
	NumTxs int       `json:"num_txs"`
}

// BenchmarkResults collects the blocks of a benchmark run together with the
// summary figures computed from them by Done.
type BenchmarkResults struct {
	StartTime      time.Time `json:"start_time"`
	TotalTime      float64   `json:"total_time"` // seconds
	NumBlocks      int       `json:"num_blocks"`
	NumTxs         int       `json:"num_txs"`
	Blocks         []*Block  `json:"blocks"`
	MeanLatency    float64   `json:"latency"`    // seconds per block
	MeanThroughput float64   `json:"throughput"` // txs per second

	done chan *BenchmarkResults // Done sends the results here
}

// ElapsedTime returns the total time to commit all txs, in seconds: the time
// from StartTime to the last recorded block. It returns 0 when no block has
// been recorded.
func (br *BenchmarkResults) ElapsedTime() float64 {
	if len(br.Blocks) == 0 {
		return 0
	}
	// Use the slice length rather than NumBlocks, which is only set by Done.
	last := br.Blocks[len(br.Blocks)-1]
	return last.Time.Sub(br.StartTime).Seconds()
}

// Latency returns the average seconds per block, or 0 when no block has been
// recorded.
func (br *BenchmarkResults) Latency() float64 {
	n := len(br.Blocks)
	if n == 0 {
		return 0
	}
	return br.ElapsedTime() / float64(n)
}

// Throughput returns the average txs per second (NumTxs over the elapsed
// time), or 0 when no time has elapsed.
func (br *BenchmarkResults) Throughput() float64 {
	elapsed := br.ElapsedTime()
	if elapsed <= 0 {
		return 0
	}
	return float64(br.NumTxs) / elapsed
}

// Done fills in the summary fields (NumBlocks, TotalTime, MeanThroughput,
// MeanLatency) and sends the results on the done channel. The send blocks
// until the channel can accept the value.
func (br *BenchmarkResults) Done() {
	br.NumBlocks = len(br.Blocks)
	br.TotalTime = br.ElapsedTime()
	br.MeanThroughput = br.Throughput()
	br.MeanLatency = br.Latency()
	br.done <- br
}
|
2016-01-15 16:55:24 -05:00
|
|
|
|
2016-02-02 13:10:43 -05:00
|
|
|
// UptimeData records when tracking started, the resulting uptime figure, and
// the bookkeeping needed to compute it.
type UptimeData struct {
	StartTime time.Time `json:"start_time"`
	Uptime    float64   `json:"uptime" wire:"unsafe"` // Percentage of time we've been Healthy, ever

	totalDownTime time.Duration // total downtime (only updated when we come back online)
	wentDown      time.Time

	// TODO: uptime over last day, month, year
}
|
|
|
|
|
2016-01-27 00:27:24 -05:00
|
|
|
func NewBlockchainStatus() *BlockchainStatus {
|
|
|
|
return &BlockchainStatus{
|
|
|
|
blockTimeMeter: metrics.NewMeter(),
|
|
|
|
txThroughputMeter: metrics.NewMeter(),
|
2016-02-02 13:10:43 -05:00
|
|
|
Healthy: true,
|
|
|
|
UptimeData: &UptimeData{
|
|
|
|
StartTime: time.Now(),
|
|
|
|
Uptime: 100.0,
|
|
|
|
},
|
2016-01-15 16:55:24 -05:00
|
|
|
}
|
2016-01-27 00:27:24 -05:00
|
|
|
}
|
2016-01-15 16:55:24 -05:00
|
|
|
|
2016-01-27 00:27:24 -05:00
|
|
|
func (s *BlockchainStatus) NewBlock(block *tmtypes.Block) {
|
|
|
|
s.mtx.Lock()
|
|
|
|
defer s.mtx.Unlock()
|
|
|
|
if block.Header.Height > s.Height {
|
|
|
|
s.Height = block.Header.Height
|
|
|
|
s.blockTimeMeter.Mark(1)
|
|
|
|
s.txThroughputMeter.Mark(int64(block.Header.NumTxs))
|
2016-02-02 13:10:43 -05:00
|
|
|
s.MeanBlockTime = (1 / s.blockTimeMeter.Rate1()) * 1000 // 1/s to ms
|
|
|
|
s.TxThroughput = s.txThroughputMeter.Rate1()
|
|
|
|
|
2016-02-27 00:31:28 -05:00
|
|
|
if s.benchResults != nil {
|
|
|
|
s.benchResults.Blocks = append(s.benchResults.Blocks, &Block{
|
|
|
|
Time: time.Now(),
|
|
|
|
Height: s.Height,
|
|
|
|
NumTxs: block.Header.NumTxs,
|
|
|
|
})
|
|
|
|
if s.txThroughputMeter.Count() >= int64(s.benchResults.NumTxs) {
|
|
|
|
// XXX: do we need to be more careful than just counting?!
|
|
|
|
s.benchResults.Done()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-02-02 13:10:43 -05:00
|
|
|
// if we're making blocks, we're healthy
|
|
|
|
if !s.Healthy {
|
|
|
|
s.Healthy = true
|
|
|
|
s.UptimeData.totalDownTime += time.Since(s.UptimeData.wentDown)
|
|
|
|
}
|
|
|
|
|
|
|
|
// if we are connected to all validators, we're at full health
|
|
|
|
// TODO: make sure they're all at the same height (within a block) and all proposing (and possibly validating )
|
|
|
|
// Alternatively, just check there hasn't been a new round in numValidators rounds
|
|
|
|
if s.ActiveValidators == s.NumValidators {
|
|
|
|
s.FullHealth = true
|
|
|
|
}
|
|
|
|
|
|
|
|
// TODO: should we refactor so there's a central loop and ticker?
|
|
|
|
go s.newBlockTimeout(s.Height)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// we have newBlockTimeoutSeconds to make a new block, else we're unhealthy
|
|
|
|
func (s *BlockchainStatus) newBlockTimeout(height int) {
|
|
|
|
time.Sleep(time.Second * newBlockTimeoutSeconds)
|
|
|
|
|
|
|
|
s.mtx.Lock()
|
|
|
|
defer s.mtx.Unlock()
|
|
|
|
if !(s.Height > height) {
|
|
|
|
s.Healthy = false
|
|
|
|
s.UptimeData.wentDown = time.Now()
|
2016-01-15 16:55:24 -05:00
|
|
|
}
|
2016-01-27 00:27:24 -05:00
|
|
|
}
|
|
|
|
|
2016-02-02 13:10:43 -05:00
|
|
|
// Used to calculate uptime on demand. TODO: refactor this into the central loop ...
|
|
|
|
func (s *BlockchainStatus) RealTimeUpdates() {
|
|
|
|
s.mtx.Lock()
|
|
|
|
defer s.mtx.Unlock()
|
|
|
|
since := time.Since(s.UptimeData.StartTime)
|
|
|
|
uptime := since - s.UptimeData.totalDownTime
|
|
|
|
if !s.Healthy {
|
|
|
|
uptime -= time.Since(s.UptimeData.wentDown)
|
|
|
|
}
|
|
|
|
s.UptimeData.Uptime = float64(uptime) / float64(since)
|
|
|
|
}
|
|
|
|
|
2016-01-27 00:27:24 -05:00
|
|
|
func (s *BlockchainStatus) UpdateLatency(oldLatency, newLatency float64) {
|
|
|
|
s.mtx.Lock()
|
|
|
|
defer s.mtx.Unlock()
|
|
|
|
|
2016-02-02 13:10:43 -05:00
|
|
|
// update avg validator rpc latency
|
2016-01-27 00:27:24 -05:00
|
|
|
mean := s.MeanLatency * float64(s.NumValidators)
|
|
|
|
mean = (mean - oldLatency + newLatency) / float64(s.NumValidators)
|
|
|
|
s.MeanLatency = mean
|
2016-02-02 13:10:43 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
// Toggle validators online/offline (updates ActiveValidators and FullHealth)
|
|
|
|
func (s *BlockchainStatus) SetOnline(val *ValidatorState, isOnline bool) {
|
|
|
|
val.SetOnline(isOnline)
|
|
|
|
|
|
|
|
var change int
|
|
|
|
if isOnline {
|
|
|
|
change = 1
|
|
|
|
} else {
|
|
|
|
change = -1
|
|
|
|
}
|
|
|
|
|
|
|
|
s.mtx.Lock()
|
|
|
|
defer s.mtx.Unlock()
|
|
|
|
|
|
|
|
s.ActiveValidators += change
|
|
|
|
|
|
|
|
if s.ActiveValidators > s.NumValidators {
|
|
|
|
panic(Fmt("got %d validators. max %ds", s.ActiveValidators, s.NumValidators))
|
|
|
|
}
|
|
|
|
|
|
|
|
// if we lost a connection we're no longer at full health, even if it's still online.
|
|
|
|
// so long as we receive blocks, we'll know we're still healthy
|
|
|
|
if s.ActiveValidators != s.NumValidators {
|
|
|
|
s.FullHealth = false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// TwoThirdsMaj reports whether count is strictly greater than two thirds of
// total.
func TwoThirdsMaj(count, total int) bool {
	const twoThirds = 2.0 / 3.0
	threshold := twoThirds * float64(total)
	return float64(count) > threshold
}
|