tendermint/types/chain.go

301 lines
8.5 KiB
Go
Raw Normal View History

2016-01-15 16:55:24 -05:00
package types
import (
"fmt"
2016-02-27 00:31:28 -05:00
"os/exec"
2016-01-15 16:55:24 -05:00
"sync"
2016-02-02 13:10:43 -05:00
"time"
2016-01-15 16:55:24 -05:00
2016-02-02 13:10:43 -05:00
"github.com/rcrowley/go-metrics"
. "github.com/tendermint/go-common"
tmtypes "github.com/tendermint/tendermint/types"
2016-01-15 16:55:24 -05:00
)
2016-02-02 13:10:43 -05:00
// newBlockTimeoutSeconds is how long, in seconds, we wait for a new block
// before considering ourselves unhealthy.
const newBlockTimeoutSeconds = 5
2016-01-15 16:55:24 -05:00
//------------------------------------------------
// blockchain types
2016-02-07 12:11:12 -08:00
// NOTE: mintnet duplicates some types from here and val.go
2016-01-27 00:27:24 -05:00
//------------------------------------------------
2016-01-15 16:55:24 -05:00
2016-01-15 23:31:57 -05:00
// Known chain and validator set IDs (from which anything else can be found)
2016-01-27 00:27:24 -05:00
// Returned by the Status RPC
2016-01-15 23:31:57 -05:00
// ChainAndValidatorSetIDs lists the known chain IDs and validator set IDs,
// from which everything else can be looked up. It is the payload returned by
// the Status RPC.
type ChainAndValidatorSetIDs struct {
	// ChainIDs holds the IDs of the known chains.
	ChainIDs []string `json:"chain_ids"`

	// ValidatorSetIDs holds the IDs of the known validator sets.
	ValidatorSetIDs []string `json:"validator_set_ids"`
}
2016-01-27 00:27:24 -05:00
//------------------------------------------------
// chain state
2016-01-15 23:31:57 -05:00
// Main chain state
2016-01-27 00:27:24 -05:00
// Returned over RPC; also used to manage state
2016-01-15 16:55:24 -05:00
// ChainState pairs a chain's configuration with its live status. It is
// returned over RPC and is also used to manage state; its methods forward
// to Status.
type ChainState struct {
	// Config identifies the chain and holds its validators.
	Config *BlockchainConfig `json:"config"`

	// Status holds the live metrics for the chain.
	Status *BlockchainStatus `json:"status"`
}
2016-01-27 00:27:24 -05:00
// NewBlock forwards a newly received block to the chain's status tracker.
func (cs *ChainState) NewBlock(block *tmtypes.Block) {
	status := cs.Status
	status.NewBlock(block)
}
// UpdateLatency forwards a validator latency change (previous and new value)
// to the chain's status tracker.
func (cs *ChainState) UpdateLatency(oldLatency, newLatency float64) {
	status := cs.Status
	status.UpdateLatency(oldLatency, newLatency)
}
2016-02-02 13:10:43 -05:00
// SetOnline forwards a validator's online/offline transition to the chain's
// status tracker.
func (cs *ChainState) SetOnline(val *ValidatorState, isOnline bool) {
	status := cs.Status
	status.SetOnline(val, isOnline)
}
2016-01-27 00:27:24 -05:00
//------------------------------------------------
// Blockchain Config: id, validator config
// Chain Config
2016-01-15 16:55:24 -05:00
type BlockchainConfig struct {
2016-01-27 00:27:24 -05:00
// should be fixed for life of chain
2016-01-15 23:31:57 -05:00
ID string `json:"id"`
2016-01-27 00:27:24 -05:00
ValSetID string `json:"val_set_id"` // NOTE: do we really commit to one val set per chain?
2016-01-15 16:55:24 -05:00
2016-01-27 00:27:24 -05:00
// handles live validator states (latency, last block, etc)
// and validator set changes
2016-01-15 23:31:57 -05:00
mtx sync.Mutex
Validators []*ValidatorState `json:"validators"` // TODO: this should be ValidatorConfig and the state in BlockchainStatus
2016-01-15 16:55:24 -05:00
valIDMap map[string]int // map IDs to indices
}
2016-01-27 00:27:24 -05:00
// So we can fetch validator by id rather than index
2016-01-15 16:55:24 -05:00
func (bc *BlockchainConfig) PopulateValIDMap() {
bc.mtx.Lock()
defer bc.mtx.Unlock()
bc.valIDMap = make(map[string]int)
for i, v := range bc.Validators {
bc.valIDMap[v.Config.Validator.ID] = i
2016-01-15 16:55:24 -05:00
}
}
func (bc *BlockchainConfig) GetValidatorByID(valID string) (*ValidatorState, error) {
2016-01-15 16:55:24 -05:00
bc.mtx.Lock()
defer bc.mtx.Unlock()
valIndex, ok := bc.valIDMap[valID]
if !ok {
return nil, fmt.Errorf("Unknown validator %s", valID)
}
return bc.Validators[valIndex], nil
}
2016-01-27 00:27:24 -05:00
//------------------------------------------------
// BlockchainStatus
// Basic blockchain metrics
type BlockchainStatus struct {
mtx sync.Mutex
// Blockchain Info
2016-02-02 13:10:43 -05:00
Height int `json:"height"` // latest height we've got
2016-01-27 00:27:24 -05:00
BlockchainSize int64 `json:"blockchain_size"`
2016-02-02 13:10:43 -05:00
MeanBlockTime float64 `json:"mean_block_time" wire:"unsafe"` // ms (avg over last minute)
TxThroughput float64 `json:"tx_throughput" wire:"unsafe"` // tx/s (avg over last minute)
2016-01-27 00:27:24 -05:00
blockTimeMeter metrics.Meter
txThroughputMeter metrics.Meter
// Network Info
2016-02-02 13:10:43 -05:00
NumValidators int `json:"num_validators"`
ActiveValidators int `json:"active_validators"`
//ActiveNodes int `json:"active_nodes"`
MeanLatency float64 `json:"mean_latency" wire:"unsafe"` // ms
// Health
FullHealth bool `json:"full_health"` // all validators online, synced, making blocks
Healthy bool `json:"healthy"` // we're making blocks
// Uptime
UptimeData *UptimeData `json:"uptime_data"`
2016-01-27 00:27:24 -05:00
// What else can we get / do we want?
// TODO: charts for block time, latency (websockets/event-meter ?)
2016-02-27 00:31:28 -05:00
// for benchmark runs
benchResults *BenchmarkResults
}
// Benchmark starts a benchmark run. It records the start time and the number
// of transactions to wait for (nTxs), remembers the channel on which the
// results are delivered (done), and then launches the external command
// described by args in the background: args[0] is the program, the remaining
// elements are its arguments.
//
// If args is empty no command is started; the run is still armed so blocks
// received afterwards are recorded.
func (s *BlockchainStatus) Benchmark(done chan *BenchmarkResults, nTxs int, args []string) {
	// NewBlock reads benchResults while holding s.mtx, so publish the new
	// run under the same lock.
	s.mtx.Lock()
	s.benchResults = &BenchmarkResults{
		StartTime: time.Now(),
		NumTxs:    nTxs,
		done:      done,
	}
	s.mtx.Unlock()

	// Indexing args[0] on an empty slice would panic.
	if len(args) == 0 {
		return
	}

	// TODO: capture output to file
	cmd := exec.Command(args[0], args[1:]...)
	go func() {
		// Best effort: this method has no way to report a failure to its
		// caller, so the command's error is deliberately dropped here.
		_ = cmd.Run()
	}()
}
// Block is the per-block record kept during a benchmark run.
type Block struct {
	Time   time.Time `json:"time"`    // when the block was recorded
	Height int       `json:"height"`  // height of the block
	NumTxs int       `json:"num_txs"` // number of transactions in the block
}
// BenchmarkResults accumulates the blocks seen during a benchmark run and
// the summary figures derived from them.
type BenchmarkResults struct {
	StartTime      time.Time `json:"start_time"` // when the run was started
	TotalTime      float64   `json:"total_time"` // seconds
	NumBlocks      int       `json:"num_blocks"`
	NumTxs         int       `json:"num_txs"`
	Blocks         []*Block  `json:"blocks"`
	MeanLatency    float64   `json:"latency"`    // seconds per block
	MeanThroughput float64   `json:"throughput"` // txs per second

	// done receives the results once the run is finished (see Done).
	done chan *BenchmarkResults
}
// Return the total time to commit all txs, in seconds
func (br *BenchmarkResults) ElapsedTime() float64 {
return float64(br.Blocks[br.NumBlocks].Time.Sub(br.StartTime)) / float64(1000000000)
}
// Return the avg seconds/block
func (br *BenchmarkResults) Latency() float64 {
return br.ElapsedTime() / float64(br.NumBlocks)
}
// Return the avg txs/second
func (br *BenchmarkResults) Throughput() float64 {
return float64(br.NumTxs) / br.ElapsedTime()
}
func (br *BenchmarkResults) Done() {
br.TotalTime = br.ElapsedTime()
br.MeanThroughput = br.Throughput()
br.MeanLatency = br.Latency()
br.done <- br
2016-01-27 00:27:24 -05:00
}
2016-01-15 16:55:24 -05:00
2016-02-02 13:10:43 -05:00
// UptimeData tracks how much of the time since StartTime the chain has been
// healthy.
type UptimeData struct {
	// StartTime is when tracking began.
	StartTime time.Time `json:"start_time"`

	// Uptime is the percentage of time we've been Healthy, ever.
	Uptime float64 `json:"uptime" wire:"unsafe"`

	// totalDownTime is the accumulated downtime; it is only updated when we
	// come back online.
	totalDownTime time.Duration

	// wentDown is the time at which we were last marked unhealthy.
	wentDown time.Time

	// TODO: uptime over last day, month, year
}
2016-01-27 00:27:24 -05:00
func NewBlockchainStatus() *BlockchainStatus {
return &BlockchainStatus{
blockTimeMeter: metrics.NewMeter(),
txThroughputMeter: metrics.NewMeter(),
2016-02-02 13:10:43 -05:00
Healthy: true,
UptimeData: &UptimeData{
StartTime: time.Now(),
Uptime: 100.0,
},
2016-01-15 16:55:24 -05:00
}
2016-01-27 00:27:24 -05:00
}
2016-01-15 16:55:24 -05:00
2016-01-27 00:27:24 -05:00
func (s *BlockchainStatus) NewBlock(block *tmtypes.Block) {
s.mtx.Lock()
defer s.mtx.Unlock()
if block.Header.Height > s.Height {
s.Height = block.Header.Height
s.blockTimeMeter.Mark(1)
s.txThroughputMeter.Mark(int64(block.Header.NumTxs))
2016-02-02 13:10:43 -05:00
s.MeanBlockTime = (1 / s.blockTimeMeter.Rate1()) * 1000 // 1/s to ms
s.TxThroughput = s.txThroughputMeter.Rate1()
2016-02-27 00:31:28 -05:00
if s.benchResults != nil {
s.benchResults.Blocks = append(s.benchResults.Blocks, &Block{
Time: time.Now(),
Height: s.Height,
NumTxs: block.Header.NumTxs,
})
if s.txThroughputMeter.Count() >= int64(s.benchResults.NumTxs) {
// XXX: do we need to be more careful than just counting?!
s.benchResults.Done()
}
}
2016-02-02 13:10:43 -05:00
// if we're making blocks, we're healthy
if !s.Healthy {
s.Healthy = true
s.UptimeData.totalDownTime += time.Since(s.UptimeData.wentDown)
}
// if we are connected to all validators, we're at full health
// TODO: make sure they're all at the same height (within a block) and all proposing (and possibly validating )
// Alternatively, just check there hasn't been a new round in numValidators rounds
if s.ActiveValidators == s.NumValidators {
s.FullHealth = true
}
// TODO: should we refactor so there's a central loop and ticker?
go s.newBlockTimeout(s.Height)
}
}
// we have newBlockTimeoutSeconds to make a new block, else we're unhealthy
func (s *BlockchainStatus) newBlockTimeout(height int) {
time.Sleep(time.Second * newBlockTimeoutSeconds)
s.mtx.Lock()
defer s.mtx.Unlock()
if !(s.Height > height) {
s.Healthy = false
s.UptimeData.wentDown = time.Now()
2016-01-15 16:55:24 -05:00
}
2016-01-27 00:27:24 -05:00
}
2016-02-02 13:10:43 -05:00
// RealTimeUpdates recomputes UptimeData.Uptime on demand: the percentage
// (0-100) of the time since StartTime during which the chain was healthy.
// Downtime from an outage still in progress is included.
// TODO: refactor this into the central loop ...
func (s *BlockchainStatus) RealTimeUpdates() {
	s.mtx.Lock()
	defer s.mtx.Unlock()

	since := time.Since(s.UptimeData.StartTime)
	if since <= 0 {
		// No time has elapsed; dividing would produce NaN.
		return
	}

	uptime := since - s.UptimeData.totalDownTime
	if !s.Healthy {
		// totalDownTime is only updated when we come back online, so count
		// the current outage separately.
		uptime -= time.Since(s.UptimeData.wentDown)
	}

	// Uptime is a percentage (it is initialised to 100.0), not a fraction.
	s.UptimeData.Uptime = 100 * float64(uptime) / float64(since)
}
2016-01-27 00:27:24 -05:00
func (s *BlockchainStatus) UpdateLatency(oldLatency, newLatency float64) {
s.mtx.Lock()
defer s.mtx.Unlock()
2016-02-02 13:10:43 -05:00
// update avg validator rpc latency
2016-01-27 00:27:24 -05:00
mean := s.MeanLatency * float64(s.NumValidators)
mean = (mean - oldLatency + newLatency) / float64(s.NumValidators)
s.MeanLatency = mean
2016-02-02 13:10:43 -05:00
}
// SetOnline toggles a validator online/offline. It forwards the new state to
// val, adjusts ActiveValidators by one in the matching direction, and clears
// FullHealth when not every validator is active. FullHealth is never set to
// true here.
//
// It panics if ActiveValidators would exceed NumValidators. A count that
// would drop below zero is clamped to zero.
func (s *BlockchainStatus) SetOnline(val *ValidatorState, isOnline bool) {
	val.SetOnline(isOnline)

	change := -1
	if isOnline {
		change = 1
	}

	s.mtx.Lock()
	defer s.mtx.Unlock()

	s.ActiveValidators += change
	if s.ActiveValidators > s.NumValidators {
		panic(Fmt("got %d validators. max %d", s.ActiveValidators, s.NumValidators))
	}
	if s.ActiveValidators < 0 {
		// Repeated offline notifications must not drive the count negative.
		s.ActiveValidators = 0
	}

	// if we lost a connection we're no longer at full health, even if it's still online.
	// so long as we receive blocks, we'll know we're still healthy
	if s.ActiveValidators != s.NumValidators {
		s.FullHealth = false
	}
}
// TwoThirdsMaj reports whether count is strictly more than two thirds of
// total.
func TwoThirdsMaj(count, total int) bool {
	// Integer form of count > (2/3)*total: exact, with no float rounding.
	// Widening to int64 keeps the products from overflowing a 32-bit int.
	return 3*int64(count) > 2*int64(total)
}