From dab5b5d4372a12b00ebbc3daf97eda7895b0675c Mon Sep 17 00:00:00 2001 From: aucusaga Date: Tue, 28 Sep 2021 17:09:47 +0800 Subject: [PATCH] metrics: add the consensus tags. --- bcs/ledger/xledger/ledger/ledger.go | 28 ++-- .../consensus/base/driver/chained-bft/smr.go | 11 ++ lib/metrics/metrics.go | 131 +++++++++++------- 3 files changed, 109 insertions(+), 61 deletions(-) diff --git a/bcs/ledger/xledger/ledger/ledger.go b/bcs/ledger/xledger/ledger/ledger.go index b990cdcc..e8607bc8 100644 --- a/bcs/ledger/xledger/ledger/ledger.go +++ b/bcs/ledger/xledger/ledger/ledger.go @@ -411,24 +411,25 @@ func (l *Ledger) correctTxsBlockid(blockID []byte, batchWrite kvdb.Batch) error // | // +---->Q---->Q--->NewTip // 处理完后,会返回分叉点的block -func (l *Ledger) handleFork(oldTip []byte, newTipPre []byte, nextHash []byte, batchWrite kvdb.Batch) (*pb.InternalBlock, error) { +func (l *Ledger) handleFork(oldTip []byte, newTipPre []byte, nextHash []byte, batchWrite kvdb.Batch) (*pb.InternalBlock, int64, error) { + var sum int64 p := oldTip q := newTipPre for !bytes.Equal(p, q) { pBlock, pErr := l.fetchBlock(p) if pErr != nil { - return nil, pErr + return nil, 0, pErr } pBlock.InTrunk = false pBlock.NextHash = []byte{} //next_hash表示是主干上的下一个blockid,所以分支上的这个属性清空 qBlock, qErr := l.fetchBlock(q) if qErr != nil { - return nil, qErr + return nil, 0, qErr } qBlock.InTrunk = true cerr := l.correctTxsBlockid(qBlock.Blockid, batchWrite) if cerr != nil { - return nil, cerr + return nil, 0, cerr } qBlock.NextHash = nextHash nextHash = q @@ -436,24 +437,26 @@ func (l *Ledger) handleFork(oldTip []byte, newTipPre []byte, nextHash []byte, ba q = qBlock.PreHash saveErr := l.saveBlock(pBlock, batchWrite) if saveErr != nil { - return nil, saveErr + return nil, 0, saveErr } saveErr = l.saveBlock(qBlock, batchWrite) if saveErr != nil { - return nil, saveErr + return nil, 0, saveErr } + sum -= int64(pBlock.TxCount) + sum += int64(qBlock.TxCount) } splitBlock, qErr := l.fetchBlock(q) if qErr != nil { - return nil, qErr + return nil, 0, qErr } splitBlock.InTrunk = true splitBlock.NextHash = nextHash saveErr := l.saveBlock(splitBlock, batchWrite) if saveErr != nil { - return nil, saveErr + return nil, 0, saveErr } - return splitBlock, nil + return splitBlock, sum, nil } // IsValidTx valid transactions of coinbase in block @@ -576,6 +579,7 @@ func (l *Ledger) ConfirmBlock(block *pb.InternalBlock, isRoot bool) ConfirmStatu l.mutex.Lock() beginTime := time.Now() var confirmStatus ConfirmStatus + var txSum int64 defer func() { l.mutex.Unlock() bcName := l.ctx.BCName @@ -588,6 +592,7 @@ func (l *Ledger) ConfirmBlock(block *pb.InternalBlock, isRoot bool) ConfirmStatu if confirmStatus.TrunkSwitch { metrics.LedgerSwitchBranchCounter.WithLabelValues(bcName).Inc() } + metrics.GeneralSumGauge.WithLabelValues(l.ctx.BCName, "intrunk-tx-nums").Add(float64(txSum)) }() blkTimer := timer.NewXTimer() @@ -642,6 +647,7 @@ func (l *Ledger) ConfirmBlock(block *pb.InternalBlock, isRoot bool) ConfirmStatu return confirmStatus } } + txSum = int64(block.TxCount) } else { //在分支上 if preBlock.Height+1 > newMeta.TrunkHeight { @@ -650,12 +656,13 @@ func (l *Ledger) ConfirmBlock(block *pb.InternalBlock, isRoot bool) ConfirmStatu newMeta.TrunkHeight = preBlock.Height + 1 newMeta.TipBlockid = block.Blockid block.InTrunk = true - splitBlock, splitErr := l.handleFork(oldTip, preBlock.Blockid, block.Blockid, batchWrite) //处理分叉 + splitBlock, sum, splitErr := l.handleFork(oldTip, preBlock.Blockid, block.Blockid, batchWrite) //处理分叉 if splitErr != nil { l.xlog.Warn("handle split failed", "splitErr", splitErr) confirmStatus.Succ = false return confirmStatus } + txSum = sum splitHeight = splitBlock.Height confirmStatus.Split = true confirmStatus.TrunkSwitch = true @@ -1144,6 +1151,7 @@ func (l *Ledger) removeBlocks(fromBlockid []byte, toBlockid []byte, batch kvdb.B if fromBlock.InTrunk { sHeight := []byte(fmt.Sprintf("%020d", fromBlock.Height)) batch.Delete(append([]byte(pb.BlockHeightPrefix), sHeight...)) + metrics.GeneralSumGauge.WithLabelValues(l.ctx.BCName, "intrunk-tx-nums").Sub(float64(fromBlock.TxCount)) } //iter to prev block fromBlock, findErr = l.fetchBlock(fromBlock.PreHash) diff --git a/kernel/consensus/base/driver/chained-bft/smr.go b/kernel/consensus/base/driver/chained-bft/smr.go index 7af24260..15bfadf9 100644 --- a/kernel/consensus/base/driver/chained-bft/smr.go +++ b/kernel/consensus/base/driver/chained-bft/smr.go @@ -16,6 +16,7 @@ import ( "github.com/xuperchain/xupercore/kernel/ledger" "github.com/xuperchain/xupercore/kernel/network/p2p" "github.com/xuperchain/xupercore/lib/logs" + "github.com/xuperchain/xupercore/lib/metrics" "github.com/xuperchain/xupercore/lib/timer" "github.com/xuperchain/xupercore/lib/utils" xuperp2p "github.com/xuperchain/xupercore/protos" @@ -260,6 +261,10 @@ func (s *Smr) ResetProposerStatus(tipBlock cctx.BlockInterface, if qc == nil { return false, nil, ErrEmptyHighQC } + // metrics + diff := tipBlock.GetHeight() - qc.GetProposalView() + metrics.GeneralSumGauge.WithLabelValues(s.bcName, "consensus-rollback").Set(float64(diff)) + ok, err := s.enforceUpdateHighQC(qc.GetProposalId()) if err != nil { s.log.Error("consensus:smr:ResetProposerStatus: EnforceUpdateHighQC error.", "error", err) @@ -625,6 +630,12 @@ func (s *Smr) handleReceivedVoteMsg(msg *xuperp2p.XuperMessage) error { // 更新本地pacemaker AdvanceRound s.pacemaker.AdvanceView(voteQC) s.log.Debug("smr::handleReceivedVoteMsg::FULL VOTES!", "pacemaker view", s.pacemaker.GetCurrentView()) + if t, ok := s.localProposal.Load(utils.F(voteQC.GetProposalId())); ok { + if proposalGetT, ok := t.(int64); ok && proposalGetT != 0 { + time := (time.Now().UnixNano() - proposalGetT) / int64(time.Millisecond) + metrics.ConsensusMsgSummary.WithLabelValues(s.bcName, "proposalToFullvotes").Observe(float64(time)) + } + } // 更新HighQC s.qcTree.UpdateHighQC(voteQC.GetProposalId()) return nil diff --git a/lib/metrics/metrics.go b/lib/metrics/metrics.go index 588b8241..67e3d2db 100644 --- a/lib/metrics/metrics.go +++ b/lib/metrics/metrics.go @@ -5,26 +5,33 @@ import prom "github.com/prometheus/client_golang/prometheus" const ( Namespace = "xuperos" - SubsystemCommon = "common" - SubsystemContract = "contract" - SubsystemLedger = "ledger" - SubsystemState = "state" - SubsystemNetwork = "network" + SubsystemCommon = "common" + SubsystemContract = "contract" + SubsystemLedger = "ledger" + SubsystemState = "state" + SubsystemNetwork = "network" + SubsystemConsensus = "consensus" - LabelBCName = "bcname" - LabelMessageType = "message" - LabelCallMethod = "method" + LabelBCName = "bcname" + LabelMessageType = "message" + LabelCallMethod = "method" - LabelContractModuleName = "contract_module" - LabelContractName = "contract_name" - LabelContractMethod = "contract_method" - LabelErrorCode = "code" + LabelContractModuleName = "contract_module" + LabelContractName = "contract_name" + LabelContractMethod = "contract_method" + LabelErrorCode = "code" LabelModule = "module" LabelHandle = "handle" + + LabelConsensusPhase = "consensus_phase_tag" + LabelGeneralSum = "general_sum" ) -var DefBuckets = []float64{.001, .0025, .005, .01, .025, .05, .1, .25, .5, 1, 2.5} +var ( + DefBuckets = []float64{.001, .0025, .005, .01, .025, .05, .1, .25, .5, 1, 2.5} + ConsDefObjs = map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001, 0.999: 0.0001} +) // common var ( @@ -33,8 +40,8 @@ var ( prom.GaugeOpts{ Namespace: Namespace, Subsystem: SubsystemCommon, - Name: "concurrent_requests_total", - Help: "Total number of concurrent requests.", + Name: "concurrent_requests_total", + Help: "Total number of concurrent requests.", }, []string{LabelModule}) // 字节量 @@ -42,8 +49,8 @@ var ( prom.CounterOpts{ Namespace: Namespace, Subsystem: SubsystemCommon, - Name: "handle_bytes", - Help: "Total size of bytes.", + Name: "handle_bytes", + Help: "Total size of bytes.", }, []string{LabelModule, LabelCallMethod, LabelHandle}) // 函数调用 @@ -51,17 +58,17 @@ var ( prom.CounterOpts{ Namespace: Namespace, Subsystem: SubsystemCommon, - Name: "call_method_total", - Help: "Total number of call method.", + Name: "call_method_total", + Help: "Total number of call method.", }, []string{LabelModule, LabelCallMethod, LabelErrorCode}) CallMethodHistogram = prom.NewHistogramVec( prom.HistogramOpts{ Namespace: Namespace, Subsystem: SubsystemCommon, - Name: "call_method_seconds", - Help: "Histogram of call method cost latency.", - Buckets: DefBuckets, + Name: "call_method_seconds", + Help: "Histogram of call method cost latency.", + Buckets: DefBuckets, }, []string{LabelModule, LabelCallMethod}) ) @@ -72,17 +79,17 @@ var ( prom.CounterOpts{ Namespace: Namespace, Subsystem: SubsystemContract, - Name: "invoke_total", - Help: "Total number of invoke contract latency.", + Name: "invoke_total", + Help: "Total number of invoke contract latency.", }, []string{LabelBCName, LabelContractModuleName, LabelContractName, LabelContractMethod, LabelErrorCode}) ContractInvokeHistogram = prom.NewHistogramVec( prom.HistogramOpts{ Namespace: Namespace, Subsystem: SubsystemContract, - Name: "invoke_seconds", - Help: "Histogram of invoke contract latency.", - Buckets: DefBuckets, + Name: "invoke_seconds", + Help: "Histogram of invoke contract latency.", + Buckets: DefBuckets, }, []string{LabelBCName, LabelContractModuleName, LabelContractName, LabelContractMethod}) ) @@ -93,24 +100,24 @@ var ( prom.CounterOpts{ Namespace: Namespace, Subsystem: SubsystemLedger, - Name: "confirmed_tx_total", - Help: "Total number of ledger confirmed tx.", + Name: "confirmed_tx_total", + Help: "Total number of ledger confirmed tx.", }, []string{LabelBCName}) LedgerHeightGauge = prom.NewGaugeVec( prom.GaugeOpts{ Namespace: Namespace, Subsystem: SubsystemLedger, - Name: "height_total", - Help: "Total number of ledger height.", + Name: "height_total", + Help: "Total number of ledger height.", }, []string{LabelBCName}) LedgerSwitchBranchCounter = prom.NewCounterVec( prom.CounterOpts{ Namespace: Namespace, Subsystem: SubsystemLedger, - Name: "switch_branch_total", - Help: "Total number of ledger switch branch.", + Name: "switch_branch_total", + Help: "Total number of ledger switch branch.", }, []string{LabelBCName}) ) @@ -121,8 +128,8 @@ var ( prom.GaugeOpts{ Namespace: Namespace, Subsystem: SubsystemState, - Name: "unconfirmed_tx_gauge", - Help: "Total number of miner unconfirmed tx.", + Name: "unconfirmed_tx_gauge", + Help: "Total number of miner unconfirmed tx.", }, []string{LabelBCName}) ) @@ -133,56 +140,75 @@ var ( prom.CounterOpts{ Namespace: Namespace, Subsystem: SubsystemNetwork, - Name: "msg_send_total", - Help: "Total number of P2P send message.", + Name: "msg_send_total", + Help: "Total number of P2P send message.", }, []string{LabelBCName, LabelMessageType}) NetworkMsgSendBytesCounter = prom.NewCounterVec( prom.CounterOpts{ Namespace: Namespace, Subsystem: SubsystemNetwork, - Name: "msg_send_bytes", - Help: "Total size of P2P send message.", + Name: "msg_send_bytes", + Help: "Total size of P2P send message.", }, []string{LabelBCName, LabelMessageType}) NetworkClientHandlingHistogram = prom.NewHistogramVec( prom.HistogramOpts{ Namespace: Namespace, Subsystem: SubsystemNetwork, - Name: "client_handled_seconds", - Help: "Histogram of response latency (seconds) of P2P handled.", - Buckets: DefBuckets, + Name: "client_handled_seconds", + Help: "Histogram of response latency (seconds) of P2P handled.", + Buckets: DefBuckets, }, []string{LabelBCName, LabelMessageType}) - NetworkMsgReceivedCounter = prom.NewCounterVec( prom.CounterOpts{ Namespace: Namespace, Subsystem: SubsystemNetwork, - Name: "msg_received_total", - Help: "Total number of P2P received message.", + Name: "msg_received_total", + Help: "Total number of P2P received message.", }, []string{LabelBCName, LabelMessageType}) NetworkMsgReceivedBytesCounter = prom.NewCounterVec( prom.CounterOpts{ Namespace: Namespace, Subsystem: SubsystemNetwork, - Name: "msg_received_bytes", - Help: "Total size of P2P received message.", + Name: "msg_received_bytes", + Help: "Total size of P2P received message.", }, []string{LabelBCName, LabelMessageType}) NetworkServerHandlingHistogram = prom.NewHistogramVec( prom.HistogramOpts{ Namespace: Namespace, Subsystem: SubsystemNetwork, - Name: "server_handled_seconds", - Help: "Histogram of response latency (seconds) of P2P handled.", - Buckets: DefBuckets, + Name: "server_handled_seconds", + Help: "Histogram of response latency (seconds) of P2P handled.", + Buckets: DefBuckets, }, []string{LabelBCName, LabelMessageType}) ) +// consensus +var ( + ConsensusMsgSummary = prom.NewSummaryVec( + prom.SummaryOpts{ + Namespace: Namespace, + Subsystem: SubsystemConsensus, + Name: "consensus_handle_seconds", + Help: "consensus msg cost latency between peers.", + Objectives: ConsDefObjs, + }, []string{LabelBCName, LabelConsensusPhase}) + GeneralSumGauge = prom.NewGaugeVec( + prom.GaugeOpts{ + Namespace: Namespace, + Subsystem: SubsystemConsensus, + Name: "general_sum", + Help: "block & transaction sum with truncate-op.", + }, + []string{LabelBCName, LabelGeneralSum}) +) + func RegisterMetrics() { // common prom.MustRegister(BytesCounter) @@ -205,4 +231,7 @@ func RegisterMetrics() { prom.MustRegister(NetworkMsgReceivedCounter) prom.MustRegister(NetworkMsgReceivedBytesCounter) prom.MustRegister(NetworkServerHandlingHistogram) -} \ No newline at end of file + // consensus + prom.MustRegister(ConsensusMsgSummary) + prom.MustRegister(GeneralSumGauge) +}