From 7ae1596b81e6daa6ac54d413077fde500f8cf01a Mon Sep 17 00:00:00 2001 From: murataniloener Date: Tue, 25 Jul 2023 21:03:55 +0200 Subject: [PATCH 1/6] catch error if type is missing (#1234) Co-authored-by: Andrew Gouin --- cmd/config.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cmd/config.go b/cmd/config.go index 5266a4685..9888a4596 100644 --- a/cmd/config.go +++ b/cmd/config.go @@ -19,6 +19,7 @@ package cmd import ( "context" "encoding/json" + "errors" "fmt" "io" "io/ioutil" @@ -393,7 +394,11 @@ func UnmarshalJSONProviderConfig(data []byte, customTypes map[string]reflect.Typ return nil, err } - typeName := m["type"].(string) + typeName, ok := m["type"].(string) + if !ok { + return nil, errors.New("cannot find type"); + } + var provCfg provider.ProviderConfig if ty, found := customTypes[typeName]; found { provCfg = reflect.New(ty).Interface().(provider.ProviderConfig) From 22bce429316c81ce9a5386787d021b003d752b72 Mon Sep 17 00:00:00 2001 From: Dan Kanefsky <56059752+boojamya@users.noreply.github.com> Date: Tue, 25 Jul 2023 12:11:21 -0700 Subject: [PATCH 2/6] Export client expiration metric to prometheus (#1235) * export client expiration metric * finalize * add path name * snake case * change label to `chain` * trusting period as string --- relayer/processor/message_processor.go | 7 +++++++ relayer/processor/metrics.go | 12 ++++++++++++ 2 files changed, 19 insertions(+) diff --git a/relayer/processor/message_processor.go b/relayer/processor/message_processor.go index bb0f66b12..fe355cfa7 100644 --- a/relayer/processor/message_processor.go +++ b/relayer/processor/message_processor.go @@ -141,8 +141,14 @@ func (mp *messageProcessor) shouldUpdateClientNow(ctx context.Context, src, dst shouldUpdateClientNow := enoughBlocksPassed && (pastTwoThirdsTrustingPeriod || pastConfiguredClientUpdateThreshold) + if mp.metrics != nil { + timeToExpiration := dst.clientState.TrustingPeriod - time.Since(consensusHeightTime) + mp.metrics.SetClientExpiration(src.info.PathName, dst.info.ChainID, dst.clientState.ClientID, fmt.Sprint(dst.clientState.TrustingPeriod.String()), timeToExpiration) + } + if shouldUpdateClientNow { mp.log.Info("Client update threshold condition met", + zap.String("path_name", src.info.PathName), zap.String("chain_id", dst.info.ChainID), zap.String("client_id", dst.info.ClientID), zap.Int64("trusting_period", dst.clientState.TrustingPeriod.Milliseconds()), @@ -249,6 +255,7 @@ func (mp *messageProcessor) assembleMsgUpdateClient(ctx context.Context, src, ds clientConsensusHeight.RevisionHeight+1, src.info.ChainID, err) } mp.log.Debug("Had to query for client trusted IBC header", + zap.String("path_name", src.info.PathName), zap.String("chain_id", src.info.ChainID), zap.String("counterparty_chain_id", dst.info.ChainID), zap.String("counterparty_client_id", clientID), diff --git a/relayer/processor/metrics.go b/relayer/processor/metrics.go index a549eb40a..96de766b6 100644 --- a/relayer/processor/metrics.go +++ b/relayer/processor/metrics.go @@ -1,6 +1,8 @@ package processor import ( + "time" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" ) @@ -12,6 +14,7 @@ type PrometheusMetrics struct { LatestHeightGauge *prometheus.GaugeVec WalletBalance *prometheus.GaugeVec FeesSpent *prometheus.GaugeVec + ClientExpiration *prometheus.GaugeVec } func (m *PrometheusMetrics) AddPacketsObserved(path, chain, channel, port, eventType string, count int) { @@ -34,10 +37,15 @@ func (m *PrometheusMetrics) SetFeesSpent(chain, key, address, denom string, amou m.FeesSpent.WithLabelValues(chain, key, address, denom).Set(amount) } +func (m *PrometheusMetrics) SetClientExpiration(pathName, chain, clientID, trustingPeriod string, timeToExpiration time.Duration) { + m.ClientExpiration.WithLabelValues(pathName, chain, clientID, trustingPeriod).Set(timeToExpiration.Seconds()) +} + func NewPrometheusMetrics() *PrometheusMetrics { packetLabels := []string{"path", "chain", "channel", "port", "type"} heightLabels := []string{"chain"} walletLabels := []string{"chain", "key", "address", "denom"} + clientExpirationLables := []string{"path_name", "chain", "client_id", "trusting_period"} registry := prometheus.NewRegistry() registerer := promauto.With(registry) return &PrometheusMetrics{ @@ -62,5 +70,9 @@ func NewPrometheusMetrics() *PrometheusMetrics { Name: "cosmos_relayer_fees_spent", Help: "The amount of fees spent from the relayer's wallet", }, walletLabels), + ClientExpiration: registerer.NewGaugeVec(prometheus.GaugeOpts{ + Name: "cosmos_relayer_client_expiration_seconds", + Help: "Seconds until the client expires", + }, clientExpirationLables), } } From 55084bd5844c42a5df8cf9e6fa2d74a84ccf304c Mon Sep 17 00:00:00 2001 From: Dan Kanefsky <56059752+boojamya@users.noreply.github.com> Date: Tue, 25 Jul 2023 12:21:03 -0700 Subject: [PATCH 3/6] Export configured gas prices to prometheus wallet balance metric (#1236) * export gas price to prom * update label * update fees spent metric * snake case --- relayer/chains/cosmos/cosmos_chain_processor.go | 2 +- relayer/chains/cosmos/tx.go | 2 +- relayer/processor/metrics.go | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/relayer/chains/cosmos/cosmos_chain_processor.go b/relayer/chains/cosmos/cosmos_chain_processor.go index b4bef6d47..9189e945d 100644 --- a/relayer/chains/cosmos/cosmos_chain_processor.go +++ b/relayer/chains/cosmos/cosmos_chain_processor.go @@ -550,6 +550,6 @@ func (ccp *CosmosChainProcessor) CurrentRelayerBalance(ctx context.Context) { bal := relayerWalletBalances.AmountOf(gasDenom.Denom) // Convert to a big float to get a float64 for metrics f, _ := big.NewFloat(0.0).SetInt(bal.BigInt()).Float64() - ccp.metrics.SetWalletBalance(ccp.chainProvider.ChainId(), ccp.chainProvider.Key(), address, gasDenom.Denom, f) + ccp.metrics.SetWalletBalance(ccp.chainProvider.ChainId(), ccp.chainProvider.PCfg.GasPrices, ccp.chainProvider.Key(), address, gasDenom.Denom, f) } } diff --git a/relayer/chains/cosmos/tx.go b/relayer/chains/cosmos/tx.go index e4c06714d..8d9ae37d1 100644 --- a/relayer/chains/cosmos/tx.go +++ b/relayer/chains/cosmos/tx.go @@ -1301,7 +1301,7 @@ func (cc *CosmosProvider) UpdateFeesSpent(chain, key, address string, fees sdk.C for _, fee := range cc.TotalFees { // Convert to a big float to get a float64 for metrics f, _ := big.NewFloat(0.0).SetInt(fee.Amount.BigInt()).Float64() - cc.metrics.SetFeesSpent(chain, key, address, fee.GetDenom(), f) + cc.metrics.SetFeesSpent(chain, cc.PCfg.GasPrices, key, address, fee.GetDenom(), f) } } diff --git a/relayer/processor/metrics.go b/relayer/processor/metrics.go index 96de766b6..6c77b5f3e 100644 --- a/relayer/processor/metrics.go +++ b/relayer/processor/metrics.go @@ -29,12 +29,12 @@ func (m *PrometheusMetrics) SetLatestHeight(chain string, height int64) { m.LatestHeightGauge.WithLabelValues(chain).Set(float64(height)) } -func (m *PrometheusMetrics) SetWalletBalance(chain, key, address, denom string, balance float64) { - m.WalletBalance.WithLabelValues(chain, key, address, denom).Set(balance) +func (m *PrometheusMetrics) SetWalletBalance(chain, gasPrice, key, address, denom string, balance float64) { + m.WalletBalance.WithLabelValues(chain, gasPrice, key, address, denom).Set(balance) } -func (m *PrometheusMetrics) SetFeesSpent(chain, key, address, denom string, amount float64) { - m.FeesSpent.WithLabelValues(chain, key, address, denom).Set(amount) +func (m *PrometheusMetrics) SetFeesSpent(chain, gasPrice, key, address, denom string, amount float64) { + m.FeesSpent.WithLabelValues(chain, gasPrice, key, address, denom).Set(amount) } func (m *PrometheusMetrics) SetClientExpiration(pathName, chain, clientID, trustingPeriod string, timeToExpiration time.Duration) { @@ -44,7 +44,7 @@ func (m *PrometheusMetrics) SetClientExpiration(pathName, chain, clientID, trust func NewPrometheusMetrics() *PrometheusMetrics { packetLabels := []string{"path", "chain", "channel", "port", "type"} heightLabels := []string{"chain"} - walletLabels := []string{"chain", "key", "address", "denom"} + walletLabels := []string{"chain", "gas_price", "key", "address", "denom"} clientExpirationLables := []string{"path_name", "chain", "client_id", "trusting_period"} registry := prometheus.NewRegistry() registerer := promauto.With(registry) From 107d3f5174e17aabd1b336b098ef3bb0f68d1b6a Mon Sep 17 00:00:00 2001 From: Dan Kanefsky <56059752+boojamya@users.noreply.github.com> Date: Tue, 25 Jul 2023 12:32:52 -0700 Subject: [PATCH 4/6] Exports block query errors to prometheus metrics (counter) (#1239) * separate by type * add help info * remove new line in help and fix readme * feedback --- docs/advanced_usage.md | 2 +- relayer/chains/cosmos/cosmos_chain_processor.go | 6 ++++++ relayer/processor/metrics.go | 10 ++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/docs/advanced_usage.md b/docs/advanced_usage.md index 715bc283d..99a721634 100644 --- a/docs/advanced_usage.md +++ b/docs/advanced_usage.md @@ -5,7 +5,7 @@ **Prometheus exporter** If you started `rly` with the default `--debug-addr` argument, -you can use `http://$IP:7597/relayer/metrics` as a target for your prometheus scraper. +you can use `http://$IP:5183/relayer/metrics` as a target for your prometheus scraper. **Example metrics** diff --git a/relayer/chains/cosmos/cosmos_chain_processor.go b/relayer/chains/cosmos/cosmos_chain_processor.go index 9189e945d..c582cc731 100644 --- a/relayer/chains/cosmos/cosmos_chain_processor.go +++ b/relayer/chains/cosmos/cosmos_chain_processor.go @@ -386,12 +386,18 @@ func (ccp *CosmosChainProcessor) queryCycle(ctx context.Context, persistence *qu queryCtx, cancelQueryCtx := context.WithTimeout(ctx, blockResultsQueryTimeout) defer cancelQueryCtx() blockRes, err = ccp.chainProvider.RPCClient.BlockResults(queryCtx, &i) + if err != nil && ccp.metrics != nil { + ccp.metrics.IncBlockQueryFailure(chainID, "RPC Client") + } return err }) eg.Go(func() (err error) { queryCtx, cancelQueryCtx := context.WithTimeout(ctx, queryTimeout) defer cancelQueryCtx() ibcHeader, err = ccp.chainProvider.QueryIBCHeader(queryCtx, i) + if err != nil && ccp.metrics != nil { + ccp.metrics.IncBlockQueryFailure(chainID, "IBC Header") + } return err }) diff --git a/relayer/processor/metrics.go b/relayer/processor/metrics.go index 6c77b5f3e..27e9e789f 100644 --- a/relayer/processor/metrics.go +++ b/relayer/processor/metrics.go @@ -14,6 +14,7 @@ type PrometheusMetrics struct { LatestHeightGauge *prometheus.GaugeVec WalletBalance *prometheus.GaugeVec FeesSpent *prometheus.GaugeVec + BlockQueryFailure *prometheus.CounterVec ClientExpiration *prometheus.GaugeVec } @@ -41,9 +42,14 @@ func (m *PrometheusMetrics) SetClientExpiration(pathName, chain, clientID, trust m.ClientExpiration.WithLabelValues(pathName, chain, clientID, trustingPeriod).Set(timeToExpiration.Seconds()) } +func (m *PrometheusMetrics) IncBlockQueryFailure(chain, err string) { + m.BlockQueryFailure.WithLabelValues(chain, err).Inc() +} + func NewPrometheusMetrics() *PrometheusMetrics { packetLabels := []string{"path", "chain", "channel", "port", "type"} heightLabels := []string{"chain"} + blockQueryFailureLabels := []string{"chain", "type"} walletLabels := []string{"chain", "gas_price", "key", "address", "denom"} clientExpirationLables := []string{"path_name", "chain", "client_id", "trusting_period"} registry := prometheus.NewRegistry() @@ -70,6 +76,10 @@ func NewPrometheusMetrics() *PrometheusMetrics { Name: "cosmos_relayer_fees_spent", Help: "The amount of fees spent from the relayer's wallet", }, walletLabels), + BlockQueryFailure: registerer.NewCounterVec(prometheus.CounterOpts{ + Name: "cosmos_relayer_block_query_errors_total", + Help: "The total number of block query failures. The failures are separated into two catagories: 'RPC Client' and 'IBC Header'", + }, blockQueryFailureLabels), ClientExpiration: registerer.NewGaugeVec(prometheus.GaugeOpts{ Name: "cosmos_relayer_client_expiration_seconds", Help: "Seconds until the client expires", From 3c7828782d351273c1e8192a0cc66a2d26de4296 Mon Sep 17 00:00:00 2001 From: Dan Kanefsky <56059752+boojamya@users.noreply.github.com> Date: Tue, 25 Jul 2023 12:55:34 -0700 Subject: [PATCH 5/6] Export TX failures to prometheus metrics (counter) (#1240) * export tx failures to prometheus * change label to `cause` --- docs/advanced_usage.md | 11 ++++++++ relayer/processor/message_processor.go | 36 ++++++++++++++++++++++++++ relayer/processor/metrics.go | 10 +++++++ 3 files changed, 57 insertions(+) diff --git a/docs/advanced_usage.md b/docs/advanced_usage.md index 99a721634..a0b518b07 100644 --- a/docs/advanced_usage.md +++ b/docs/advanced_usage.md @@ -7,6 +7,17 @@ If you started `rly` with the default `--debug-addr` argument, you can use `http://$IP:5183/relayer/metrics` as a target for your prometheus scraper. + +Exported metrics: + +| **Exported Metric** | **Description** | **Type** | +|:----------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:--------:| +| cosmos_relayer_observed_packets | The total number of observed packets | Counter | +| cosmos_relayer_relayed_packets | The total number of relayed packets | Counter | +| cosmos_relayer_chain_latest_height | The current height of the chain | Gauge | +| cosmos_relayer_wallet_balance | The current balance for the relayer's wallet | Gauge | +| cosmos_relayer_fees_spent | The amount of fees spent from the relayer's wallet | Gauge | +| cosmos_relayer_tx_failure |
The total number of tx failures broken up into catagories .
Categories:
- "packet messages are redundant"
- "insufficient funds"
- "invalid coins"
- "out of gas"
- "incorrect account sequence"

"Tx Failure" is the the catch all bucket| Counter | **Example metrics** ``` diff --git a/relayer/processor/message_processor.go b/relayer/processor/message_processor.go index fe355cfa7..b17978fc4 100644 --- a/relayer/processor/message_processor.go +++ b/relayer/processor/message_processor.go @@ -8,6 +8,7 @@ import ( "sync" "time" + sdkerrors "github.com/cosmos/cosmos-sdk/types/errors" chantypes "github.com/cosmos/ibc-go/v7/modules/core/04-channel/types" ibcexported "github.com/cosmos/ibc-go/v7/modules/core/exported" "github.com/cosmos/relayer/v2/relayer/provider" @@ -33,6 +34,9 @@ type messageProcessor struct { isLocalhost bool } +// catagories of tx errors for a Prometheus counter. If the error doesnt fall into one of the below categories, it is labeled as "Tx Failure" +var promErrorCatagories = []error{chantypes.ErrRedundantTx, sdkerrors.ErrInsufficientFunds, sdkerrors.ErrInvalidCoins, sdkerrors.ErrOutOfGas, sdkerrors.ErrWrongSequence} + // trackMessage stores the message tracker in the correct slice and index based on the type. func (mp *messageProcessor) trackMessage(tracker messageToTrack, i int) { switch t := tracker.(type) { @@ -361,6 +365,16 @@ func (mp *messageProcessor) sendClientUpdate( zap.String("dst_client_id", dst.info.ClientID), zap.Error(err), ) + + for _, promError := range promErrorCatagories { + if mp.metrics != nil { + if errors.Is(err, promError) { + mp.metrics.IncTxFailure(src.info.PathName, src.info.ChainID, promError.Error()) + } else { + mp.metrics.IncTxFailure(src.info.PathName, src.info.ChainID, "Tx Failure") + } + } + } return } dst.log.Debug("Client update broadcast completed") @@ -430,6 +444,17 @@ func (mp *messageProcessor) sendBatchMessages( zap.String("dst_client_id", dst.info.ClientID), zap.Error(err), } + + for _, promError := range promErrorCatagories { + if mp.metrics != nil { + if errors.Is(err, promError) { + mp.metrics.IncTxFailure(src.info.PathName, src.info.ChainID, promError.Error()) + } else { + mp.metrics.IncTxFailure(src.info.PathName, src.info.ChainID, "Tx Failure") + } + } + } + if errors.Is(err, chantypes.ErrRedundantTx) { mp.log.Debug("Redundant message(s)", errFields...) return @@ -490,6 +515,17 @@ func (mp *messageProcessor) sendSingleMessage( zap.String("src_client_id", src.info.ClientID), zap.String("dst_client_id", dst.info.ClientID), } + + for _, promError := range promErrorCatagories { + if mp.metrics != nil { + if errors.Is(err, promError) { + mp.metrics.IncTxFailure(src.info.PathName, src.info.ChainID, promError.Error()) + } else { + mp.metrics.IncTxFailure(src.info.PathName, src.info.ChainID, "Tx Failure") + } + } + } + errFields = append(errFields, zap.Object("msg", tracker)) errFields = append(errFields, zap.Error(err)) if errors.Is(err, chantypes.ErrRedundantTx) { diff --git a/relayer/processor/metrics.go b/relayer/processor/metrics.go index 27e9e789f..6e522cfc2 100644 --- a/relayer/processor/metrics.go +++ b/relayer/processor/metrics.go @@ -14,6 +14,7 @@ type PrometheusMetrics struct { LatestHeightGauge *prometheus.GaugeVec WalletBalance *prometheus.GaugeVec FeesSpent *prometheus.GaugeVec + TxFailureError *prometheus.CounterVec BlockQueryFailure *prometheus.CounterVec ClientExpiration *prometheus.GaugeVec } @@ -46,9 +47,14 @@ func (m *PrometheusMetrics) IncBlockQueryFailure(chain, err string) { m.BlockQueryFailure.WithLabelValues(chain, err).Inc() } +func (m *PrometheusMetrics) IncTxFailure(path, chain, errDesc string) { + m.TxFailureError.WithLabelValues(path, chain, errDesc).Inc() +} + func NewPrometheusMetrics() *PrometheusMetrics { packetLabels := []string{"path", "chain", "channel", "port", "type"} heightLabels := []string{"chain"} + txFailureLabels := []string{"path", "chain", "cause"} blockQueryFailureLabels := []string{"chain", "type"} walletLabels := []string{"chain", "gas_price", "key", "address", "denom"} clientExpirationLables := []string{"path_name", "chain", "client_id", "trusting_period"} @@ -76,6 +82,10 @@ func NewPrometheusMetrics() *PrometheusMetrics { Name: "cosmos_relayer_fees_spent", Help: "The amount of fees spent from the relayer's wallet", }, walletLabels), + TxFailureError: registerer.NewCounterVec(prometheus.CounterOpts{ + Name: "cosmos_relayer_tx_errors_total", + Help: "The total number of tx failures broken up into categories. See https://github.com/cosmos/relayer/blob/main/docs/advanced_usage.md#monitoring for list of catagories. 'Tx Failure' is the catch-all category", + }, txFailureLabels), BlockQueryFailure: registerer.NewCounterVec(prometheus.CounterOpts{ Name: "cosmos_relayer_block_query_errors_total", Help: "The total number of block query failures. The failures are separated into two catagories: 'RPC Client' and 'IBC Header'", From 993c21bd3f5e813e4d969f23506673f67061689c Mon Sep 17 00:00:00 2001 From: murataniloener Date: Wed, 26 Jul 2023 19:12:11 +0200 Subject: [PATCH 6/6] use the name given by the user to generate the fetch URL (#1233) * use the name given by the user to generate the fetch URL * add example --------- Co-authored-by: Andrew Gouin Co-authored-by: Dan Kanefsky <56059752+boojamya@users.noreply.github.com> --- cmd/chains.go | 5 +++-- cregistry/chain_info.go | 8 ++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/cmd/chains.go b/cmd/chains.go index 53404cabe..a11eeb1be 100644 --- a/cmd/chains.go +++ b/cmd/chains.go @@ -277,9 +277,10 @@ func chainsAddCmd(a *appState) *cobra.Command { " the chain-registry or passing a file (-f) or url (-u)", Args: withUsage(cobra.MinimumNArgs(0)), Example: fmt.Sprintf(` $ %s chains add cosmoshub + $ %s chains add testnets/cosmoshubtestnet $ %s chains add cosmoshub osmosis $ %s chains add --file chains/ibc0.json ibc0 - $ %s chains add --url https://relayer.com/ibc0.json ibc0`, appName, appName, appName, appName), + $ %s chains add --url https://relayer.com/ibc0.json ibc0`, appName, appName, appName, appName, appName), RunE: func(cmd *cobra.Command, args []string) error { file, url, err := getAddInputs(cmd) if err != nil { @@ -447,7 +448,7 @@ func addChainsFromRegistry(ctx context.Context, a *appState, chains []string) er continue } - chainConfig, err := chainInfo.GetChainConfig(ctx) + chainConfig, err := chainInfo.GetChainConfig(ctx, chain) if err != nil { a.log.Warn( "Error generating chain config", diff --git a/cregistry/chain_info.go b/cregistry/chain_info.go index 4d49017f0..7309296e2 100644 --- a/cregistry/chain_info.go +++ b/cregistry/chain_info.go @@ -206,8 +206,8 @@ func (c ChainInfo) GetRandomRPCEndpoint(ctx context.Context) (string, error) { } // GetAssetList returns the asset metadata from the cosmos chain registry for this particular chain. -func (c ChainInfo) GetAssetList(ctx context.Context) (AssetList, error) { - chainRegURL := fmt.Sprintf("https://raw.githubusercontent.com/cosmos/chain-registry/master/%s/assetlist.json", c.ChainName) +func (c ChainInfo) GetAssetList(ctx context.Context, name string) (AssetList, error) { + chainRegURL := fmt.Sprintf("https://raw.githubusercontent.com/cosmos/chain-registry/master/%s/assetlist.json", name) res, err := http.Get(chainRegURL) if err != nil { @@ -236,11 +236,11 @@ func (c ChainInfo) GetAssetList(ctx context.Context) (AssetList, error) { // GetChainConfig returns a CosmosProviderConfig composed from the details found in the cosmos chain registry for // this particular chain. -func (c ChainInfo) GetChainConfig(ctx context.Context) (*cosmos.CosmosProviderConfig, error) { +func (c ChainInfo) GetChainConfig(ctx context.Context, name string) (*cosmos.CosmosProviderConfig, error) { debug := viper.GetBool("debug") home := viper.GetString("home") - assetList, err := c.GetAssetList(ctx) + assetList, err := c.GetAssetList(ctx, name) if err != nil { return nil, err }