Skip to content

Commit

Permalink
Merge branch 'main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
agouin authored Jul 27, 2023
2 parents e6f6475 + 993c21b commit b22f408
Show file tree
Hide file tree
Showing 8 changed files with 113 additions and 15 deletions.
5 changes: 3 additions & 2 deletions cmd/chains.go
Original file line number Diff line number Diff line change
Expand Up @@ -291,9 +291,10 @@ func chainsAddCmd(a *appState) *cobra.Command {
" the chain-registry or passing a file (-f) or url (-u)",
Args: withUsage(cobra.MinimumNArgs(0)),
Example: fmt.Sprintf(` $ %s chains add cosmoshub
$ %s chains add testnets/cosmoshubtestnet
$ %s chains add cosmoshub osmosis
$ %s chains add --file chains/ibc0.json ibc0
$ %s chains add --url https://relayer.com/ibc0.json ibc0`, appName, appName, appName, appName),
$ %s chains add --url https://relayer.com/ibc0.json ibc0`, appName, appName, appName, appName, appName),
RunE: func(cmd *cobra.Command, args []string) error {
file, url, err := getAddInputs(cmd)
if err != nil {
Expand Down Expand Up @@ -461,7 +462,7 @@ func addChainsFromRegistry(ctx context.Context, a *appState, chains []string) er
continue
}

chainConfig, err := chainInfo.GetChainConfig(ctx)
chainConfig, err := chainInfo.GetChainConfig(ctx, chain)
if err != nil {
a.log.Warn(
"Error generating chain config",
Expand Down
7 changes: 6 additions & 1 deletion cmd/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package cmd
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"io/ioutil"
Expand Down Expand Up @@ -393,7 +394,11 @@ func UnmarshalJSONProviderConfig(data []byte, customTypes map[string]reflect.Typ
return nil, err
}

typeName := m["type"].(string)
typeName, ok := m["type"].(string)
if !ok {
return nil, errors.New("cannot find type");
}

var provCfg provider.ProviderConfig
if ty, found := customTypes[typeName]; found {
provCfg = reflect.New(ty).Interface().(provider.ProviderConfig)
Expand Down
8 changes: 4 additions & 4 deletions cregistry/chain_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,8 @@ func (c ChainInfo) GetRandomRPCEndpoint(ctx context.Context) (string, error) {
}

// GetAssetList returns the asset metadata from the cosmos chain registry for this particular chain.
func (c ChainInfo) GetAssetList(ctx context.Context) (AssetList, error) {
chainRegURL := fmt.Sprintf("https://raw.githubusercontent.com/cosmos/chain-registry/master/%s/assetlist.json", c.ChainName)
func (c ChainInfo) GetAssetList(ctx context.Context, name string) (AssetList, error) {
chainRegURL := fmt.Sprintf("https://raw.githubusercontent.com/cosmos/chain-registry/master/%s/assetlist.json", name)

res, err := http.Get(chainRegURL)
if err != nil {
Expand Down Expand Up @@ -236,11 +236,11 @@ func (c ChainInfo) GetAssetList(ctx context.Context) (AssetList, error) {

// GetChainConfig returns a CosmosProviderConfig composed from the details found in the cosmos chain registry for
// this particular chain.
func (c ChainInfo) GetChainConfig(ctx context.Context) (*cosmos.CosmosProviderConfig, error) {
func (c ChainInfo) GetChainConfig(ctx context.Context, name string) (*cosmos.CosmosProviderConfig, error) {
debug := viper.GetBool("debug")
home := viper.GetString("home")

assetList, err := c.GetAssetList(ctx)
assetList, err := c.GetAssetList(ctx, name)
if err != nil {
return nil, err
}
Expand Down
13 changes: 12 additions & 1 deletion docs/advanced_usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,19 @@
**Prometheus exporter**

If you started `rly` with the default `--debug-addr` argument,
you can use `http://$IP:7597/relayer/metrics` as a target for your prometheus scraper.
you can use `http://$IP:5183/relayer/metrics` as a target for your prometheus scraper.


Exported metrics:

| **Exported Metric** | **Description** | **Type** |
|:----------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:--------:|
| cosmos_relayer_observed_packets | The total number of observed packets | Counter |
| cosmos_relayer_relayed_packets | The total number of relayed packets | Counter |
| cosmos_relayer_chain_latest_height | The current height of the chain | Gauge |
| cosmos_relayer_wallet_balance | The current balance for the relayer's wallet | Gauge |
| cosmos_relayer_fees_spent | The amount of fees spent from the relayer's wallet | Gauge |
| cosmos_relayer_tx_failure | <br>The total number of tx failures broken up into catagories .<br>Categories:<br> - "packet messages are redundant"<br> - "insufficient funds"<br> - "invalid coins"<br> - "out of gas"<br> - "incorrect account sequence" <br><br> "Tx Failure" is the the catch all bucket| Counter |
**Example metrics**

```
Expand Down
8 changes: 7 additions & 1 deletion relayer/chains/cosmos/cosmos_chain_processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -386,12 +386,18 @@ func (ccp *CosmosChainProcessor) queryCycle(ctx context.Context, persistence *qu
queryCtx, cancelQueryCtx := context.WithTimeout(ctx, blockResultsQueryTimeout)
defer cancelQueryCtx()
blockRes, err = ccp.chainProvider.RPCClient.BlockResults(queryCtx, &i)
if err != nil && ccp.metrics != nil {
ccp.metrics.IncBlockQueryFailure(chainID, "RPC Client")
}
return err
})
eg.Go(func() (err error) {
queryCtx, cancelQueryCtx := context.WithTimeout(ctx, queryTimeout)
defer cancelQueryCtx()
ibcHeader, err = ccp.chainProvider.QueryIBCHeader(queryCtx, i)
if err != nil && ccp.metrics != nil {
ccp.metrics.IncBlockQueryFailure(chainID, "IBC Header")
}
return err
})

Expand Down Expand Up @@ -550,6 +556,6 @@ func (ccp *CosmosChainProcessor) CurrentRelayerBalance(ctx context.Context) {
bal := relayerWalletBalances.AmountOf(gasDenom.Denom)
// Convert to a big float to get a float64 for metrics
f, _ := big.NewFloat(0.0).SetInt(bal.BigInt()).Float64()
ccp.metrics.SetWalletBalance(ccp.chainProvider.ChainId(), ccp.chainProvider.Key(), address, gasDenom.Denom, f)
ccp.metrics.SetWalletBalance(ccp.chainProvider.ChainId(), ccp.chainProvider.PCfg.GasPrices, ccp.chainProvider.Key(), address, gasDenom.Denom, f)
}
}
2 changes: 1 addition & 1 deletion relayer/chains/cosmos/tx.go
Original file line number Diff line number Diff line change
Expand Up @@ -1596,7 +1596,7 @@ func (cc *CosmosProvider) UpdateFeesSpent(chain, key, address string, fees sdk.C
for _, fee := range cc.TotalFees {
// Convert to a big float to get a float64 for metrics
f, _ := big.NewFloat(0.0).SetInt(fee.Amount.BigInt()).Float64()
cc.metrics.SetFeesSpent(chain, key, address, fee.GetDenom(), f)
cc.metrics.SetFeesSpent(chain, cc.PCfg.GasPrices, key, address, fee.GetDenom(), f)
}
}

Expand Down
43 changes: 43 additions & 0 deletions relayer/processor/message_processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"sync"
"time"

sdkerrors "github.com/cosmos/cosmos-sdk/types/errors"
chantypes "github.com/cosmos/ibc-go/v7/modules/core/04-channel/types"
ibcexported "github.com/cosmos/ibc-go/v7/modules/core/exported"
"github.com/cosmos/relayer/v2/relayer/provider"
Expand All @@ -33,6 +34,9 @@ type messageProcessor struct {
isLocalhost bool
}

// catagories of tx errors for a Prometheus counter. If the error doesnt fall into one of the below categories, it is labeled as "Tx Failure"
var promErrorCatagories = []error{chantypes.ErrRedundantTx, sdkerrors.ErrInsufficientFunds, sdkerrors.ErrInvalidCoins, sdkerrors.ErrOutOfGas, sdkerrors.ErrWrongSequence}

// trackMessage stores the message tracker in the correct slice and index based on the type.
func (mp *messageProcessor) trackMessage(tracker messageToTrack, i int) {
switch t := tracker.(type) {
Expand Down Expand Up @@ -141,8 +145,14 @@ func (mp *messageProcessor) shouldUpdateClientNow(ctx context.Context, src, dst

shouldUpdateClientNow := enoughBlocksPassed && (pastTwoThirdsTrustingPeriod || pastConfiguredClientUpdateThreshold)

if mp.metrics != nil {
timeToExpiration := dst.clientState.TrustingPeriod - time.Since(consensusHeightTime)
mp.metrics.SetClientExpiration(src.info.PathName, dst.info.ChainID, dst.clientState.ClientID, fmt.Sprint(dst.clientState.TrustingPeriod.String()), timeToExpiration)
}

if shouldUpdateClientNow {
mp.log.Info("Client update threshold condition met",
zap.String("path_name", src.info.PathName),
zap.String("chain_id", dst.info.ChainID),
zap.String("client_id", dst.info.ClientID),
zap.Int64("trusting_period", dst.clientState.TrustingPeriod.Milliseconds()),
Expand Down Expand Up @@ -249,6 +259,7 @@ func (mp *messageProcessor) assembleMsgUpdateClient(ctx context.Context, src, ds
clientConsensusHeight.RevisionHeight+1, src.info.ChainID, err)
}
mp.log.Debug("Had to query for client trusted IBC header",
zap.String("path_name", src.info.PathName),
zap.String("chain_id", src.info.ChainID),
zap.String("counterparty_chain_id", dst.info.ChainID),
zap.String("counterparty_client_id", clientID),
Expand Down Expand Up @@ -354,6 +365,16 @@ func (mp *messageProcessor) sendClientUpdate(
zap.String("dst_client_id", dst.info.ClientID),
zap.Error(err),
)

for _, promError := range promErrorCatagories {
if mp.metrics != nil {
if errors.Is(err, promError) {
mp.metrics.IncTxFailure(src.info.PathName, src.info.ChainID, promError.Error())
} else {
mp.metrics.IncTxFailure(src.info.PathName, src.info.ChainID, "Tx Failure")
}
}
}
return
}
dst.log.Debug("Client update broadcast completed")
Expand Down Expand Up @@ -447,6 +468,17 @@ func (mp *messageProcessor) sendBatchMessages(
zap.String("dst_client_id", dst.info.ClientID),
zap.Error(err),
}

for _, promError := range promErrorCatagories {
if mp.metrics != nil {
if errors.Is(err, promError) {
mp.metrics.IncTxFailure(src.info.PathName, src.info.ChainID, promError.Error())
} else {
mp.metrics.IncTxFailure(src.info.PathName, src.info.ChainID, "Tx Failure")
}
}
}

if errors.Is(err, chantypes.ErrRedundantTx) {
mp.log.Debug("Redundant message(s)", errFields...)
return
Expand Down Expand Up @@ -523,6 +555,17 @@ func (mp *messageProcessor) sendSingleMessage(
zap.String("src_client_id", src.info.ClientID),
zap.String("dst_client_id", dst.info.ClientID),
}

for _, promError := range promErrorCatagories {
if mp.metrics != nil {
if errors.Is(err, promError) {
mp.metrics.IncTxFailure(src.info.PathName, src.info.ChainID, promError.Error())
} else {
mp.metrics.IncTxFailure(src.info.PathName, src.info.ChainID, "Tx Failure")
}
}
}

errFields = append(errFields, zap.Object("msg", tracker))
errFields = append(errFields, zap.Error(err))
if errors.Is(err, chantypes.ErrRedundantTx) {
Expand Down
42 changes: 37 additions & 5 deletions relayer/processor/metrics.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package processor

import (
"time"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)
Expand All @@ -12,6 +14,9 @@ type PrometheusMetrics struct {
LatestHeightGauge *prometheus.GaugeVec
WalletBalance *prometheus.GaugeVec
FeesSpent *prometheus.GaugeVec
TxFailureError *prometheus.CounterVec
BlockQueryFailure *prometheus.CounterVec
ClientExpiration *prometheus.GaugeVec
}

func (m *PrometheusMetrics) AddPacketsObserved(path, chain, channel, port, eventType string, count int) {
Expand All @@ -26,18 +31,33 @@ func (m *PrometheusMetrics) SetLatestHeight(chain string, height int64) {
m.LatestHeightGauge.WithLabelValues(chain).Set(float64(height))
}

func (m *PrometheusMetrics) SetWalletBalance(chain, key, address, denom string, balance float64) {
m.WalletBalance.WithLabelValues(chain, key, address, denom).Set(balance)
func (m *PrometheusMetrics) SetWalletBalance(chain, gasPrice, key, address, denom string, balance float64) {
m.WalletBalance.WithLabelValues(chain, gasPrice, key, address, denom).Set(balance)
}

func (m *PrometheusMetrics) SetFeesSpent(chain, gasPrice, key, address, denom string, amount float64) {
m.FeesSpent.WithLabelValues(chain, gasPrice, key, address, denom).Set(amount)
}

func (m *PrometheusMetrics) SetClientExpiration(pathName, chain, clientID, trustingPeriod string, timeToExpiration time.Duration) {
m.ClientExpiration.WithLabelValues(pathName, chain, clientID, trustingPeriod).Set(timeToExpiration.Seconds())
}

func (m *PrometheusMetrics) IncBlockQueryFailure(chain, err string) {
m.BlockQueryFailure.WithLabelValues(chain, err).Inc()
}

func (m *PrometheusMetrics) SetFeesSpent(chain, key, address, denom string, amount float64) {
m.FeesSpent.WithLabelValues(chain, key, address, denom).Set(amount)
func (m *PrometheusMetrics) IncTxFailure(path, chain, errDesc string) {
m.TxFailureError.WithLabelValues(path, chain, errDesc).Inc()
}

func NewPrometheusMetrics() *PrometheusMetrics {
packetLabels := []string{"path", "chain", "channel", "port", "type"}
heightLabels := []string{"chain"}
walletLabels := []string{"chain", "key", "address", "denom"}
txFailureLabels := []string{"path", "chain", "cause"}
blockQueryFailureLabels := []string{"chain", "type"}
walletLabels := []string{"chain", "gas_price", "key", "address", "denom"}
clientExpirationLables := []string{"path_name", "chain", "client_id", "trusting_period"}
registry := prometheus.NewRegistry()
registerer := promauto.With(registry)
return &PrometheusMetrics{
Expand All @@ -62,5 +82,17 @@ func NewPrometheusMetrics() *PrometheusMetrics {
Name: "cosmos_relayer_fees_spent",
Help: "The amount of fees spent from the relayer's wallet",
}, walletLabels),
TxFailureError: registerer.NewCounterVec(prometheus.CounterOpts{
Name: "cosmos_relayer_tx_errors_total",
Help: "The total number of tx failures broken up into categories. See https://github.com/cosmos/relayer/blob/main/docs/advanced_usage.md#monitoring for list of catagories. 'Tx Failure' is the catch-all category",
}, txFailureLabels),
BlockQueryFailure: registerer.NewCounterVec(prometheus.CounterOpts{
Name: "cosmos_relayer_block_query_errors_total",
Help: "The total number of block query failures. The failures are separated into two catagories: 'RPC Client' and 'IBC Header'",
}, blockQueryFailureLabels),
ClientExpiration: registerer.NewGaugeVec(prometheus.GaugeOpts{
Name: "cosmos_relayer_client_expiration_seconds",
Help: "Seconds until the client expires",
}, clientExpirationLables),
}
}

0 comments on commit b22f408

Please sign in to comment.