Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/aws s3 collect multiple profiles #174

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cmd/exporter/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ type Config struct {
ProjectID string
Providers struct {
AWS struct {
Profile string
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only thing I'm confused about now is why even have this at all? It's not used anywhere as far as I can tell -

func selectProvider(cfg *config.Config) (provider.Provider, error) {
	switch cfg.Provider {
	case "aws":
		return aws.New(&aws.Config{
			Region:         cfg.Providers.AWS.Region,
			Profiles:       strings.Split(cfg.Providers.AWS.Profiles.String(), ","),
			ScrapeInterval: cfg.Collector.ScrapeInterval,
			Services:       strings.Split(cfg.Providers.AWS.Services.String(), ","),
		})

	case "gcp":
		return google.New(&google.Config{
			ProjectId:       cfg.ProjectID,
			Region:          cfg.Providers.GCP.Region,
			Projects:        cfg.Providers.GCP.Projects.String(),
			DefaultDiscount: cfg.Providers.GCP.DefaultGCSDiscount,
			ScrapeInterval:  cfg.Collector.ScrapeInterval,
			Services:        strings.Split(cfg.Providers.GCP.Services.String(), ","),
		})

	default:
		return nil, fmt.Errorf("unknown provider")
	}
}

Profiles StringSliceFlag
Region string
Services StringSliceFlag
Expand Down
5 changes: 3 additions & 2 deletions cmd/exporter/exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ import (

func providerFlags(fs *flag.FlagSet, cfg *config.Config) {
flag.StringVar(&cfg.Provider, "provider", "aws", "aws or gcp")
fs.Var(&cfg.Providers.AWS.Profiles, "aws.profile", "AWS profile(s).")
fs.StringVar(&cfg.Providers.AWS.Profile, "aws.profile", "", "AWS Profile to authenticate with.")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto to comment above ^

This doesn't seem to be used at all

fs.Var(&cfg.Providers.AWS.Profiles, "aws.profiles", "AWS Profiles to collect resources from.")
// TODO: RENAME THIS TO JUST PROJECTS
fs.Var(&cfg.Providers.GCP.Projects, "gcp.bucket-projects", "GCP project(s).")
fs.Var(&cfg.Providers.AWS.Services, "aws.services", "AWS service(s).")
Expand All @@ -51,7 +52,7 @@ func selectProvider(cfg *config.Config) (provider.Provider, error) {
case "aws":
return aws.New(&aws.Config{
Region: cfg.Providers.AWS.Region,
Profile: cfg.Providers.AWS.Profiles.String(),
Profiles: strings.Split(cfg.Providers.AWS.Profiles.String(), ","),
ScrapeInterval: cfg.Collector.ScrapeInterval,
Services: strings.Split(cfg.Providers.AWS.Services.String(), ","),
})
Expand Down
46 changes: 27 additions & 19 deletions pkg/aws/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@ import (
"time"

awsconfig "github.com/aws/aws-sdk-go-v2/config"
"github.com/aws/aws-sdk-go-v2/service/costexplorer"
awscostexplorer "github.com/aws/aws-sdk-go-v2/service/costexplorer"

"github.com/grafana/cloudcost-exporter/pkg/aws/costexplorer"

Comment on lines +12 to +14
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit GoIsm: remove the empty lines (not the one in line 9, tho).

"github.com/prometheus/client_golang/prometheus"

cloudcost_exporter "github.com/grafana/cloudcost-exporter"
Expand All @@ -19,7 +22,7 @@ import (
type Config struct {
Services []string
Region string
Profile string
Profiles []string
ScrapeInterval time.Duration
}

Expand Down Expand Up @@ -98,26 +101,31 @@ func New(config *Config) (*AWS, error) {
for _, service := range services {
switch service {
case "S3":
// There are two scenarios:
// 1. Running locally, the user must pass in a region and profile to use
// 2. Running within an EC2 instance and the region and profile can be derived
// I'm going to use the AWS SDK to handle this for me. If the user has provided a region and profile, it will use that.
// If not, it will use the EC2 instance metadata service to determine the region and credentials.
// This is the same logic that the AWS CLI uses, so it should be fine.
options := []func(*awsconfig.LoadOptions) error{awsconfig.WithEC2IMDSRegion()}
if config.Region != "" {
options = append(options, awsconfig.WithRegion(config.Region))
var clients []costexplorer.CostExplorer
if len(config.Profiles) == 0 {
return nil, fmt.Errorf("no profiles provided")
Comment on lines +105 to +106
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only problem is this doesn't quite work. If you split an empty string, it results in a length one array 😡

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we either check that:

  • cfg.Providers.AWS.Profiles != "" when defined
  • move the check to the loop:
case "S3":
			var clients []costexplorer.CostExplorer
			for _, profile := range config.Profiles {
				if profile == "" {
					return nil, fmt.Errorf("invalid profile provided")
				}
				// There are two scenarios:
				// 1. Running locally, the user must pass in a region and profile to use
				// 2. Running within an EC2 instance and the region and profile can be derived
				// I'm going to use the AWS SDK to handle this for me. If the user has provided a region and profile, it will use that.
				// If not, it will use the EC2 instance metadata service to determine the region and credentials.
				// This is the same logic that the AWS CLI uses, so it should be fine.
				options := []func(*awsconfig.LoadOptions) error{awsconfig.WithEC2IMDSRegion()}
				if config.Region != "" {
					options = append(options, awsconfig.WithRegion(config.Region))
				}
				options = append(options, awsconfig.WithSharedConfigProfile(profile))
				ac, err := awsconfig.LoadDefaultConfig(context.Background(), options...)
				if err != nil {
					return nil, err
				}
				client := awscostexplorer.NewFromConfig(ac)
				clients = append(clients, client)
			}

			collector, err := s3.New(config.ScrapeInterval, clients)
			if err != nil {
				return nil, fmt.Errorf("error creating s3 collector: %w", err)
			}
			collectors = append(collectors, collector)```

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(suggestion) We probably want to test this, and in order to make it simpler, I'd suggest adding a package variable var ErrNoProfiles = errors.New("no profiles provided") (and BTW, better to use errors.New instead of fmt.Errorf for simple strings). By adding this error then you can use errors.Is in the test.

}
if config.Profile != "" {
options = append(options, awsconfig.WithSharedConfigProfile(config.Profile))
}
ac, err := awsconfig.LoadDefaultConfig(context.Background(), options...)
if err != nil {
return nil, err
for _, profile := range config.Profiles {
// There are two scenarios:
// 1. Running locally, the user must pass in a region and profile to use
// 2. Running within an EC2 instance and the region and profile can be derived
// I'm going to use the AWS SDK to handle this for me. If the user has provided a region and profile, it will use that.
// If not, it will use the EC2 instance metadata service to determine the region and credentials.
// This is the same logic that the AWS CLI uses, so it should be fine.
options := []func(*awsconfig.LoadOptions) error{awsconfig.WithEC2IMDSRegion()}
if config.Region != "" {
options = append(options, awsconfig.WithRegion(config.Region))
}
options = append(options, awsconfig.WithSharedConfigProfile(profile))
ac, err := awsconfig.LoadDefaultConfig(context.Background(), options...)
if err != nil {
return nil, err
}
client := awscostexplorer.NewFromConfig(ac)
clients = append(clients, client)
}

client := costexplorer.NewFromConfig(ac)
collector, err := s3.New(config.ScrapeInterval, client)
collector, err := s3.New(config.ScrapeInterval, clients)
if err != nil {
return nil, fmt.Errorf("error creating s3 collector: %w", err)
}
Expand Down
39 changes: 22 additions & 17 deletions pkg/aws/s3/s3.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ func NewMetrics() Metrics {
// Collector is the AWS implementation of the Collector interface
// It is responsible for registering and collecting metrics
type Collector struct {
client costexplorer.CostExplorer
clients []costexplorer.CostExplorer
interval time.Duration
nextScrape time.Time
metrics Metrics
Expand All @@ -145,9 +145,9 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) error {
}

// New creates a new Collector with a client and scrape interval defined.
func New(scrapeInterval time.Duration, client costexplorer.CostExplorer) (*Collector, error) {
func New(scrapeInterval time.Duration, clients []costexplorer.CostExplorer) (*Collector, error) {
return &Collector{
client: client,
clients: clients,
interval: scrapeInterval,
// Initially Set nextScrape to the current time minus the scrape interval so that the first scrape will run immediately
nextScrape: time.Now().Add(-scrapeInterval),
Expand All @@ -171,24 +171,29 @@ func (c *Collector) Register(registry provider.Registry) error {
return nil
}

// Collect is the function that will be called by the Prometheus client anytime a scrape is performed.
// CollectMetrics Collect is the function that will be called by the Prometheus client anytime a scrape is performed.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note, I have not tested this at length, but anecdotally it has improved the performance of collecting multiple profiles:

// CollectMetrics Collect is the function that will be called by the Prometheus client anytime a scrape is performed.
func (c *Collector) CollectMetrics(ch chan<- prometheus.Metric) float64 {
	c.m.Lock()
	defer c.m.Unlock()
	now := time.Now()
	// :fire: Checking scrape interval is to _mitigate_ expensive API calls to the cost explorer API

	if c.billingData == nil || now.After(c.nextScrape) {
		eg := new(errgroup.Group)
		outputsLock := sync.Mutex{}
		var billingOutputs []*awscostexplorer.GetCostAndUsageOutput
		for _, client := range c.clients {
			eg.Go(func() error {
				endDate := time.Now().AddDate(0, 0, -1)
				// Current assumption is that we're going to pull 30 days worth of billing data
				startDate := endDate.AddDate(0, 0, -30)
				billingData, err := getBillingData(client, startDate, endDate, c.metrics)
				if err != nil {
					return err
				}
				outputsLock.Lock()
				billingOutputs = append(billingOutputs, billingData...)
				outputsLock.Unlock()
				return nil
			})
		}

		err := eg.Wait()
		if err != nil {
			log.Printf("Error getting billing data: %v\n", err)
			return 0
		}
		c.nextScrape = time.Now().Add(c.interval)
		c.metrics.NextScrapeGauge.Set(float64(c.nextScrape.Unix()))
		c.billingData = parseBillingData(billingOutputs)
	}

	exportMetrics(c.billingData, c.metrics)
	return 1.0
}

I can't speak to the validity of the results, but running it this way seemed to speed things up....

func (c *Collector) CollectMetrics(ch chan<- prometheus.Metric) float64 {
c.m.Lock()
defer c.m.Unlock()
now := time.Now()
// :fire: Checking scrape interval is to _mitigate_ expensive API calls to the cost explorer API

if c.billingData == nil || now.After(c.nextScrape) {
endDate := time.Now().AddDate(0, 0, -1)
// Current assumption is that we're going to pull 30 days worth of billing data
startDate := endDate.AddDate(0, 0, -30)
billingData, err := getBillingData(c.client, startDate, endDate, c.metrics)
if err != nil {
log.Printf("Error getting billing data: %v\n", err)
return 0
var billingOutputs []*awscostexplorer.GetCostAndUsageOutput
for _, client := range c.clients {
endDate := time.Now().AddDate(0, 0, -1)
// Current assumption is that we're going to pull 30 days worth of billing data
startDate := endDate.AddDate(0, 0, -30)
billingData, err := getBillingData(client, startDate, endDate, c.metrics)
if err != nil {
log.Printf("Error getting billing data: %v\n", err)
return 0
}
billingOutputs = append(billingOutputs, billingData...)
c.nextScrape = time.Now().Add(c.interval)
c.metrics.NextScrapeGauge.Set(float64(c.nextScrape.Unix()))
}
c.billingData = billingData
c.nextScrape = time.Now().Add(c.interval)
c.metrics.NextScrapeGauge.Set(float64(c.nextScrape.Unix()))
c.billingData = parseBillingData(billingOutputs)
}

exportMetrics(c.billingData, c.metrics)
Expand Down Expand Up @@ -278,7 +283,7 @@ func (s *BillingData) AddMetricGroup(region string, component string, group type

// getBillingData is responsible for making the API call to the AWS Cost Explorer API and parsing the response
// into a S3BillingData struct
func getBillingData(client costexplorer.CostExplorer, startDate time.Time, endDate time.Time, m Metrics) (*BillingData, error) {
func getBillingData(client costexplorer.CostExplorer, startDate time.Time, endDate time.Time, m Metrics) ([]*awscostexplorer.GetCostAndUsageOutput, error) {
log.Printf("Getting billing data for %s to %s\n", startDate.Format("2006-01-02"), endDate.Format("2006-01-02"))
input := &awscostexplorer.GetCostAndUsageInput{
TimePeriod: &types.DateInterval{
Expand Down Expand Up @@ -309,7 +314,7 @@ func getBillingData(client costexplorer.CostExplorer, startDate time.Time, endDa
if err != nil {
log.Printf("Error getting cost and usage: %v\n", err)
m.RequestErrorsCount.Inc()
return &BillingData{}, err
return nil, err
}
outputs = append(outputs, output)
if output.NextPageToken == nil {
Expand All @@ -318,7 +323,7 @@ func getBillingData(client costexplorer.CostExplorer, startDate time.Time, endDa
input.NextPageToken = output.NextPageToken
}

return parseBillingData(outputs), nil
return outputs, nil
}

// parseBillingData takes the output from the AWS Cost Explorer API and parses it into a S3BillingData struct
Expand Down
10 changes: 6 additions & 4 deletions pkg/aws/s3/s3_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"go.uber.org/mock/gomock"

mockcostexplorer "github.com/grafana/cloudcost-exporter/mocks/pkg/aws/costexplorer"
"github.com/grafana/cloudcost-exporter/pkg/aws/costexplorer"
mock_provider "github.com/grafana/cloudcost-exporter/pkg/provider/mocks"
)

Expand Down Expand Up @@ -180,7 +181,7 @@ func TestNewCollector(t *testing.T) {
t.Run(name, func(t *testing.T) {
c := mockcostexplorer.NewCostExplorer(t)

got, err := New(tt.args.interval, c)
got, err := New(tt.args.interval, []costexplorer.CostExplorer{c})
if tt.error {
require.Error(t, err)
return
Expand Down Expand Up @@ -600,9 +601,10 @@ cloudcost_aws_s3_storage_by_location_usd_per_gibyte_hour{class="StandardStorage"
}

c := &Collector{
client: ce,
clients: []costexplorer.CostExplorer{ce},
nextScrape: tc.nextScrape,
metrics: NewMetrics(),
//profiles: []string{"testing"},
}
up := c.CollectMetrics(nil)
require.Equal(t, tc.expectedResponse, up)
Expand Down Expand Up @@ -674,7 +676,7 @@ func TestCollector_MultipleCalls(t *testing.T) {
Return(&awscostexplorer.GetCostAndUsageOutput{}, nil)

c := &Collector{
client: ce,
clients: []costexplorer.CostExplorer{ce},
metrics: NewMetrics(),
interval: 1 * time.Hour,
}
Expand Down Expand Up @@ -739,7 +741,7 @@ func TestCollector_MultipleCalls(t *testing.T) {
Times(goroutines * collectCalls)

c := &Collector{
client: ce,
clients: []costexplorer.CostExplorer{ce},
metrics: NewMetrics(),
}

Expand Down
Loading