-
Notifications
You must be signed in to change notification settings - Fork 4
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix/aws s3 collect multiple profiles #174
Changes from 4 commits
219daa6
0fb7a6b
fb992dc
4b2608d
0a90223
c855a7a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,7 +28,8 @@ import ( | |
|
||
func providerFlags(fs *flag.FlagSet, cfg *config.Config) { | ||
flag.StringVar(&cfg.Provider, "provider", "aws", "aws or gcp") | ||
fs.Var(&cfg.Providers.AWS.Profiles, "aws.profile", "AWS profile(s).") | ||
fs.StringVar(&cfg.Providers.AWS.Profile, "aws.profile", "", "AWS Profile to authenticate with.") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ditto to comment above ^ This doesn't seem to be used at all |
||
fs.Var(&cfg.Providers.AWS.Profiles, "aws.profiles", "AWS Profiles to collect resources from.") | ||
// TODO: RENAME THIS TO JUST PROJECTS | ||
fs.Var(&cfg.Providers.GCP.Projects, "gcp.bucket-projects", "GCP project(s).") | ||
fs.Var(&cfg.Providers.AWS.Services, "aws.services", "AWS service(s).") | ||
|
@@ -51,7 +52,8 @@ func selectProvider(cfg *config.Config) (provider.Provider, error) { | |
case "aws": | ||
return aws.New(&aws.Config{ | ||
Region: cfg.Providers.AWS.Region, | ||
Profile: cfg.Providers.AWS.Profiles.String(), | ||
Profile: cfg.Providers.AWS.Profile, | ||
Profiles: strings.Split(cfg.Providers.AWS.Profiles.String(), ","), | ||
ScrapeInterval: cfg.Collector.ScrapeInterval, | ||
Services: strings.Split(cfg.Providers.AWS.Services.String(), ","), | ||
}) | ||
|
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -8,7 +8,10 @@ import ( | |||||||||
"time" | ||||||||||
|
||||||||||
awsconfig "github.com/aws/aws-sdk-go-v2/config" | ||||||||||
"github.com/aws/aws-sdk-go-v2/service/costexplorer" | ||||||||||
awscostexplorer "github.com/aws/aws-sdk-go-v2/service/costexplorer" | ||||||||||
|
||||||||||
"github.com/grafana/cloudcost-exporter/pkg/aws/costexplorer" | ||||||||||
|
||||||||||
Comment on lines
+12
to
+14
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit GoIsm: remove the empty lines (not the one in line 9, tho). |
||||||||||
"github.com/prometheus/client_golang/prometheus" | ||||||||||
|
||||||||||
cloudcost_exporter "github.com/grafana/cloudcost-exporter" | ||||||||||
|
@@ -20,6 +23,7 @@ type Config struct { | |||||||||
Services []string | ||||||||||
Region string | ||||||||||
Profile string | ||||||||||
Profiles []string | ||||||||||
ScrapeInterval time.Duration | ||||||||||
} | ||||||||||
|
||||||||||
|
@@ -98,26 +102,30 @@ func New(config *Config) (*AWS, error) { | |||||||||
for _, service := range services { | ||||||||||
switch service { | ||||||||||
case "S3": | ||||||||||
// There are two scenarios: | ||||||||||
// 1. Running locally, the user must pass in a region and profile to use | ||||||||||
// 2. Running within an EC2 instance and the region and profile can be derived | ||||||||||
// I'm going to use the AWS SDK to handle this for me. If the user has provided a region and profile, it will use that. | ||||||||||
// If not, it will use the EC2 instance metadata service to determine the region and credentials. | ||||||||||
// This is the same logic that the AWS CLI uses, so it should be fine. | ||||||||||
options := []func(*awsconfig.LoadOptions) error{awsconfig.WithEC2IMDSRegion()} | ||||||||||
if config.Region != "" { | ||||||||||
options = append(options, awsconfig.WithRegion(config.Region)) | ||||||||||
} | ||||||||||
if config.Profile != "" { | ||||||||||
options = append(options, awsconfig.WithSharedConfigProfile(config.Profile)) | ||||||||||
} | ||||||||||
ac, err := awsconfig.LoadDefaultConfig(context.Background(), options...) | ||||||||||
if err != nil { | ||||||||||
return nil, err | ||||||||||
var clients []costexplorer.CostExplorer | ||||||||||
for _, profile := range config.Profiles { | ||||||||||
// There are two scenarios: | ||||||||||
// 1. Running locally, the user must pass in a region and profile to use | ||||||||||
// 2. Running within an EC2 instance and the region and profile can be derived | ||||||||||
// I'm going to use the AWS SDK to handle this for me. If the user has provided a region and profile, it will use that. | ||||||||||
// If not, it will use the EC2 instance metadata service to determine the region and credentials. | ||||||||||
// This is the same logic that the AWS CLI uses, so it should be fine. | ||||||||||
options := []func(*awsconfig.LoadOptions) error{awsconfig.WithEC2IMDSRegion()} | ||||||||||
if config.Region != "" { | ||||||||||
options = append(options, awsconfig.WithRegion(config.Region)) | ||||||||||
} | ||||||||||
if config.Profile != "" { | ||||||||||
options = append(options, awsconfig.WithSharedConfigProfile(profile)) | ||||||||||
} | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure what's going on here. You check here that the single Basically the What am I missing? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Pretty on point question. Mainly, naming is hard 😅 It looks like we're not using the
I think the initial intent of the check you see is when doing local development to figure out which profile we should target. In dev/production that should be handled by: options := []func(*awsconfig.LoadOptions) error{awsconfig.WithEC2IMDSRegion()} Let me remove the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. agreed here, it looks like this might've been a misstep. It now appears that we can't even use the flags separately, they must be used together, as specifying There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
I'd suggest something like this, perhaps with a conditional flag to look if Or better yet, I'd just eliminate the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've opted to stick with
options := []func(*awsconfig.LoadOptions) error{awsconfig.WithEC2IMDSRegion()} I need to be a bit mindful when rolling this change out. If the |
||||||||||
ac, err := awsconfig.LoadDefaultConfig(context.Background(), options...) | ||||||||||
if err != nil { | ||||||||||
return nil, err | ||||||||||
} | ||||||||||
client := awscostexplorer.NewFromConfig(ac) | ||||||||||
clients = append(clients, client) | ||||||||||
} | ||||||||||
|
||||||||||
client := costexplorer.NewFromConfig(ac) | ||||||||||
collector, err := s3.New(config.ScrapeInterval, client) | ||||||||||
collector, err := s3.New(config.ScrapeInterval, clients) | ||||||||||
if err != nil { | ||||||||||
return nil, fmt.Errorf("error creating s3 collector: %w", err) | ||||||||||
} | ||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -122,7 +122,7 @@ func NewMetrics() Metrics { | |
// Collector is the AWS implementation of the Collector interface | ||
// It is responsible for registering and collecting metrics | ||
type Collector struct { | ||
client costexplorer.CostExplorer | ||
clients []costexplorer.CostExplorer | ||
interval time.Duration | ||
nextScrape time.Time | ||
metrics Metrics | ||
|
@@ -145,9 +145,9 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) error { | |
} | ||
|
||
// New creates a new Collector with a client and scrape interval defined. | ||
func New(scrapeInterval time.Duration, client costexplorer.CostExplorer) (*Collector, error) { | ||
func New(scrapeInterval time.Duration, clients []costexplorer.CostExplorer) (*Collector, error) { | ||
return &Collector{ | ||
client: client, | ||
clients: clients, | ||
interval: scrapeInterval, | ||
// Initially Set nextScrape to the current time minus the scrape interval so that the first scrape will run immediately | ||
nextScrape: time.Now().Add(-scrapeInterval), | ||
|
@@ -171,24 +171,29 @@ func (c *Collector) Register(registry provider.Registry) error { | |
return nil | ||
} | ||
|
||
// Collect is the function that will be called by the Prometheus client anytime a scrape is performed. | ||
// CollectMetrics Collect is the function that will be called by the Prometheus client anytime a scrape is performed. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note, I have not tested this at length, but anecdotally it has improved the performance of collecting multiple profiles: // CollectMetrics Collect is the function that will be called by the Prometheus client anytime a scrape is performed.
func (c *Collector) CollectMetrics(ch chan<- prometheus.Metric) float64 {
c.m.Lock()
defer c.m.Unlock()
now := time.Now()
// :fire: Checking scrape interval is to _mitigate_ expensive API calls to the cost explorer API
if c.billingData == nil || now.After(c.nextScrape) {
eg := new(errgroup.Group)
outputsLock := sync.Mutex{}
var billingOutputs []*awscostexplorer.GetCostAndUsageOutput
for _, client := range c.clients {
eg.Go(func() error {
endDate := time.Now().AddDate(0, 0, -1)
// Current assumption is that we're going to pull 30 days worth of billing data
startDate := endDate.AddDate(0, 0, -30)
billingData, err := getBillingData(client, startDate, endDate, c.metrics)
if err != nil {
return err
}
outputsLock.Lock()
billingOutputs = append(billingOutputs, billingData...)
outputsLock.Unlock()
return nil
})
}
err := eg.Wait()
if err != nil {
log.Printf("Error getting billing data: %v\n", err)
return 0
}
c.nextScrape = time.Now().Add(c.interval)
c.metrics.NextScrapeGauge.Set(float64(c.nextScrape.Unix()))
c.billingData = parseBillingData(billingOutputs)
}
exportMetrics(c.billingData, c.metrics)
return 1.0
} I can't speak to the validity of the results, but running it this way seemed to speed things up.... |
||
func (c *Collector) CollectMetrics(ch chan<- prometheus.Metric) float64 { | ||
c.m.Lock() | ||
defer c.m.Unlock() | ||
now := time.Now() | ||
// :fire: Checking scrape interval is to _mitigate_ expensive API calls to the cost explorer API | ||
|
||
if c.billingData == nil || now.After(c.nextScrape) { | ||
endDate := time.Now().AddDate(0, 0, -1) | ||
// Current assumption is that we're going to pull 30 days worth of billing data | ||
startDate := endDate.AddDate(0, 0, -30) | ||
billingData, err := getBillingData(c.client, startDate, endDate, c.metrics) | ||
if err != nil { | ||
log.Printf("Error getting billing data: %v\n", err) | ||
return 0 | ||
var billingOutputs []*awscostexplorer.GetCostAndUsageOutput | ||
for _, client := range c.clients { | ||
endDate := time.Now().AddDate(0, 0, -1) | ||
// Current assumption is that we're going to pull 30 days worth of billing data | ||
startDate := endDate.AddDate(0, 0, -30) | ||
billingData, err := getBillingData(client, startDate, endDate, c.metrics) | ||
if err != nil { | ||
log.Printf("Error getting billing data: %v\n", err) | ||
return 0 | ||
} | ||
billingOutputs = append(billingOutputs, billingData...) | ||
c.nextScrape = time.Now().Add(c.interval) | ||
c.metrics.NextScrapeGauge.Set(float64(c.nextScrape.Unix())) | ||
} | ||
c.billingData = billingData | ||
c.nextScrape = time.Now().Add(c.interval) | ||
c.metrics.NextScrapeGauge.Set(float64(c.nextScrape.Unix())) | ||
c.billingData = parseBillingData(billingOutputs) | ||
} | ||
|
||
exportMetrics(c.billingData, c.metrics) | ||
|
@@ -278,7 +283,7 @@ func (s *BillingData) AddMetricGroup(region string, component string, group type | |
|
||
// getBillingData is responsible for making the API call to the AWS Cost Explorer API and parsing the response | ||
// into a S3BillingData struct | ||
func getBillingData(client costexplorer.CostExplorer, startDate time.Time, endDate time.Time, m Metrics) (*BillingData, error) { | ||
func getBillingData(client costexplorer.CostExplorer, startDate time.Time, endDate time.Time, m Metrics) ([]*awscostexplorer.GetCostAndUsageOutput, error) { | ||
log.Printf("Getting billing data for %s to %s\n", startDate.Format("2006-01-02"), endDate.Format("2006-01-02")) | ||
input := &awscostexplorer.GetCostAndUsageInput{ | ||
TimePeriod: &types.DateInterval{ | ||
|
@@ -309,7 +314,7 @@ func getBillingData(client costexplorer.CostExplorer, startDate time.Time, endDa | |
if err != nil { | ||
log.Printf("Error getting cost and usage: %v\n", err) | ||
m.RequestErrorsCount.Inc() | ||
return &BillingData{}, err | ||
return nil, err | ||
} | ||
outputs = append(outputs, output) | ||
if output.NextPageToken == nil { | ||
|
@@ -318,7 +323,7 @@ func getBillingData(client costexplorer.CostExplorer, startDate time.Time, endDa | |
input.NextPageToken = output.NextPageToken | ||
} | ||
|
||
return parseBillingData(outputs), nil | ||
return outputs, nil | ||
} | ||
|
||
// parseBillingData takes the output from the AWS Cost Explorer API and parses it into a S3BillingData struct | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The only thing I'm confused about now is why even have this at all? It's not used anywhere as far as I can tell -