Skip to content
This repository was archived by the owner on Jul 21, 2025. It is now read-only.

Commit 4cf7d67

Browse files
Merge pull request #26 from HurielHernandez/master
Trigger email alerts, improve error logging, and add CircleCI
2 parents b23a2ba + d999a38 commit 4cf7d67

File tree

16 files changed

+549
-267
lines changed

16 files changed

+549
-267
lines changed

.circleci/config.yml

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
# Java Maven CircleCI 2.0 configuration file
2+
#
3+
# Check https://circleci.com/docs/2.0/language-java/ for more details
4+
#
5+
version: 2.1
6+
jobs:
7+
build_aphelion-service:
8+
docker:
9+
- image: circleci/openjdk:8u171-jdk
10+
working_directory: ~/repo
11+
environment:
12+
MAVEN_OPTS: -Xmx3200m
13+
steps:
14+
- checkout
15+
- setup_remote_docker
16+
- restore_cache:
17+
keys:
18+
# when lock file changes, use increasingly general patterns to restore cache
19+
- maven-repo-{{ .Branch }}-{{ checksum "aphelion/pom.xml" }}
20+
- maven-repo-{{ .Branch }}-
21+
- maven-repo
22+
- run:
23+
name: Run Tests and Build Backend Services
24+
command: |
25+
pushd aphelion
26+
mvn clean package
27+
- save_cache:
28+
paths:
29+
- ~/.m2
30+
key: maven-repo-{{ .Branch }}-{{ checksum "aphelion/pom.xml" }}
31+
- run:
32+
name: Save Tests
33+
command: |
34+
mkdir -p ~/junit
35+
find . -type f -regex ".*/target/surefire-reports/.*xml" -exec cp {} ~/junit/ \;
36+
- store_test_results:
37+
path: ~/junit
38+
- save_cache:
39+
key: aphelion-{{ .Branch }}-{{ epoch }}
40+
paths:
41+
- aphelion/target/aphelion-1.0.jar
42+
- run:
43+
name: Build Aphelion service container
44+
command: |
45+
ls -ltr
46+
cd aphelion
47+
ls -ltr
48+
docker build --build-arg jar_file=aphelion-1.0.jar -t aphelion/aphelion-service ./
49+
docker images
50+
- run:
51+
name: Push Aphelion service to docker repo
52+
command: |
53+
source .circleci/push.sh aphelion/aphelion-service aphelion-service
54+
55+
build_and_push_dashboard-service:
56+
working_directory: ~/repo
57+
docker:
58+
- image: docker:17.05.0-ce-git
59+
steps:
60+
- checkout
61+
- setup_remote_docker
62+
- run:
63+
name: Build dashboard-service
64+
command: |
65+
cd dashboard-service
66+
docker build -t aphelion/aphelion-dashboard-service ./
67+
- run:
68+
name: Push Aphelion dashboard-service to docker repo
69+
command: |
70+
source .circleci/push.sh aphelion/aphelion-dashboard-service aphelion-dashboard-service
71+
build_and_push_web:
72+
working_directory: ~/repo
73+
docker:
74+
- image: docker:17.05.0-ce-git
75+
steps:
76+
- checkout
77+
- setup_remote_docker
78+
- run:
79+
name: Build web-service
80+
command: |
81+
cd web
82+
docker build -t aphelion/aphelion-web ./
83+
- run:
84+
name: Push Aphelion web-service to docker repo
85+
command: |
86+
source .circleci/push.sh aphelion/aphelion-web aphelion-web
87+
88+
workflows:
89+
version: 2.1
90+
ci-push:
91+
jobs:
92+
- build_and_push_dashboard-service
93+
- build_and_push_web
94+
- build_aphelion-service
95+
tag-and-release:
96+
jobs:
97+
- build_and_push_dashboard-service:
98+
filters:
99+
tags:
100+
only: /^v.*/
101+
branches:
102+
ignore: /.*/
103+
- build_and_push_web:
104+
filters:
105+
tags:
106+
only: /^v.*/
107+
branches:
108+
ignore: /.*/
109+
- build_aphelion-service:
110+
filters:
111+
tags:
112+
only: /^v.*/
113+
branches:
114+
ignore: /.*/

.circleci/push.sh

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Tag a locally built Docker image and push it to the configured repository.
#
# Usage (sourced from a CircleCI run step):
#   source .circleci/push.sh <local-image> <component>
#
# Arguments:
#   $1  name of the locally built image to tag
#   $2  component name appended to DOCKER_TAG_REPO to form the target repo
#
# Environment read by this script:
#   DOCKER_TAG_REPO               registry/namespace prefix of the target repo
#   DOCKER_USER / DOCKER_PASS     credentials passed to `docker login`
#   CIRCLE_BRANCH, CIRCLE_TAG,
#   CIRCLE_BUILD_NUM              each falls back to the literal "UNSET"
echo "$CIRCLE_BRANCH"

DOCKER_TAG=$1
DOCKER_TAG_COMPONENT=$2
DOCKER_REPO=${DOCKER_TAG_REPO}/${DOCKER_TAG_COMPONENT}

BRANCH=${CIRCLE_BRANCH:-UNSET}
DATE=$(date +"%m%d%y")
BUILD_NUM=${CIRCLE_BUILD_NUM:-UNSET}
TAG=${CIRCLE_TAG:-UNSET}

echo "BRANCH IS $BRANCH"
echo "BUILD_NUM IS $BUILD_NUM"
echo "TAG IS $TAG"

if [ "${BRANCH}" != "UNSET" ]; then
  echo "This is a CI branch build"
  # Docker tags may not contain "/" (or most other punctuation), so a branch
  # such as "feature/foo" would make `docker tag` fail. Replace every
  # character outside [A-Za-z0-9_.-] with "-".
  SAFE_BRANCH=$(printf '%s' "${BRANCH}" | tr -c 'A-Za-z0-9_.-' '-')
  DOCKER_RELEASE_TAG=${SAFE_BRANCH}-${DATE}-${BUILD_NUM}
  DOCKER_LATEST_TAG=latest
else
  echo "This is a Tag release"
  # Strip only the leading "v". The previous `cut -d "v" -f 2` split on every
  # "v", so a tag like "v1.0-dev" was truncated to "1.0-de".
  DOCKER_RELEASE_TAG=${TAG#v}
  DOCKER_LATEST_TAG=latest
fi

echo "Version Tag will be: ${DOCKER_RELEASE_TAG}"
echo "Latest Tag will be: ${DOCKER_LATEST_TAG}"

echo "pushing and tagging image"
docker tag "${DOCKER_TAG}" "${DOCKER_REPO}:${DOCKER_RELEASE_TAG}"
docker tag "${DOCKER_TAG}" "${DOCKER_REPO}:${DOCKER_LATEST_TAG}"
docker images
# NOTE(review): passing the password with -p exposes it in the process list.
# `--password-stdin` would be safer, but it appears to require a newer Docker
# CLI than the docker:17.05.0-ce-git image used by the CI jobs - confirm
# before switching.
docker login -u="${DOCKER_USER}" -p="${DOCKER_PASS}"
docker push "${DOCKER_REPO}:${DOCKER_RELEASE_TAG}"
docker push "${DOCKER_REPO}:${DOCKER_LATEST_TAG}"
echo "Done!"

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ the particular service exposes the current limit values via API calls.
2222
- Warning >=75% & <80%
2323
- Danger >=80%
2424
- Download CSV report
25+
- Email CSV report after reaching a configurable limit
26+
2527

2628
## Documentation
2729
- [Overview](docs/index.md)

aphelion/Dockerfile

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
1-
FROM amazonlinux:latest
1+
FROM amazonlinux:2-with-sources
2+
3+
ARG jar_file
24

35
ENV JAVA_OPTS="-XX:+UnlockExperimentalVMOptions -XX:+UseCGroupMemoryLimitForHeap -XX:MaxRAMFraction=1 -XshowSettings:vm "
46

7+
RUN amazon-linux-extras install epel
58
RUN yum install -y epel-release
69
RUN yum install -y --nogpgcheck java-1.8.0-openjdk.x86_64
7-
RUN yum -y install python34-pip
10+
RUN yum -y install python3-pip
811
RUN yum -y install cronie
912
RUN python3 -m pip install boto3
1013
RUN python3 -m pip install awscli
@@ -13,7 +16,8 @@ RUN mkdir -p /opt/staging/limits
1316
RUN mkdir -p /etc/cron.d
1417

1518
ADD deploy/limits/limits-cron /etc/cron.d/limits-cron
16-
ADD target/*.jar /opt/staging
19+
ADD target/${jar_file} /opt/staging
20+
1721
ADD deploy/startup.sh /opt/staging
1822
ADD deploy/limits /opt/staging
1923

aphelion/deploy/limits/limits.py

Lines changed: 111 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,13 @@
1919
import get_role_session
2020
import datetime
2121
import os
22+
import sys
23+
import traceback
24+
import smtplib
25+
from email.mime.multipart import MIMEMultipart
26+
from email.mime.text import MIMEText
27+
from email.mime.base import MIMEBase
28+
from email import encoders
2229

2330
#load environment configurations and get values we need
2431
role_name = os.environ.get('ASSUMED_ROLE_NAME', None)
@@ -28,6 +35,46 @@
2835
report_filename = os.environ.get('REPORT_FILE_NAME')
2936
now = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S-")
3037

38+
#mail settings
39+
sender = os.environ.get('MAIL_SENDER', None)
40+
message = os.environ.get('MAIL_MESSAGE', None)
41+
subject = os.environ.get('MAIL_SUBJECT', None)
42+
mailhost = os.environ.get('MAIL_HOST', None)
43+
port = os.environ.get('MAIL_PORT', None)
44+
receivers = [x.strip() for x in os.environ.get('MAIL_RECEIVERS').split(',')]
45+
reporting_limit = os.environ.get('MAIL_REPORTING_LIMIT', None)
46+
47+
def send_mail_attachment(message, attachment):
    """Send a plain-text email with a single file attached.

    The subject, sender, recipient list, SMTP host and SMTP port are taken
    from the module-level ``subject``, ``sender``, ``receivers``,
    ``mailhost`` and ``port`` settings.

    Args:
        message: Plain-text body of the email.
        attachment: Path of the file to attach. The attachment is named
            after the last ``/``-separated component of this path.

    Raises:
        OSError: If the attachment cannot be read or the SMTP host cannot
            be reached.
        smtplib.SMTPException: If the server rejects the message.
    """
    msg = MIMEMultipart()
    # Create headers
    msg['Subject'] = subject
    msg['From'] = sender
    # RFC 5322 address lists are comma-separated; ";" is not a valid separator.
    msg['To'] = ", ".join(receivers)
    part = MIMEBase('application', "octet-stream")
    # Attach attachment; the context manager closes the file handle
    with open(attachment, "rb") as attachment_file:
        part.set_payload(attachment_file.read())
    encoders.encode_base64(part)
    part.add_header('Content-Disposition', 'attachment', filename=attachment.split('/')[-1])
    msg.attach(part)
    msg.attach(MIMEText(message, 'plain'))
    # Send it off; leaving the with-block issues QUIT and closes the socket
    with smtplib.SMTP(mailhost, port) as smtpObj:
        smtpObj.sendmail(sender, receivers, msg.as_string())
    print("Successfully sent email")
64+
65+
def send_mail(message):
    """Send a plain-text email without attachments.

    The subject, sender, recipient list, SMTP host and SMTP port are taken
    from the module-level ``subject``, ``sender``, ``receivers``,
    ``mailhost`` and ``port`` settings.

    Args:
        message: Plain-text body of the email.

    Raises:
        OSError: If the SMTP host cannot be reached.
        smtplib.SMTPException: If the server rejects the message.
    """
    msg = MIMEMultipart()
    # Create headers
    msg['Subject'] = subject
    msg['From'] = sender
    # RFC 5322 address lists are comma-separated; ";" is not a valid separator.
    msg['To'] = ", ".join(receivers)
    # Attach message
    msg.attach(MIMEText(message, 'plain'))
    # Send it off; leaving the with-block issues QUIT and closes the socket
    with smtplib.SMTP(mailhost, port) as smtpObj:
        smtpObj.sendmail(sender, receivers, msg.as_string())
    print("Successfully sent email")
77+
3178
#do some "formatting" of the limits collected by TA, as well as drop limits with no usage
3279
def ta_limit_to_dict(limit):
3380
#extract values into dictionary
@@ -48,48 +95,69 @@ def ta_limit_to_dict(limit):
4895
account_out = {'id': account_id}
4996
all_limits = []
5097
#get boto3 session for the account
51-
sess = get_role_session.get_role_session(account_id, role_name, role_session_name)
52-
#instance_types = defaultdict(int)
53-
for region in regions:
54-
#get total EC2 instances since TA does not check TOTAL on-demand instances, only by instance type
55-
total_instances = 0
56-
ec2 = sess.resource('ec2', region_name=region)
57-
for instance in ec2.instances.page_size(count=100):
58-
if instance.instance_lifecycle != 'spot':
59-
#instance_types["%s %s" % (region,instance.instance_type)] += 1
60-
total_instances += 1
61-
#and check that count against the configured limit from EC2
62-
ec2c = sess.client('ec2', region_name=region)
63-
attributes = ec2c.describe_account_attributes(AttributeNames=['max-instances'])
64-
all_limits.append({'region':region, 'service':'EC2', 'limit':'Max On-Demand Instances', 'max':attributes['AccountAttributes'][0]['AttributeValues'][0]['AttributeValue'], 'used':total_instances})
65-
66-
#inspect DMS resources and limits
67-
dms = sess.client('dms', region_name=region)
68-
dms_limits = dms.describe_account_attributes()
69-
for dms_limit in dms_limits['AccountQuotas']:
70-
all_limits.append({'region':region, 'service':'DMS', 'limit':dms_limit['AccountQuotaName'], 'max':dms_limit['Max'], 'used':dms_limit['Used']})
71-
72-
#start the check from trusted advisor. remember that the check needs to be refreshed, so that should have been run at least an hour before hand
73-
# this is only run once per account since TA is global.
74-
support = sess.client('support')
75-
ta_resp = support.describe_trusted_advisor_check_result(checkId = 'eW7HH0l7J9', language='en')
76-
#walk the list of TA resources, and add them to the master list if valid
77-
if 'flaggedResources' in ta_resp['result']:
78-
for limit in ta_resp['result']['flaggedResources']:
79-
limit_dict = ta_limit_to_dict(limit['metadata'])
80-
if limit_dict != None:
81-
all_limits.append(limit_dict)
82-
account_out['limits'] = all_limits
83-
report_out.append(account_out)
84-
85-
headers = ['AccountID','Region','Service','Limit','Used','Max','% Usage']
86-
csvfile = open('/opt/staging/limits/' + now + report_filename, 'w')
87-
csvwriter = csv.writer(csvfile, quoting=csv.QUOTE_ALL)
88-
89-
csvwriter.writerow(headers)
90-
for account in report_out:
91-
for limit in sorted(account['limits'], key=lambda x: str(x['region']) + str(x['service']) + str(x['limit'])):
92-
csvwriter.writerow([account['id'],limit['region'], limit['service'], limit['limit'], limit['used'], limit['max'],
93-
str(int(float(limit['used'])/float(limit['max'])*100)) + '%'])
98+
try:
99+
sess = get_role_session.get_role_session(account_id, role_name, role_session_name)
100+
#instance_types = defaultdict(int)
101+
for region in regions:
102+
#get total EC2 instances since TA does not check TOTAL on-demand instances, only by instance type
103+
total_instances = 0
104+
ec2 = sess.resource('ec2', region_name=region)
105+
for instance in ec2.instances.page_size(count=100):
106+
if instance.instance_lifecycle != 'spot':
107+
#instance_types["%s %s" % (region,instance.instance_type)] += 1
108+
total_instances += 1
109+
#and check that count against the configured limit from EC2
110+
ec2c = sess.client('ec2', region_name=region)
111+
attributes = ec2c.describe_account_attributes(AttributeNames=['max-instances'])
112+
all_limits.append({'region':region, 'service':'EC2', 'limit':'Max On-Demand Instances', 'max':attributes['AccountAttributes'][0]['AttributeValues'][0]['AttributeValue'], 'used':total_instances})
113+
114+
#inspect DMS resources and limits
115+
dms = sess.client('dms', region_name=region)
116+
dms_limits = dms.describe_account_attributes()
117+
for dms_limit in dms_limits['AccountQuotas']:
118+
all_limits.append({'region':region, 'service':'DMS', 'limit':dms_limit['AccountQuotaName'], 'max':dms_limit['Max'], 'used':dms_limit['Used']})
119+
120+
#start the check from trusted advisor. remember that the check needs to be refreshed, so that should have been run at least an hour before hand
121+
# this is only run once per account since TA is global.
122+
support = sess.client('support')
123+
ta_resp = support.describe_trusted_advisor_check_result(checkId = 'eW7HH0l7J9', language='en')
124+
#walk the list of TA resources, and add them to the master list if valid
125+
if 'flaggedResources' in ta_resp['result']:
126+
for limit in ta_resp['result']['flaggedResources']:
127+
limit_dict = ta_limit_to_dict(limit['metadata'])
128+
if limit_dict != None:
129+
all_limits.append(limit_dict)
130+
account_out['limits'] = all_limits
131+
report_out.append(account_out)
132+
133+
headers = ['AccountID','Region','Service','Limit','Used','Max','% Usage']
134+
csvfilelocation = '/opt/staging/limits/' + now + report_filename
135+
csvfile = open(csvfilelocation, 'w')
136+
csvwriter = csv.writer(csvfile, quoting=csv.QUOTE_ALL)
137+
138+
is_exceeding_limit = None
139+
140+
csvwriter.writerow(headers)
141+
for account in report_out:
142+
for limit in sorted(account['limits'], key=lambda x: str(x['region']) + str(x['service']) + str(x['limit'])):
143+
limit_percent = int(float(limit['used'])/float(limit['max'])*100)
144+
145+
try:
146+
if reporting_limit is not None and limit_percent >= int(reporting_limit):
147+
is_exceeding_limit = True
148+
except ValueError:
149+
print("Could not convert data to an integer.", sys.exc_info()[0])
150+
151+
152+
csvwriter.writerow([account['id'],limit['region'], limit['service'], limit['limit'], limit['used'], limit['max'],
153+
str(limit_percent) + '%'])
94154

155+
if is_exceeding_limit:
156+
send_mail_attachment(message, csvfilelocation)
95157

158+
except Exception:
159+
print("Unexpected error:", sys.exc_info()[0])
160+
csvfile = open('/opt/staging/limits/' + now + report_filename, 'w')
161+
csvwriter = csv.writer(csvfile, quoting=csv.QUOTE_ALL)
162+
csvwriter.writerow(["ERROR", traceback.format_exc()])
163+
raise

aphelion/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
<artifactId>aphelion</artifactId>
66
<packaging>jar</packaging>
7-
<version>1.0-SNAPSHOT</version>
7+
<version>1.0</version>
88

99
<parent>
1010
<groupId>org.springframework.boot</groupId>

0 commit comments

Comments
 (0)