Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Skip instance_type selection if type is already set #262

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions igvm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,27 @@

from __future__ import division

import json
import logging
import socket
import time
from concurrent import futures
from json import JSONDecodeError
from os import path
from pathlib import Path
from typing import Union, List
from urllib.error import HTTPError, URLError
from urllib.request import Request, urlopen

from paramiko import SSHConfig

from igvm.exceptions import TimeoutError
from igvm.settings import (
AWS_INSTANCES_OVERVIEW_URL,
AWS_INSTANCES_OVERVIEW_FILE,
AWS_INSTANCES_OVERVIEW_FILE_ETAG,
AWS_FALLBACK_INSTANCE_TYPE
)

_SIZE_FACTORS = {
'T': 1024 ** 4,
Expand Down Expand Up @@ -219,3 +231,54 @@ def parallel(
results.append(result)

return results


def aws_get_instances_overview(timeout: int = 5) -> Union[List, None]:
    """AWS Get Instances Overview

    Load or download the latest instances.json, which contains
    a complete overview about all instance_types, their configuration,
    performance and pricing.

    A HEAD request fetches the current ETag of the remote file. If it
    matches the ETag stored next to the cached copy in the home directory,
    the cached copy is returned; otherwise the file is downloaded, parsed
    and then cached together with its ETag.

    :param: timeout: Timeout value for the head/get request

    :return: VM types overview as list
             or None, if the parsing/download failed
    """

    url = AWS_INSTANCES_OVERVIEW_URL
    file = Path.home() / AWS_INSTANCES_OVERVIEW_FILE
    etag_file = Path.home() / AWS_INSTANCES_OVERVIEW_FILE_ETAG

    try:
        head_req = Request(url, method='HEAD')
        with urlopen(head_req, timeout=timeout) as resp:
            # Header lookup via .get() is case-insensitive and yields None
            # instead of raising KeyError when the server sends no ETag.
            etag = resp.headers.get('ETag') if resp.status == 200 else None
        if not etag:
            log.warning('Could not retrieve ETag from {}'.format(url))

        if etag and file.exists() and etag_file.exists():
            # Plain read mode: the cache must stay usable even if the
            # files are not writable for the current user.
            with open(etag_file, 'r') as f:
                prev_etag = f.read()
            if etag == prev_etag:
                with open(file, 'r') as f:
                    return json.load(f)

        with urlopen(url, timeout=timeout) as resp:
            content = resp.read().decode('utf-8')

        # Parse before touching the cache so that a truncated or invalid
        # download is never stored as if it were a good copy.
        overview = json.loads(content)

        with open(file, 'w') as f:
            f.write(content)
        # Store the ETag last: it marks the cached file as current, so it
        # must only be written once the content is safely on disk.
        if etag:
            with open(etag_file, 'w') as f:
                f.write(etag)

        return overview
    except (OSError, ValueError) as e:
        # OSError covers HTTPError/URLError, socket timeouts and file
        # access problems; ValueError covers JSONDecodeError and
        # UnicodeDecodeError.
        log.warning('Could not retrieve instances overview')
        log.warning(e)
        log.info('Proceeding with instance_type: '
                 f'{AWS_FALLBACK_INSTANCE_TYPE}'
                 )

    return None
83 changes: 17 additions & 66 deletions igvm/vm.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,32 +17,25 @@
from grp import getgrnam
from hashlib import sha1, sha256
from io import BytesIO
from pathlib import Path
from re import compile as re_compile
from typing import Optional, List, Union
from typing import Optional, List
from uuid import uuid4

import boto3
from botocore.exceptions import ClientError, CapacityNotAvailableError
from fabric.api import cd, get, hide, put, run, settings
from fabric.contrib.files import upload_template
from fabric.exceptions import NetworkError
from json.decoder import JSONDecodeError
from urllib.error import HTTPError
from urllib.request import Request, urlopen

from igvm.exceptions import ConfigError, HypervisorError, RemoteCommandError, VMError
from igvm.host import Host
from igvm.settings import (
AWS_ECU_FACTOR,
AWS_FALLBACK_INSTANCE_TYPE,
AWS_RETURN_CODES,
AWS_INSTANCES_OVERVIEW_FILE,
AWS_INSTANCES_OVERVIEW_FILE_ETAG,
AWS_INSTANCES_OVERVIEW_URL,
)
from igvm.transaction import Transaction
from igvm.utils import parse_size, wait_until
from igvm.utils import parse_size, wait_until, aws_get_instances_overview
from igvm.puppet import clean_cert

if typing.TYPE_CHECKING:
Expand Down Expand Up @@ -713,12 +706,21 @@ def aws_build(self,
:raises: VMError: Generic exception for VM errors of all kinds
"""

vm_types_overview = self.aws_get_instances_overview()
if vm_types_overview:
vm_types = self.aws_get_fitting_vm_types(vm_types_overview)
# The current solution for figuring out the best instance_type is not
# scalable for the disaster recovery case because every VM build parses
# a 70 MB JSON file in parallel. We are currently working on a different
# solution to prefill the instance_type for the disaster recovery case.
# We'll keep the functionality in igvm for now to be able to build VMs
# in AWS without the need for a prefill.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What exactly is the problem here? Is it loading the JSON file into Python structures or parsing those structures?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The memory footprint of loading the file is too big. When building 10 machines in parallel that way, the server allocates more than 2 GiB of memory, and we want to be able to build at least 50 VMs in parallel on one server.

if self.dataset_obj['aws_instance_type']:
vm_types = [self.dataset_obj['aws_instance_type']]
else:
vm_types = [AWS_FALLBACK_INSTANCE_TYPE]
self.dataset_obj['aws_instance_type'] = vm_types[0]
vm_types_overview = aws_get_instances_overview()
if vm_types_overview:
vm_types = self.aws_get_fitting_vm_types(vm_types_overview)
else:
vm_types = [AWS_FALLBACK_INSTANCE_TYPE]
self.dataset_obj['aws_instance_type'] = vm_types[0]

self.check_serveradmin_config()

Expand Down Expand Up @@ -1160,7 +1162,7 @@ def performance_value(self) -> float:
:return: performance_value of VM as float
"""

# Serveradmin can not handle floats right now so we safe them as
# Serveradmin can not handle floats right now so we save them as
# multiple ones of thousand and just divide them here again.
vm_load_99 = self.dataset_obj['load_99'] / 1000 # Default 0
vm_num_cpu = self.dataset_obj['num_cpu']
Expand All @@ -1179,57 +1181,6 @@ def performance_value(self) -> float:

return float(estimated_load)

def aws_get_instances_overview(
        self, timeout: int = 5) -> Union[List, None]:
    """AWS Get Instances Overview

    Load or download the latest instances.json, which contains
    a complete overview about all instance_types, their configuration,
    performance and pricing.

    A HEAD request fetches the current ETag of the remote file. If it
    matches the ETag stored next to the cached copy in the home directory,
    the cached copy is returned; otherwise the file is downloaded, parsed
    and then cached together with its ETag.

    :param: timeout: Timeout value for the head/get request

    :return: VM types overview as list
             or None, if the parsing/download failed
    """

    url = AWS_INSTANCES_OVERVIEW_URL
    file = Path.home() / AWS_INSTANCES_OVERVIEW_FILE
    etag_file = Path.home() / AWS_INSTANCES_OVERVIEW_FILE_ETAG

    try:
        head_req = Request(url, method='HEAD')
        with urlopen(head_req, timeout=timeout) as resp:
            # Header lookup via .get() is case-insensitive and yields None
            # instead of raising KeyError when the server sends no ETag.
            etag = resp.headers.get('ETag') if resp.status == 200 else None
        if not etag:
            log.warning('Could not retrieve ETag from {}'.format(url))

        if etag and file.exists() and etag_file.exists():
            # Plain read mode: the cache must stay usable even if the
            # files are not writable for the current user.
            with open(etag_file, 'r') as f:
                prev_etag = f.read()
            if etag == prev_etag:
                with open(file, 'r') as f:
                    return json.load(f)

        with urlopen(url, timeout=timeout) as resp:
            content = resp.read().decode('utf-8')

        # Parse before touching the cache so that a truncated or invalid
        # download is never stored as if it were a good copy.
        overview = json.loads(content)

        with open(file, 'w') as f:
            f.write(content)
        # Store the ETag last: it marks the cached file as current, so it
        # must only be written once the content is safely on disk.
        if etag:
            with open(etag_file, 'w') as f:
                f.write(etag)

        return overview
    except (OSError, ValueError) as e:
        # OSError covers HTTPError and its parent URLError (DNS failure,
        # refused connection), socket timeouts and file access problems;
        # ValueError covers JSONDecodeError and UnicodeDecodeError.
        log.warning('Could not retrieve instances overview')
        log.warning(e)
        log.info('Proceeding with instance_type: '
                 f'{AWS_FALLBACK_INSTANCE_TYPE}'
                 )

    return None

def aws_get_fitting_vm_types(self, overview: List) -> List:
"""AWS Get Fitting VM types

Expand Down