Skip to content

Commit

Permalink
Release 0.2.6
Browse files Browse the repository at this point in the history
  • Loading branch information
boratyng committed Jun 8, 2022
1 parent bee0ed4 commit 480f859
Show file tree
Hide file tree
Showing 15 changed files with 314 additions and 129 deletions.
4 changes: 2 additions & 2 deletions CITATION.cff
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
cff-version: "1.1.0"
message: "If you use this software, please cite it using these metadata."
title: ElasticBLAST
version: "0.2.5"
date-released: 2022-03-24
version: "0.2.6"
date-released: 2022-06-08
license: "NCBI Public Domain"
repository-code: "https://github.com/ncbi/elastic-blast/"
authors:
Expand Down
12 changes: 12 additions & 0 deletions DISCLAIMER.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
Although all reasonable efforts have been taken to ensure the accuracy
and reliability of the software and data, the NLM and the U.S.
Government do not and cannot warrant the performance or results that
may be obtained by using this software or data. The NLM and the U.S.
Government disclaim all warranties, express or implied, including
warranties of performance, merchantability or fitness for any
particular purpose.

Users of ElasticBLAST are solely responsible for any and all cloud service
provider charges associated with their use of ElasticBLAST. 

See also: [LICENSE](LICENSE.md)
2 changes: 1 addition & 1 deletion bin/gcp-setup-elastic-blast-janitor.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ usage() {
echo -e "\t-h: Show this message"
}

while getopts "u:ph" OPT; do
while getopts "u:p:h" OPT; do
case $OPT in
u) user=${OPTARG}
;;
Expand Down
10 changes: 5 additions & 5 deletions requirements/base.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
wheel == 0.37.1
setuptools == 56.0.0
importlib-resources == 5.4.0
importlib-metadata == 4.11.3
pex == 2.1.73
boto3 == 1.21.24
botocore == 1.24.24
importlib-resources == 5.7.1
importlib-metadata == 4.11.4
pex == 2.1.92
boto3 == 1.24.3
botocore == 1.27.3
awslimitchecker == 12.0.0
tenacity == 8.0.1
dataclasses-json == 0.5.7
Expand Down
10 changes: 5 additions & 5 deletions requirements/test.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
-r base.txt

pytest == 7.1.1
pytest == 7.1.2
pytest-cov == 3.0.0
pytest-mock == 3.7.0
teamcity-messages == 1.31
mypy == 0.941
mypy == 0.961
pylint == 2.7.4
tox == 3.24.5
tox == 3.25.0
yamllint == 1.26.3
moto == 3.1.1
moto == 3.1.12
docker == 5.0.3
cfn-lint == 0.58.4
cfn-lint == 0.61.0
14 changes: 3 additions & 11 deletions src/elastic_blast/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
from .constants import ELB_AWS_JANITOR_CFN_TEMPLATE, ELB_DFLT_JANITOR_SCHEDULE_AWS
from .constants import ELB_AWS_JANITOR_LAMBDA_DEPLOYMENT_BUCKET, ELB_AWS_JANITOR_LAMBDA_DEPLOYMENT_KEY
from .constants import CFG_CLOUD_PROVIDER, CFG_CP_AWS_AUTO_SHUTDOWN_ROLE, CSP
from .constants import AWS_JANITOR_ROLE_NAME
from .constants import AWS_JANITOR_ROLE_NAME, ELB_JANITOR_SCHEDULE
from .constants import STATUS_MESSAGE_ERROR, STATUS_MESSAGE_VERBOSE
from .filehelper import parse_bucket_name_key
from .aws_traits import get_machine_properties, create_aws_config, get_availability_zones_for
Expand Down Expand Up @@ -240,8 +240,8 @@ def _init(self, cfg: ElasticBlastConfig, create: bool):
logging.debug(f'Found janitor role for {AWS_JANITOR_ROLE_NAME}: {role.arn}')
except:
logging.debug(f'Did not find janitor role for {AWS_JANITOR_ROLE_NAME}')
if 'ELB_JANITOR_SCHEDULE' in os.environ:
janitor_schedule = os.environ['ELB_JANITOR_SCHEDULE']
if ELB_JANITOR_SCHEDULE in os.environ:
janitor_schedule = os.environ[ELB_JANITOR_SCHEDULE]
logging.debug(f'Overriding janitor schedule to "{janitor_schedule}"')
if 'ELB_DISABLE_AUTO_SHUTDOWN' in os.environ:
janitor_schedule = ''
Expand Down Expand Up @@ -916,14 +916,6 @@ def upload_job_ids(self) -> None:
bucket.put_object(Body=self.job_ids.to_json().encode(), Key=key) # type: ignore
logging.debug(f'Uploaded job IDs to {self.results_bucket}/{ELB_METADATA_DIR}/{ELB_AWS_JOB_IDS}')

# This code is needed for janitor backward compatibility in version
# 0.2.4, and can be removed when the ElasticBLAST janitor is upgraded to version 0.2.4.
ELB_AWS_OLD_JOB_IDS = 'job-ids.json'
bucket_name, key = parse_bucket_name_key(f'{self.results_bucket}/{ELB_METADATA_DIR}/{ELB_AWS_OLD_JOB_IDS}')
bucket = self.s3.Bucket(bucket_name)
bucket.put_object(Body=json.dumps(self.job_ids.to_list()).encode(), Key=key)
logging.debug(f'Uploaded job IDs to {self.results_bucket}/{ELB_METADATA_DIR}/{ELB_AWS_OLD_JOB_IDS}')


def upload_query_length(self, query_length: int) -> None:
"""Save query length in a metadata file in S3"""
Expand Down
19 changes: 5 additions & 14 deletions src/elastic_blast/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
from .constants import ELB_DFLT_AWS_SPOT_BID_PERCENTAGE
from .constants import APP_STATE_RESULTS_MD5, SYSTEM_MEMORY_RESERVE
from .constants import ELB_S3_PREFIX, ELB_GCS_PREFIX
from .constants import ELB_DFLT_AWS_REGION, ELB_DFLT_GCP_REGION
from .constants import ELB_DFLT_AWS_REGION, ELB_DFLT_GCP_REGION, ELB_DFLT_GCP_ZONE
from .util import UserReportError
from .filehelper import parse_bucket_name_key
from typing import List
Expand All @@ -70,6 +70,9 @@ def _load_config_from_environment(cfg: configparser.ConfigParser) -> None:
"""Selected environment variables can be used to configure ElasticBLAST"""
if 'ELB_GCP_PROJECT' in os.environ:
cfg[CFG_CLOUD_PROVIDER][CFG_CP_GCP_PROJECT] = os.environ['ELB_GCP_PROJECT']
# If GCP project is provided by the environment/configuration, leverage it
if 'CLOUDSDK_CORE_PROJECT' in os.environ:
cfg[CFG_CLOUD_PROVIDER][CFG_CP_GCP_PROJECT] = os.environ['CLOUDSDK_CORE_PROJECT']
if 'ELB_GCP_REGION' in os.environ:
cfg[CFG_CLOUD_PROVIDER][CFG_CP_GCP_REGION] = os.environ['ELB_GCP_REGION']
if 'ELB_GCP_ZONE' in os.environ:
Expand Down Expand Up @@ -140,16 +143,6 @@ def configure(args: argparse.Namespace) -> configparser.ConfigParser:
retval[CFG_CLOUD_PROVIDER][CFG_CP_GCP_REGION] = args.gcp_region
if hasattr(args, 'gcp_zone') and args.gcp_zone:
retval[CFG_CLOUD_PROVIDER][CFG_CP_GCP_ZONE] = args.gcp_zone

# If results bucket was provided, set the default region in the
# corresponding cloud service provider if it wasn't specified by the user
if CFG_BLAST_RESULTS in retval[CFG_BLAST]:
if retval[CFG_BLAST][CFG_BLAST_RESULTS].startswith(ELB_S3_PREFIX):
if CFG_CP_AWS_REGION not in retval[CFG_CLOUD_PROVIDER]:
retval[CFG_CLOUD_PROVIDER][CFG_CP_AWS_REGION] = ELB_DFLT_AWS_REGION
elif retval[CFG_BLAST][CFG_BLAST_RESULTS].startswith(ELB_GCS_PREFIX):
if CFG_CP_GCP_REGION not in retval[CFG_CLOUD_PROVIDER]:
retval[CFG_CLOUD_PROVIDER][CFG_CP_GCP_REGION] = ELB_DFLT_GCP_REGION

# Exception to prevent unnecessary API calls and ensure testability
# of some functionality without credentials
Expand All @@ -168,7 +161,7 @@ def _validate_csp(cfg: configparser.ConfigParser) -> None:
Throws a UserReportError in case of invalid configuration.
"""
if CFG_CLOUD_PROVIDER not in cfg:
report_config_error(['Cloud provider configuration is missing'])
return

# are gcp or aws entries present in cloud-provider config
gcp = sum([i.startswith('gcp') for i in cfg[CFG_CLOUD_PROVIDER]]) > 0
Expand All @@ -179,8 +172,6 @@ def _validate_csp(cfg: configparser.ConfigParser) -> None:
# both and none are forbidden
if gcp and aws:
msg.append('Cloud provider config contains entries for more than one cloud provider. Only one cloud provider can be used')
if not gcp and not aws:
msg.append('Cloud provider configuration is missing')

if CFG_CP_NAME in cfg[CFG_CLOUD_PROVIDER]:
logging.debug(f'Cloud Service Provider {cfg[CFG_CLOUD_PROVIDER][CFG_CP_NAME]}')
Expand Down
5 changes: 5 additions & 0 deletions src/elastic_blast/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,9 @@ def __str__(self):


ELB_DFLT_GCP_REGION = 'us-east4'
ELB_DFLT_GCP_ZONE = 'us-east4-b'
ELB_DFLT_AWS_REGION = 'us-east-1'
ELB_UNKNOWN_GCP_PROJECT = 'elb-unknown-gcp-project'

ELB_DOCKER_VERSION = '1.1.0'
ELB_QS_DOCKER_VERSION = '0.1.2'
Expand Down Expand Up @@ -235,6 +237,7 @@ def __str__(self):
CFG_CP_GCP_ZONE = 'gcp-zone'
CFG_CP_GCP_NETWORK = 'gcp-network'
CFG_CP_GCP_SUBNETWORK = 'gcp-subnetwork'
CFG_CP_GCP_GKE_VERSION = 'gke-version'
CFG_CP_AWS_REGION = 'aws-region'
CFG_CP_AWS_KEY_PAIR = 'aws-key-pair'
CFG_CP_AWS_VPC = 'aws-vpc'
Expand Down Expand Up @@ -317,6 +320,8 @@ def __str__(self):
# https://docs.aws.amazon.com/eventbridge/latest/userguide/eb-create-rule-schedule.html
ELB_DFLT_JANITOR_SCHEDULE_AWS = "cron(*/5 * * * ? *)"

ELB_JANITOR_SCHEDULE = 'ELB_JANITOR_SCHEDULE'

ELB_AWS_JANITOR_LAMBDA_DEPLOYMENT_BUCKET = 'elb-camacho'
ELB_AWS_JANITOR_LAMBDA_DEPLOYMENT_KEY = 'functions/'
ELB_AWS_JANITOR_CFN_TEMPLATE = 'https://elb-camacho.s3.amazonaws.com/templates/elastic-blast-janitor-cf.yaml'
Expand Down
98 changes: 89 additions & 9 deletions src/elastic_blast/elb_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
from .constants import CFG_CLOUD_PROVIDER
from .constants import CFG_CP_GCP_PROJECT, CFG_CP_GCP_REGION, CFG_CP_GCP_ZONE
from .constants import CFG_CP_GCP_NETWORK, CFG_CP_GCP_SUBNETWORK
from .constants import CFG_CP_GCP_GKE_VERSION
from .constants import CFG_CP_AWS_REGION, CFG_CP_AWS_VPC, CFG_CP_AWS_SUBNET
from .constants import CFG_CP_AWS_JOB_ROLE, CFG_CP_AWS_BATCH_SERVICE_ROLE
from .constants import CFG_CP_AWS_INSTANCE_ROLE, CFG_CP_AWS_SPOT_FLEET_ROLE
Expand All @@ -82,7 +83,9 @@
from .constants import ELB_DFLT_AWS_NUM_CPUS, ELB_DFLT_GCP_NUM_CPUS
from .constants import ELB_S3_PREFIX, ELB_GCS_PREFIX, ELB_UNKNOWN_MAX_NUMBER_OF_CONCURRENT_JOBS
from .constants import AWS_ROLE_PREFIX, CFG_CP_AWS_AUTO_SHUTDOWN_ROLE
from .constants import BLASTDB_ERROR, ELB_UNKNOWN
from .constants import BLASTDB_ERROR, ELB_UNKNOWN, ELB_JANITOR_SCHEDULE
from .constants import ELB_DFLT_GCP_REGION, ELB_DFLT_GCP_ZONE
from .constants import ELB_DFLT_AWS_REGION, ELB_UNKNOWN_GCP_PROJECT
from .util import validate_gcp_string, check_aws_region_for_invalid_characters
from .util import validate_gke_cluster_name, ElbSupportedPrograms
from .util import get_query_batch_size
Expand Down Expand Up @@ -210,12 +213,15 @@ class CloudProviderBaseConfig:
@dataclass
class GCPConfig(CloudProviderBaseConfig, ConfigParserToDataclassMapper):
"""GCP config for ElasticBLAST"""
project: GCPString
region: GCPString
zone: GCPString
region: GCPString = GCPString(ELB_DFLT_GCP_REGION)
project: GCPString = GCPString(ELB_UNKNOWN_GCP_PROJECT)
zone: GCPString = GCPString(ELB_DFLT_GCP_ZONE)
network: Optional[str] = None
subnet: Optional[str] = None
user: Optional[str] = None
# FIXME: This is a temporary fix for EB-1530. gke_version should be set to
# None once the proper fix is implemented.
gke_version: Optional[str] = '1.21'

# mapping of class attributes to ConfigParser parameters so that objects
# can be initialized from ConfigParser objects
Expand All @@ -225,7 +231,8 @@ class GCPConfig(CloudProviderBaseConfig, ConfigParserToDataclassMapper):
'cloud': None,
'user': None,
'network': ParamInfo(CFG_CLOUD_PROVIDER, CFG_CP_GCP_NETWORK),
'subnet': ParamInfo(CFG_CLOUD_PROVIDER, CFG_CP_GCP_SUBNETWORK)}
'subnet': ParamInfo(CFG_CLOUD_PROVIDER, CFG_CP_GCP_SUBNETWORK),
'gke_version': ParamInfo(CFG_CLOUD_PROVIDER, CFG_CP_GCP_GKE_VERSION)}

def __post_init__(self):
self.cloud = CSP.GCP
Expand All @@ -235,6 +242,13 @@ def __post_init__(self):
if p.stdout:
self.user = p.stdout.decode('utf-8').rstrip()

if self.project == ELB_UNKNOWN_GCP_PROJECT:
proj = get_gcp_project()
if not proj:
raise ValueError(f'GCP project is unset, please invoke gcloud config set project REPLACE_WITH_YOUR_PROJECT_NAME_HERE')
else:
self.project = GCPString(proj)

def validate(self, errors: List[str], task: ElbCommand):
"""Validate config"""
if bool(self.network) != bool(self.subnet):
Expand All @@ -244,7 +258,7 @@ def validate(self, errors: List[str], task: ElbCommand):
@dataclass
class AWSConfig(CloudProviderBaseConfig, ConfigParserToDataclassMapper):
"""AWS config for ElasticBLAST"""
region: AWSRegion
region: AWSRegion = AWSRegion(ELB_DFLT_AWS_REGION)
vpc: Optional[str] = None
subnet: Optional[str] = None
security_group: Optional[str] = None
Expand Down Expand Up @@ -589,7 +603,8 @@ def __init__(self, *args, **kwargs):
# post-init activities

try:
self.cloud_provider.region.validate(dry_run)
if self.cloud_provider.region:
self.cloud_provider.region.validate(dry_run)
except ValueError as err:
raise UserReportError(returncode=INPUT_ERROR, message=str(err))

Expand Down Expand Up @@ -713,7 +728,17 @@ def _init_from_ConfigParser(self, cfg: configparser.ConfigParser,

self._validate_config_parser(cfg)
_validate_csp(cfg)
self.cluster = ClusterConfig.create_from_cfg(cfg)

# determine cloud provider, first by user config, then results bucket
if sum([i.startswith('aws') for i in cfg[CFG_CLOUD_PROVIDER]]) > 0:
cloud = CSP.AWS
elif sum([i.startswith('gcp') for i in cfg[CFG_CLOUD_PROVIDER]]) > 0:
cloud = CSP.GCP
else:
cloud = self.cluster.results.get_cloud_provider()

if cloud == CSP.AWS:
self.cloud_provider = AWSConfig.create_from_cfg(cfg)
# for mypy
self.aws = cast(AWSConfig, self.cloud_provider)
Expand All @@ -722,8 +747,6 @@ def _init_from_ConfigParser(self, cfg: configparser.ConfigParser,
# for mypy
self.gcp = cast(GCPConfig, self.cloud_provider)

self.cluster = ClusterConfig.create_from_cfg(cfg)

if task == ElbCommand.SUBMIT:
self.blast = BlastConfig.create_from_cfg(cfg)

Expand Down Expand Up @@ -853,6 +876,13 @@ def validate(self, task: ElbCommand = ElbCommand.SUBMIT, dry_run=False):
if instance_props.memory - SYSTEM_MEMORY_RESERVE < bytes_to_cache_gb:
errors.append(f'BLAST database {self.blast.db} memory requirements exceed memory available on selected machine type "{self.cluster.machine_type}". Please select machine type with at least {bytes_to_cache_gb + SYSTEM_MEMORY_RESERVE}GB available memory.')

# validate janitor schedule if provided
if ELB_JANITOR_SCHEDULE in os.environ:
try:
validate_janitor_schedule(os.environ[ELB_JANITOR_SCHEDULE], self.cloud_provider.cloud)
except ValueError as err:
errors.append(str(err))

if errors:
raise UserReportError(returncode=INPUT_ERROR,
message='\n'.join(errors))
Expand Down Expand Up @@ -1108,6 +1138,31 @@ def get_instance_props(cloud_provider: CSP, region: str, machine_type: str) -> I
return instance_props


def validate_janitor_schedule(val: str, cloud_provider: CSP) -> None:
    """Check that a janitor schedule string is well formed for a cloud provider.

    For CSP.GCP the value must be either one of the ``@...`` shortcuts or
    five whitespace-separated cron fields (minute, hour, day of month, month,
    day of week). For any other provider the value must have the form
    ``cron(...)`` with six fields (the five above, in their AWS flavor, plus
    a year).

    Arguments:
        val: schedule string to validate
        cloud_provider: cloud service provider whose syntax is applied

    Raises:
        ValueError if val does not fully match the expected pattern
    """
    # regular expressions for the individual cron fields
    special = r'@(yearly|annually|monthly|weekly|daily|midnight|hourly)'
    minute = r'\*|(\*|([1-5]?[0-9]))((,(\*|([1-5]?[0-9])))*([/-][1-5]?[0-9])?)*'
    hour = r'\*|(\*|([1-2]?[0-9]))((,(\*|([1-2]?[0-9])))*([/-][1-2]?[0-9])?)*'
    day_of_month_gcp = r'\*|(\*|([1-3]?[0-9]))((,(\*|([1-3]?[0-9])))*([/-][1-3]?[0-9])?)*'
    day_of_month_aws = r'\*|\?|(\*|([1-3]?[0-9]L?W?))((,(\*|([1-3]?[0-9]L?W?)))*([/-][1-3]?[0-9])?)*'
    month = r'\*|(\*|(1?[0-9]))((,(\*|(1?[0-9])))*([/-]1?[0-9])?)*'
    day_of_week_gcp = r'\*|((\*|[0-7]|mon|tue|wed|thu|fri|sat|sun)((,(\*|[0-7]|mon|tue|wed|thu|fri|sat|sun))*([/-]([1-6]|mon|tue|wed|thu|fri|sat|sun))?)*)'
    day_of_week_aws = r'\*|\?|(((\*|[1-7]|MON|TUE|WED|THU|FRI|SAT|SUN)L?)(([,#](([1-7]|MON|TUE|WED|THU|FRI|SAT|SUN)L?))*([/-]([1-6]|MON|TUE|WED|THU|FRI|SAT|SUN))?)*)'
    year = r'\*|(\*|(2[01][0-9]{2}))((,(\*|(2[01][0-9]{2})))*(-2[01][0-9]{2})?(/\d{1,3})?)*'

    def _grouped_fields(*fields: str) -> str:
        """Wrap every field in a group and separate the groups with \\s"""
        return r'\s'.join(f'({field})' for field in fields)

    if cloud_provider == CSP.GCP:
        five_fields = _grouped_fields(minute, hour, day_of_month_gcp, month, day_of_week_gcp)
        pattern = f'{special}|({five_fields})'
        url = 'https://kubernetes.io/docs/concepts/workloads/controllers/cron-jobs/#cron-schedule-syntax'
    else:
        six_fields = _grouped_fields(minute, hour, day_of_month_aws, month, day_of_week_aws, year)
        pattern = r'cron\(' + six_fields + r'\)'
        url = 'https://docs.aws.amazon.com/eventbridge/latest/userguide/eb-create-rule-schedule.html'

    # the whole string must match, a matching prefix is not enough
    if re.fullmatch(pattern, val) is None:
        raise ValueError(f'Invalid value of environment variable {ELB_JANITOR_SCHEDULE} "{val}". The string must match the regular expression "{pattern}". For more information, please see {url}')


class JSONEnumEncoder(json.JSONEncoder):
"""JSON encoder that handles basic types and Enum"""
def default(self, o):
Expand All @@ -1116,3 +1171,28 @@ def default(self, o):
return o.name
else:
return json.JSONEncoder(self, o)


def get_gcp_project() -> Optional[str]:
    """Return current GCP project or None if the property is unset.

    The project is read from the first line of the standard output of
    'gcloud config get-value project'.

    Returns:
        The project name, or None if gcloud reported no project.

    Raises:
        util.SafeExecError on problems with command line gcloud
        RuntimeError if gcloud run is successful, but the result is empty"""
    cmd: str = 'gcloud config get-value project'
    p = safe_exec(cmd)

    # the result should not be empty, for unset properties gcloud returns the
    # string: '(unset)' to stderr
    if not p.stdout and not p.stderr:
        raise RuntimeError('Current GCP project could not be established')

    # stdout may legitimately be empty when '(unset)' went to stderr only
    lines = p.stdout.decode().split('\n') if p.stdout else ['']
    project = lines[0].strip()

    # an empty value and the '(unset)' marker both mean that no project is
    # configured; report them uniformly as None rather than as an empty string
    if not project or project == '(unset)':
        return None
    return project


Loading

0 comments on commit 480f859

Please sign in to comment.