From bcab560c0ebfb679a423be66eba18b61f9cfe71f Mon Sep 17 00:00:00 2001 From: Christiam Camacho Date: Wed, 17 Jul 2024 16:37:55 -0400 Subject: [PATCH] Release 1.3.0 --- CITATION.cff | 4 +- docker-blast/Dockerfile | 3 +- .../Dockerfile-build-from-local-sources | 2 +- docker-blast/Makefile | 2 +- docker-blast/README.md | 1 + docker-blast/test-docker-image-aws.yaml | 4 +- docker-blast/test-docker-image-gcp.yaml | 4 +- .../Dockerfile-build-from-local-sources.gcp | 9 +- docker-janitor/Dockerfile.gcp | 4 +- docker-janitor/Makefile | 5 +- .../Dockerfile-build-from-local-sources.aws | 4 +- docker-job-submit/Dockerfile.aws | 4 +- docker-job-submit/Makefile | 5 +- docker-job-submit/cloud-job-submit.sh | 7 +- docker-qs/Dockerfile | 4 +- docker-qs/Dockerfile-build-from-local-sources | 4 +- docker-qs/Makefile | 3 +- docker-qs/README.md | 2 + requirements/base.txt | 17 +- requirements/test.txt | 18 +- setup.cfg | 2 +- src/elastic_blast/aws_traits.py | 7 +- src/elastic_blast/commands/submit.py | 7 +- src/elastic_blast/constants.py | 6 +- src/elastic_blast/elb_config.py | 19 +- src/elastic_blast/gcp.py | 123 +++++- src/elastic_blast/jobs.py | 5 +- src/elastic_blast/kubernetes.py | 126 +++--- src/elastic_blast/tuner.py | 3 +- src/elastic_blast/util.py | 77 ++-- tests/app/data/bad_bucket_conf.ini | 2 +- tests/app/data/blastdb-notfound.ini | 2 +- tests/app/data/cleanup-error.ini | 2 +- tests/app/data/cluster-error.ini | 2 +- tests/app/data/good_conf.ini | 2 +- tests/app/data/invalid-cpu-req-gcp.ini | 2 +- tests/app/data/invalid-machine-type-gcp.ini | 2 +- tests/app/data/invalid-mem-req.ini | 2 +- tests/app/data/too-many-k8s-jobs.ini | 2 +- tests/app/gcloud | 6 + tests/app/test_elasticblast.py | 8 +- .../data/blastdb-manifest-ncbi-1.1.json | 78 ---- tests/blastdb/data/nr-aws.json | 382 ------------------ tests/blastdb/data/nr-gcp.json | 382 ------------------ tests/blastdb/data/nr-ncbi.json | 60 --- tests/blastdb/data/swissprot-aws.json | 27 -- tests/blastdb/data/swissprot-gcp.json | 
27 -- tests/blastdb/data/swissprot-ncbi.json | 16 - tests/cost/__init__.py | 0 tests/cost/data/aws-run-summary.json | 30 ++ tests/cost/pytest.ini | 4 + tests/filehelper/test_filesystem_checks.py | 21 +- tests/util/test_util.py | 54 ++- tests/utils.py | 54 +-- tox.ini | 4 +- validate-pex-cloudbuild.yaml | 17 - 56 files changed, 445 insertions(+), 1224 deletions(-) create mode 100755 tests/app/gcloud delete mode 100644 tests/blastdb/data/blastdb-manifest-ncbi-1.1.json delete mode 100644 tests/blastdb/data/nr-aws.json delete mode 100644 tests/blastdb/data/nr-gcp.json delete mode 100644 tests/blastdb/data/nr-ncbi.json delete mode 100644 tests/blastdb/data/swissprot-aws.json delete mode 100644 tests/blastdb/data/swissprot-gcp.json delete mode 100644 tests/blastdb/data/swissprot-ncbi.json create mode 100644 tests/cost/__init__.py create mode 100644 tests/cost/data/aws-run-summary.json create mode 100644 tests/cost/pytest.ini delete mode 100644 validate-pex-cloudbuild.yaml diff --git a/CITATION.cff b/CITATION.cff index 5b35430..846af7e 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -1,8 +1,8 @@ cff-version: "1.2.0" message: "If you use this software, please cite it using these metadata." title: ElasticBLAST -version: "1.2.0" -date-released: 2023-11-27 +version: "1.3.0" +date-released: 2024-07-17 license: "NCBI Public Domain" repository-code: "https://github.com/ncbi/elastic-blast/" url: "https://blast.ncbi.nlm.nih.gov/doc/elastic-blast/" diff --git a/docker-blast/Dockerfile b/docker-blast/Dockerfile index 04f12af..102ec54 100644 --- a/docker-blast/Dockerfile +++ b/docker-blast/Dockerfile @@ -18,7 +18,7 @@ # # Please cite NCBI in any work or product based on this material. -FROM ncbi/blast:2.15.0 as blast +FROM ncbi/blast:2.16.0 as blast ARG version LABEL Description="NCBI BLAST" Vendor="NCBI/NLM/NIH" Version=${version} Maintainer=camacho@ncbi.nlm.nih.gov @@ -29,6 +29,7 @@ COPY requirements.txt . 
RUN apt-get -y -m update && \ apt-get install -y python3 python3-pip time parallel vmtouch curl wget unzip && \ + pip3 --version && python3 -m pip --version && \ rm -rf /var/lib/apt/lists/* RUN python3 -m pip install --no-cache-dir --upgrade pip && \ diff --git a/docker-blast/Dockerfile-build-from-local-sources b/docker-blast/Dockerfile-build-from-local-sources index 86b67dd..6fccb08 100644 --- a/docker-blast/Dockerfile-build-from-local-sources +++ b/docker-blast/Dockerfile-build-from-local-sources @@ -18,7 +18,7 @@ # # Please cite NCBI in any work or product based on this material. -FROM ncbi/blast:latest as blast +FROM ncbi/blast:2.16.0 as blast ARG version LABEL Description="NCBI BLAST" Vendor="NCBI/NLM/NIH" Version=${version} Maintainer=camacho@ncbi.nlm.nih.gov diff --git a/docker-blast/Makefile b/docker-blast/Makefile index 7a6b28c..c9c7563 100644 --- a/docker-blast/Makefile +++ b/docker-blast/Makefile @@ -30,7 +30,7 @@ GCP_IMG?=gcr.io/ncbi-sandbox-blast/${IMG} AWS_SERVER?=public.ecr.aws/i6v3i0i9 AWS_IMG?=${AWS_SERVER}/elasticblast-elb AWS_REGION?=us-east-1 -VERSION?=1.3.1 +VERSION?=1.3.2 ifeq (, $(shell which vmtouch 2>/dev/null)) NOVMTOUCH?=--no-vmtouch diff --git a/docker-blast/README.md b/docker-blast/README.md index a63be5c..eb81eb4 100644 --- a/docker-blast/README.md +++ b/docker-blast/README.md @@ -8,3 +8,4 @@ various repositories. If you have `docker` available, run `make build` to build the image, and `make check` to test it locally. 
+ diff --git a/docker-blast/test-docker-image-aws.yaml b/docker-blast/test-docker-image-aws.yaml index cf286ff..8209cf0 100644 --- a/docker-blast/test-docker-image-aws.yaml +++ b/docker-blast/test-docker-image-aws.yaml @@ -12,7 +12,9 @@ steps: - name: '${_IMG}' args: ['aws', 's3', '--no-sign-request', 'ls', 's3://ncbi-blast-databases/latest-dir'] - name: '${_IMG}' - args: ['gcloud', '--version'] + args: ['gsutil', '--version'] +- name: '${_IMG}' + args: ['gsutil', 'ls', 'gs://blast-db'] - name: '${_IMG}' args: ['printenv', 'BLASTDB', 'PATH'] - name: '${_IMG}' diff --git a/docker-blast/test-docker-image-gcp.yaml b/docker-blast/test-docker-image-gcp.yaml index d09c135..08f9edf 100644 --- a/docker-blast/test-docker-image-gcp.yaml +++ b/docker-blast/test-docker-image-gcp.yaml @@ -12,7 +12,9 @@ steps: - name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}' args: ['aws', 's3', '--no-sign-request', 'ls', 's3://ncbi-blast-databases/latest-dir'] - name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}' - args: ['gcloud', '--version'] + args: ['gsutil', '--version'] +- name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}' + args: ['gsutil', 'ls', 'gs://blast-db'] - name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}' args: ['printenv', 'BLASTDB', 'PATH'] - name: 'gcr.io/${PROJECT_ID}/${_IMG}:${_TAG}' diff --git a/docker-janitor/Dockerfile-build-from-local-sources.gcp b/docker-janitor/Dockerfile-build-from-local-sources.gcp index d318790..3c9ef08 100644 --- a/docker-janitor/Dockerfile-build-from-local-sources.gcp +++ b/docker-janitor/Dockerfile-build-from-local-sources.gcp @@ -32,13 +32,12 @@ COPY elastic-blast-janitor.sh /usr/bin/ RUN chmod +x /usr/bin/elastic-blast-janitor.sh && \ apk -U upgrade && \ apk add --no-cache bash python3 py3-pip py3-wheel curl && \ - pip3 install --no-cache-dir --upgrade pip && \ - curl -LO https://storage.googleapis.com/kubernetes-release/release/`curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt`/bin/linux/amd64/kubectl && \ - chmod +x ./kubectl && \ - 
mv kubectl /usr/bin/ && \ + pip3 install --no-cache-dir --upgrade --break-system-packages pip && \ mkdir /var/elastic-blast && \ rm -rf /var/cache/apk/* +RUN gcloud -q components install kubectl + COPY src/ /var/elastic-blast/src/ COPY bin/ /var/elastic-blast/bin/ COPY requirements/ /var/elastic-blast/requirements/ @@ -47,6 +46,6 @@ COPY setup.cfg_cloud /var/elastic-blast/setup.cfg WORKDIR /var/elastic-blast -RUN pip3 install . -r requirements/test.txt +RUN pip3 install . -r requirements/test.txt --break-system-packages CMD ["/usr/bin/elastic-blast-janitor.sh"] diff --git a/docker-janitor/Dockerfile.gcp b/docker-janitor/Dockerfile.gcp index 0dc13ed..c00cb1b 100644 --- a/docker-janitor/Dockerfile.gcp +++ b/docker-janitor/Dockerfile.gcp @@ -33,8 +33,8 @@ COPY elastic-blast-janitor.sh /usr/bin/ RUN chmod +x /usr/bin/elastic-blast-janitor.sh && \ apk -U upgrade && \ apk add --no-cache bash python3 py3-pip py3-wheel curl && \ - pip3 install --no-cache-dir --upgrade pip && \ - pip3 install --no-cache-dir -r requirements.txt && rm -rf /var/cache/apk/* requirements.txt + pip3 install --no-cache-dir --upgrade --break-system-packages pip && \ + pip3 install --no-cache-dir -r requirements.txt --break-system-packages && rm -rf /var/cache/apk/* requirements.txt RUN gcloud -q components install kubectl diff --git a/docker-janitor/Makefile b/docker-janitor/Makefile index 627b27f..21ed57d 100644 --- a/docker-janitor/Makefile +++ b/docker-janitor/Makefile @@ -27,7 +27,8 @@ SHELL=/bin/bash .PHONY: all pre-check check clean build publish gcp-build gcp-check gcp-clean IMG?=ncbi/elasticblast-janitor -VERSION?=0.3.1 +VERSION?=0.3.2 +ELB_VERSION?=$(shell git describe --tags --abbrev=0) GCP_PROJECT?=$(shell gcloud config get-value project 2>/dev/null) GCP_TEST_BUCKET?=gs://elasticblast-test/query-split-run-test @@ -64,7 +65,7 @@ gcp-build: gcp-build-from-local-sources: rsync -a ../setup.py ../setup.cfg_cloud ../src ../bin ../requirements ${PWD}/ sed -i~ -e '/^value = $${VERSION}/d;' 
setup.cfg_cloud - echo "value = ${VERSION}" >> setup.cfg_cloud + echo "value = ${ELB_VERSION}" >> setup.cfg_cloud -gcloud builds submit --config cloudbuild.yaml --substitutions _VERSION=${VERSION},_IMG=${IMG},_DOCKERFILE='Dockerfile-build-from-local-sources.gcp' rm -fr src bin requirements setup.cfg_cloud setup.py diff --git a/docker-job-submit/Dockerfile-build-from-local-sources.aws b/docker-job-submit/Dockerfile-build-from-local-sources.aws index 7cc8d74..a27ecae 100644 --- a/docker-job-submit/Dockerfile-build-from-local-sources.aws +++ b/docker-job-submit/Dockerfile-build-from-local-sources.aws @@ -31,7 +31,7 @@ COPY submit_jobs.py /usr/bin/ RUN chmod +x /usr/bin/submit_jobs.py && \ apk -U upgrade && \ apk add --no-cache bash python3 py3-pip py3-wheel && \ - pip3 install --no-cache-dir --upgrade pip && \ + pip3 install --no-cache-dir --upgrade --break-system-packages pip && \ mkdir -p /var/elastic-blast && \ rm -rf /var/cache/apk/* @@ -43,6 +43,6 @@ COPY setup.cfg_cloud /var/elastic-blast/setup.cfg WORKDIR /var/elastic-blast -RUN pip3 install . -r requirements/base.txt +RUN pip3 install . 
-r requirements/base.txt --break-system-packages CMD ["/usr/bin/submit_jobs.py", "--help"] diff --git a/docker-job-submit/Dockerfile.aws b/docker-job-submit/Dockerfile.aws index e6c1dd0..4eb8334 100644 --- a/docker-job-submit/Dockerfile.aws +++ b/docker-job-submit/Dockerfile.aws @@ -27,8 +27,8 @@ COPY submit_jobs.py /usr/bin/ RUN chmod +x /usr/bin/submit_jobs.py && \ apk -U upgrade && \ apk add --no-cache bash python3 py3-pip py3-wheel curl unzip && \ - pip3 install --no-cache-dir --upgrade pip && \ - pip3 install --no-cache-dir -r requirements.txt && rm -rf /var/cache/apk/* requirements.txt + pip3 install --no-cache-dir --upgrade --break-system-packages pip && \ + pip3 install --no-cache-dir -r requirements.txt --break-system-packages && rm -rf /var/cache/apk/* requirements.txt LABEL Description="NCBI ElasticBLAST Cloud Job Submission Module" LABEL Version=${version} diff --git a/docker-job-submit/Makefile b/docker-job-submit/Makefile index 84df9c6..b9c1cfe 100644 --- a/docker-job-submit/Makefile +++ b/docker-job-submit/Makefile @@ -28,7 +28,8 @@ SHELL=/bin/bash .PHONY: all pre-check check clean build publish gcp-build gcp-check gcp-clean IMG?=ncbi/elasticblast-job-submit -VERSION?=4.0.2 +VERSION?=4.0.3 +ELB_VERSION?=$(shell git describe --tags --abbrev=0) GCP_PROJECT?=$(shell gcloud config get-value project 2>/dev/null) GCP_TEST_BUCKET?=gs://elasticblast-test/cloud-job-submission AWS_REGION?=us-east-1 @@ -73,7 +74,7 @@ aws-build: aws-build-from-local-sources: rsync -a ../setup.py ../setup.cfg_cloud ../src ../bin ../requirements ${PWD}/ sed -i~ -e '/^value = $${VERSION}/d;' setup.cfg_cloud - echo "value = ${VERSION}" >> setup.cfg_cloud + echo "value = ${ELB_VERSION}" >> setup.cfg_cloud -gcloud builds submit --config awscloudbuild.yaml --substitutions _SERVER=${AWS_SERVER},TAG_NAME=${VERSION},_IMG=${AWS_IMG},_DOCKERFILE=Dockerfile-build-from-local-sources.aws,_AWS_ECR_PASSWD="`aws ecr-public get-login-password --region ${AWS_REGION}`" rm -fr src bin requirements 
setup.cfg_cloud setup.py diff --git a/docker-job-submit/cloud-job-submit.sh b/docker-job-submit/cloud-job-submit.sh index 8d2bccb..cfaa85d 100755 --- a/docker-job-submit/cloud-job-submit.sh +++ b/docker-job-submit/cloud-job-submit.sh @@ -201,7 +201,10 @@ done # label the new persistent disk export pv=$(${KUBECTL} get -f pvc-rom.yaml -o jsonpath='{.spec.volumeName}') -jq -n --arg dd $pv '[$dd]' | gsutil cp - ${ELB_RESULTS}/${ELB_METADATA_DIR}/$ELB_DISK_ID_FILE +export vs=snapshot-$(${KUBECTL} get -f /templates/volume-snapshot.yaml -o jsonpath='{.metadata.uid}') +echo "PV: $pv" +echo "Volume snapshot: $vs" +jq -n --arg dd $pv --arg ss $vs '{"disks": [$dd], "snapshots": [$ss]}' | gsutil -qm cp - ${ELB_RESULTS}/${ELB_METADATA_DIR}/$ELB_DISK_ID_FILE gcloud compute disks update $pv --update-labels ${ELB_LABELS} --zone ${ELB_GCP_ZONE} --project ${ELB_GCP_PROJECT} # delete snapshot @@ -214,6 +217,6 @@ if gcloud compute disks describe $pv_rwo --zone $ELB_GCP_ZONE ; then sleep 10 if gcloud compute disks describe $pv_rwo --zone $ELB_GCP_ZONE ; then - jq -n --arg d1 $pv_rwo --arg d2 $pv '[d1, d2]' | gsutil cp - ${ELB_RESULTS}/${ELB_METADATA_DIR}/$ELB_DISK_ID_FILE + jq -n --arg d1 $pv_rwo --arg d2 $pv --arg ss $vs '{"disks": [$d1, $d2], "snapshots": [$ss]}' | gsutil -qm cp - ${ELB_RESULTS}/${ELB_METADATA_DIR}/$ELB_DISK_ID_FILE fi fi diff --git a/docker-qs/Dockerfile b/docker-qs/Dockerfile index f62ea9d..6d86803 100644 --- a/docker-qs/Dockerfile +++ b/docker-qs/Dockerfile @@ -28,8 +28,8 @@ COPY run.sh /usr/bin/ RUN chmod +x /usr/bin/run.sh && \ apk -U upgrade && \ apk add --no-cache bash python3 py3-pip py3-wheel curl unzip && \ - pip3 install --no-cache-dir --upgrade pip && \ - pip3 install --no-cache-dir -r requirements.txt && rm -rf /var/cache/apk/* requirements.txt + pip3 install --no-cache-dir --upgrade pip --break-system-packages && \ + pip3 install --no-cache-dir -r requirements.txt --break-system-packages && rm -rf /var/cache/apk/* requirements.txt LABEL 
Description="NCBI ElasticBLAST Query Splitting Module" LABEL Version=${version} diff --git a/docker-qs/Dockerfile-build-from-local-sources b/docker-qs/Dockerfile-build-from-local-sources index 82d552a..2fad12c 100644 --- a/docker-qs/Dockerfile-build-from-local-sources +++ b/docker-qs/Dockerfile-build-from-local-sources @@ -34,8 +34,8 @@ COPY run.sh /usr/bin/ RUN chmod +x /usr/bin/run.sh && \ apk -U upgrade && \ apk add --no-cache bash python3 py3-pip py3-wheel && \ - pip3 install --no-cache-dir --upgrade pip && \ - pip3 install --no-cache-dir -r requirements.txt && \ + pip3 install --no-cache-dir --upgrade pip --break-system-packages && \ + pip3 install --no-cache-dir -r requirements.txt --break-system-packages && \ mkdir -p /var/elastic-blast && \ rm -rf /var/cache/apk/* requirements.txt diff --git a/docker-qs/Makefile b/docker-qs/Makefile index 045538f..c0a05e6 100644 --- a/docker-qs/Makefile +++ b/docker-qs/Makefile @@ -29,6 +29,7 @@ SHELL=/bin/bash IMG?=ncbi/elasticblast-query-split VERSION?=0.1.4 +ELB_VERSION?=$(shell git describe --tags --abbrev=0) GCP_PROJECT?=$(shell gcloud config get-value project 2>/dev/null) GCP_TEST_BUCKET?=gs://elasticblast-test/query-split-run-test AWS_REGION?=us-east-1 @@ -84,7 +85,7 @@ aws-build: aws-build-from-local-sources: rsync -a ../setup.py ../setup.cfg_cloud ../src ../bin ../requirements ${PWD}/ sed -i~ -e '/^value = $${VERSION}/d;' setup.cfg_cloud - echo "value = ${VERSION}" >> setup.cfg_cloud + echo "value = ${ELB_VERSION}" >> setup.cfg_cloud gcloud builds submit --config awscloudbuild.yaml --substitutions _SERVER=${AWS_SERVER},TAG_NAME=${VERSION},_IMG=${AWS_IMG},_DOCKERFILE=Dockerfile-build-from-local-sources,_AWS_ECR_PASSWD="`aws ecr-public get-login-password --region ${AWS_REGION}`" rm -fr src bin requirements setup.cfg_cloud setup.py diff --git a/docker-qs/README.md b/docker-qs/README.md index bdc45b2..ccc7954 100644 --- a/docker-qs/README.md +++ b/docker-qs/README.md @@ -10,3 +10,5 @@ various repositories. 
If you have `docker` available, run `make build` to build the image, and `make check` to test it locally. + +You will need credentials for NCBI-AWS-ELASTICBLAST-OPERATIONS to push to AWS ECR, see *To get NCBI AWS credentials in NCBI-AWS-ELASTICBLAST-OPERATIONS* in [README-ncbi.md](../README-ncbi.md). diff --git a/requirements/base.txt b/requirements/base.txt index 8a8362b..42d239d 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -1,11 +1,10 @@ -wheel==0.38.4 +wheel==0.43.0 setuptools==70.0.0 -importlib-resources==5.10.2 -importlib-metadata==6.0.0 -pex==2.1.152 -boto3==1.29.1 -botocore==1.32.1 +importlib-resources==6.1.1 +importlib-metadata==7.0.0 +pex==2.9.0 +boto3==1.34.141 +botocore==1.34.141 awslimitchecker==12.0.0 -tenacity==8.2.3 -dataclasses-json==0.6.2 -types-pkg-resources==0.1.3 +tenacity==8.5.0 +dataclasses-json==0.6.7 diff --git a/requirements/test.txt b/requirements/test.txt index c54000c..667a266 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1,14 +1,14 @@ -r base.txt -pytest==7.4.3 -pytest-cov==4.1.0 -pytest-mock==3.12.0 +pytest==8.2.2 +pytest-cov==5.0.0 +pytest-mock==3.14.0 teamcity-messages==1.32 -mypy==1.7.0 +mypy==1.10.1 pylint==2.7.4 tox==4.4.12 -virtualenv==20.21.0 -yamllint==1.33.0 -moto==4.2.8 -docker==6.1.3 -cfn-lint==0.83.3 +virtualenv==20.24.5 +yamllint==1.35.1 +moto==4.2.14 +docker==7.1.0 +cfn-lint==1.5.1 diff --git a/setup.cfg b/setup.cfg index 9a85dea..c9d1f41 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = elastic_blast -description = ElasticBLAST is a cloud-based tool to perform your BLAST searches faster and make you more effective +description = ElasticBLAST speeds up your work by distributing your BLAST+ searches across multiple cloud instances. The ability to scale resources in this way allows larger numbers of queries to be searched in a shorter time than you could with BLAST+ on a single host. 
Use of the cloud also allows you to collaborate with colleagues, allowing the sharing of results, datasets and pipelines on a common platform. The National Center for Biotechnology Information ([NCBI](https://www.ncbi.nlm.nih.gov)), part of the National Library of Medicine at the NIH, developed and maintains ElasticBLAST. long_description = file:README.md long_description_content_type = text/markdown maintainer = NCBI diff --git a/src/elastic_blast/aws_traits.py b/src/elastic_blast/aws_traits.py index e171052..360a338 100644 --- a/src/elastic_blast/aws_traits.py +++ b/src/elastic_blast/aws_traits.py @@ -31,7 +31,7 @@ from .util import UserReportError, check_aws_region_for_invalid_characters from .base import InstanceProperties, PositiveInteger, MemoryStr from .constants import ELB_DFLT_AWS_REGION, INPUT_ERROR, PERMISSIONS_ERROR -from .constants import DEPENDENCY_ERROR +from .constants import DEPENDENCY_ERROR, ELB_DFLT_AWS_REGION def create_aws_config(region: Optional[str] = None) -> Config: @@ -114,7 +114,8 @@ def get_instance_type_offerings(region: str) -> List[str]: def get_suitable_instance_types(min_memory: MemoryStr, min_cpus: PositiveInteger, - instance_types: Optional[List[str]] = None) -> List[Any]: + instance_types: Optional[List[str]] = None, + region: str = ELB_DFLT_AWS_REGION) -> List[Any]: """Get a list of instance type descriptions with at least min_memory and number of CPUs @@ -126,7 +127,7 @@ def get_suitable_instance_types(min_memory: MemoryStr, Returns: A list of instance type descriptions for instance types that satisfy the above constraints""" - ec2 = boto3.client('ec2') + ec2 = boto3.client('ec2', region_name=region) # select only 64-bit CPUs filters = [{'Name': 'processor-info.supported-architecture', diff --git a/src/elastic_blast/commands/submit.py b/src/elastic_blast/commands/submit.py index a7cb305..fe11db3 100755 --- a/src/elastic_blast/commands/submit.py +++ b/src/elastic_blast/commands/submit.py @@ -41,8 +41,9 @@ from 
elastic_blast.split import FASTAReader from elastic_blast.gcp import check_cluster as gcp_check_cluster from elastic_blast.gcp_traits import get_machine_properties -from elastic_blast.util import get_blastdb_size, UserReportError +from elastic_blast.util import check_user_provided_blastdb_exists, UserReportError from elastic_blast.util import get_resubmission_error_msg +from elastic_blast.util import ElbSupportedPrograms from elastic_blast.constants import ELB_AWS_JOB_IDS, ELB_METADATA_DIR, ELB_STATE_DISK_ID_FILE, QuerySplitMode from elastic_blast.constants import ELB_QUERY_BATCH_DIR, BLASTDB_ERROR, INPUT_ERROR from elastic_blast.constants import PERMISSIONS_ERROR, CLUSTER_ERROR, CSP, QUERY_LIST_EXT @@ -118,7 +119,7 @@ def submit(args, cfg, clean_up_stack): cfg.validate(ElbCommand.SUBMIT, dry_run) # For now, checking resources is only implemented for AWS - if cfg.cloud_provider.cloud == CSP.AWS: + if cfg.cloud_provider.cloud == CSP.AWS and os.getenv('TEAMCITY_VERSION') is None: check_resource_quotas(cfg) if check_running_cluster(cfg): @@ -151,7 +152,7 @@ def submit(args, cfg, clean_up_stack): # check database availability gcp_prj = None if cfg.cloud_provider.cloud == CSP.AWS else cfg.gcp.get_project_for_gcs_downloads() try: - get_blastdb_size(cfg.blast.db, cfg.cluster.db_source, gcp_prj) + check_user_provided_blastdb_exists(cfg.blast.db, ElbSupportedPrograms().get_db_mol_type(cfg.blast.program), cfg.cluster.db_source, gcp_prj) except ValueError as err: raise UserReportError(returncode=BLASTDB_ERROR, message=str(err)) diff --git a/src/elastic_blast/constants.py b/src/elastic_blast/constants.py index 99df836..19533bb 100644 --- a/src/elastic_blast/constants.py +++ b/src/elastic_blast/constants.py @@ -210,10 +210,10 @@ def __str__(self): ELB_DFLT_AWS_REGION = 'us-east-1' ELB_UNKNOWN_GCP_PROJECT = 'elb-unknown-gcp-project' -ELB_DOCKER_VERSION = '1.3.1' # ElasticBLAST 1.2.0 uses BLAST+ 2.15.0 +ELB_DOCKER_VERSION = '1.3.2' # ElasticBLAST 1.3.0 uses BLAST+ 2.16.0 
ELB_QS_DOCKER_VERSION = '0.1.4' -ELB_JANITOR_DOCKER_VERSION = '0.3.1' -ELB_JOB_SUBMIT_DOCKER_VERSION = '4.0.2' +ELB_JANITOR_DOCKER_VERSION = '0.3.2' +ELB_JOB_SUBMIT_DOCKER_VERSION = '4.0.3' ELB_DOCKER_IMAGE_GCP = f'gcr.io/ncbi-sandbox-blast/ncbi/elb:{ELB_DOCKER_VERSION}' ELB_DOCKER_IMAGE_AWS = f'public.ecr.aws/ncbi-elasticblast/elasticblast-elb:{ELB_DOCKER_VERSION}' diff --git a/src/elastic_blast/elb_config.py b/src/elastic_blast/elb_config.py index 96a6e02..0dd4bc3 100644 --- a/src/elastic_blast/elb_config.py +++ b/src/elastic_blast/elb_config.py @@ -29,6 +29,7 @@ from dataclasses import dataclass from dataclasses import InitVar, field, fields, asdict from dataclasses_json import dataclass_json, LetterCase, config +from dataclasses_json import DataClassJsonMixin import getpass from hashlib import md5 import configparser @@ -531,13 +532,25 @@ class TimeoutsConfig(ConfigParserToDataclassMapper): 'blast_k8s': ParamInfo(CFG_TIMEOUTS, CFG_TIMEOUT_BLAST_K8S_JOB)} +@dataclass +class ResourceIds(DataClassJsonMixin): + """Cloud resource ids""" + # persistent disk ids + disks: List[str] = field(default_factory = list, + metadata = config(letter_case=LetterCase.KEBAB)) + # volume snapshot ids + snapshots: List[str] = field(default_factory = list, + metadata = config(letter_case=LetterCase.KEBAB)) + + @dataclass_json(letter_case=LetterCase.KEBAB) @dataclass class AppState: """Application state values""" - # The GCP persistent disk ID - disk_ids: List[str] = field(default_factory=list) + # The GCP persistent disk and volume snapshot IDs + resources: ResourceIds = field(default_factory = lambda: ResourceIds()) + # The kubernetes context k8s_ctx: Optional[str] = None @@ -682,7 +695,7 @@ def __init__(self, *args, **kwargs): raise UserReportError(returncode=BLASTDB_ERROR, message=f'Metadata for BLAST database "{self.blast.db}" was not found. Please, make sure that the database exists and database molecular type corresponds to your blast program: "{self.blast.program}". 
To get a list of NCBI provided databases, please see https://github.com/ncbi/blast_plus_docs#blast-databases.') else: - logging.warning('Database metadata file was not provided. We recommend creating and providing a BLAST database metadata file. Benefits include better elastic-blast performance and error checking. Please, see https://blast.ncbi.nlm.nih.gov/doc/elastic-blast/tutorials/create-blastdb-metadata.html for more information and instructions.') + logging.warning(f'The BLAST database at {self.blast.db} does not have a metadata file. We recommend creating and providing a BLAST database metadata file. Benefits include better elastic-blast performance and error checking. Please, see https://www.ncbi.nlm.nih.gov/books/NBK569839/#_usrman_BLAST_feat_BLAST_database_metadat_ and https://blast.ncbi.nlm.nih.gov/doc/elastic-blast/tutorials/create-blastdb-metadata.html for more information and instructions.') # set mt_mode if self.blast: diff --git a/src/elastic_blast/gcp.py b/src/elastic_blast/gcp.py index e53fd07..b22f9da 100644 --- a/src/elastic_blast/gcp.py +++ b/src/elastic_blast/gcp.py @@ -32,6 +32,7 @@ import time import logging import json +import shutil from timeit import default_timer as timer from typing import Any, DefaultDict, Dict, Optional, List, Tuple import uuid @@ -46,6 +47,7 @@ from .jobs import read_job_template, write_job_files from .util import ElbSupportedPrograms, safe_exec, UserReportError, SafeExecError from .util import validate_gcp_disk_name, get_blastdb_info, get_usage_reporting +from .util import is_newer_version from . 
import kubernetes from .constants import CLUSTER_ERROR, ELB_NUM_JOBS_SUBMITTED, ELB_METADATA_DIR, K8S_JOB_SUBMIT_JOBS @@ -60,7 +62,7 @@ from .constants import GKE_CLUSTER_STATUS_RUNNING, GKE_CLUSTER_STATUS_RUNNING_WITH_ERROR from .constants import GKE_CLUSTER_STATUS_STOPPING, GKE_CLUSTER_STATUS_ERROR from .constants import STATUS_MESSAGE_ERROR -from .elb_config import ElasticBlastConfig +from .elb_config import ElasticBlastConfig, ResourceIds from .elasticblast import ElasticBlast from .gcp_traits import enable_gcp_api from . import VERSION @@ -183,11 +185,11 @@ def submit(self, query_batches: List[str], query_length, one_stage_cloud_query_s # save persistent disk id disk_ids = kubernetes.get_persistent_disks(self.cfg.appstate.k8s_ctx) logging.debug(f'New persistent disk id: {disk_ids}') - self.cfg.appstate.disk_ids += disk_ids + self.cfg.appstate.resources.disks += disk_ids dest = os.path.join(self.cfg.cluster.results, ELB_METADATA_DIR, ELB_STATE_DISK_ID_FILE) with open_for_write_immediate(dest) as f: - f.write(json.dumps(self.cfg.appstate.disk_ids)) + f.write(self.cfg.appstate.resources.to_json()) kubernetes.label_persistent_disk(self.cfg, 'blast-dbs-pvc') kubernetes.delete_volume_snapshots(self.cfg.appstate.k8s_ctx) @@ -531,6 +533,26 @@ def get_disks(cfg: ElasticBlastConfig, dry_run: bool = False) -> List[str]: return [i['name'] for i in disks] +def get_snapshots(cfg: ElasticBlastConfig, dry_run: bool = False) -> List[str]: + """Return a list of volume snapshot names in the current GCP project. 
+ Raises: + util.SafeExecError on problems with command line gcloud, + RuntimeError when gcloud results cannot be parsed""" + cmd = f'gcloud compute snapshots list --format json --project {cfg.gcp.project}' + if dry_run: + logging.info(cmd) + return list() + + p = safe_exec(cmd) + try: + snapshots = json.loads(p.stdout.decode()) + except Exception as err: + raise RuntimeError('Error when parsing listing of GCP snapshots' + str(err)) + if snapshots is None: + raise RuntimeError('Improperly read gcloud snapshot listing') + return [i['name'] for i in snapshots] + + def delete_disk(name: str, cfg: ElasticBlastConfig) -> None: """Delete a persistent disk. @@ -549,13 +571,32 @@ def delete_disk(name: str, cfg: ElasticBlastConfig) -> None: safe_exec(cmd) +def delete_snapshot(name: str, cfg: ElasticBlastConfig) -> None: + """Delete a volume snapshot. + + Arguments: + name: Volume snapshot name + cfg: Application config + + Raises: + util.SafeExecError on problems with command line tools + ValueError if snapshot name is empty""" + if not name: + raise ValueError('No snapshot name provided') + if not cfg: + raise ValueError('No application config provided') + cmd = f'gcloud compute snapshots delete -q {name} --project {cfg.gcp.project}' + safe_exec(cmd) + + @retry(reraise=True, stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=10)) # type: ignore -def _get_pd_id(cfg: ElasticBlastConfig) -> List[str]: +def _get_resource_ids(cfg: ElasticBlastConfig) -> ResourceIds: """ Try to get the GCP persistent disk ID from elastic-blast records""" - retval = list() - if cfg.appstate.disk_ids: - retval = cfg.appstate.disk_ids - logging.debug(f'GCP disk ID {retval}') + retval = ResourceIds() + if cfg.appstate.resources.disks and cfg.appstate.resources.snapshots: + retval = cfg.appstate.resources + logging.debug(f'GCP disk ID {retval.disks}') + logging.debug(f'GCP volume snapshot ID {retval.snapshots}') # no need to get disk id from GS if we already have it 
return retval @@ -570,25 +611,23 @@ def _get_pd_id(cfg: ElasticBlastConfig) -> List[str]: cmd = f'gsutil -q cat {disk_id_on_gcs}' try: p = safe_exec(cmd) - gcp_disk_ids = json.loads(p.stdout.decode()) + retval = ResourceIds.from_json(p.stdout.decode()) + err = p.stderr.decode() - if gcp_disk_ids: - logging.debug(f"Retrieved GCP disk IDs {gcp_disk_ids} from {disk_id_on_gcs}") + if retval.disks or retval.snapshots: + logging.debug(f"Retrieved GCP resource IDs {retval} from {disk_id_on_gcs}") try: - for disk_id in gcp_disk_ids: + for disk_id in retval.disks: validate_gcp_disk_name(disk_id) except ValueError: logging.error(f'GCP disk ID "{disk_id}" retrieved from {disk_id_on_gcs} is invalid.') - gcp_disk_id = '' - else: - retval += gcp_disk_ids else: raise RuntimeError('Persistent disk id stored in GS is empty') except Exception as e: logging.error(f'Unable to read {disk_id_on_gcs}: {e}') raise - logging.debug(f'Fetched disk IDs {retval}') + logging.debug(f'Fetched resource IDs {retval}') return retval @@ -601,12 +640,16 @@ def delete_cluster_with_cleanup(cfg: ElasticBlastConfig) -> None: dry_run = cfg.cluster.dry_run try_kubernetes = True pds = [] + snapshots = [] try: - pds = _get_pd_id(cfg) # type: ignore + resources = _get_resource_ids(cfg) + pds = resources.disks + snapshots = resources.snapshots except Exception as e: logging.error(f'Unable to read disk id from GS: {e}') else: logging.debug(f'PD id {" ".join(pds)}') + logging.debug(f'Snapshot id {" ".join(snapshots)}') # determine the course of action based on cluster status while True: @@ -675,6 +718,12 @@ def delete_cluster_with_cleanup(cfg: ElasticBlastConfig) -> None: except Exception as e: logging.warning(f'kubernetes.get_persistent_disks failed.\tDetails: {e}') + try: + # get cluster's volume snapshots in case they leak + snapshots = kubernetes.get_volume_snapshots(k8s_ctx, dry_run) + except Exception as e: + logging.warning(f'kubernetes.get_volume_snapshots failed.\tDetails: {e}') + try: # delete all k8s 
jobs, persistent volumes and volume claims # this should delete persistent disks @@ -686,6 +735,13 @@ def delete_cluster_with_cleanup(cfg: ElasticBlastConfig) -> None: logging.debug(f'PD {i} still present after deleting k8s jobs and PVCs') else: logging.debug(f'PD {i} was deleted by deleting k8s PVC') + + all_snapshots = get_snapshots(cfg, dry_run) + for i in snapshots: + if i in all_snapshots: + logging.debug(f'Snapshot {i} still present after deleting k8s jobs and volume snapshots') + else: + logging.debug(f'Snapshot {i} was deleted by deleting k8s volume snapshots') except Exception as e: # nothing to do the above fails, the code below will take care of # persistent disk leak @@ -700,6 +756,11 @@ def delete_cluster_with_cleanup(cfg: ElasticBlastConfig) -> None: if i in disks: logging.debug(f'PD {i} still present after cluster deletion, deleting again') delete_disk(i, cfg) + all_snapshots = get_snapshots(cfg, dry_run) + for i in snapshots: + if i in all_snapshots: + logging.debug(f'Snapshot {i} still present after cluster deletion, deleting again') + delete_snapshot(i, cfg) except Exception as e: logging.error(getattr(e, 'message', repr(e))) # if the above failed, try deleting each disk unconditionally to @@ -709,6 +770,11 @@ def delete_cluster_with_cleanup(cfg: ElasticBlastConfig) -> None: delete_disk(i, cfg) except Exception as e: logging.error(getattr(e, 'message', repr(e))) + for i in snapshots: + try: + delete_snapshot(i, cfg) + except Exception as e: + logging.error(getattr(e, 'message', repr(e))) finally: disks = get_disks(cfg, dry_run) for i in pds: @@ -719,6 +785,16 @@ def delete_cluster_with_cleanup(cfg: ElasticBlastConfig) -> None: f'gcloud compute disks list --project {cfg.gcp.project} | grep {i}\n' \ f'and delete it with:\ngcloud compute disks delete {i} --project {cfg.gcp.project} --zone {cfg.gcp.zone}' logging.error(msg) + + all_snapshots = get_snapshots(cfg, dry_run) + for i in snapshots: + if i in all_snapshots: + msg = f'ElasticBLAST was not 
able to delete volume snapshot "{i}". ' \ + 'Leaving it may cause additional charges from the cloud provider. ' \ + 'You can verify that the disk still exists using this command:\n' \ + f'gcloud compute disks snapshots --project {cfg.gcp.project} | grep {i}\n' \ + f'and delete it with:\ngcloud compute snapshots delete {i} --project {cfg.gcp.project} --zone {cfg.gcp.zone}' + logging.error(msg) # Remove the exception for now, as we want to delete the cluster always! #raise UserReportError(returncode=CLUSTER_ERROR, msg) @@ -915,7 +991,7 @@ def delete_cluster(cfg: ElasticBlastConfig): def check_prerequisites() -> None: - """ Check that necessary tools, gcloud, gsutil, and kubectl + """ Check that necessary tools, gcloud, gsutil, gke-gcloud-auth-plugin and kubectl are available if necessary. If execution of one of these tools is unsuccessful it will throw UserReportError exception.""" @@ -928,12 +1004,21 @@ def check_prerequisites() -> None: try: # client=true prevents kubectl from addressing server which can be down at the moment - p = safe_exec('kubectl version --client=true') + p = safe_exec('kubectl version --output=json --client=true') except SafeExecError as e: message = f"Required pre-requisite 'kubectl' doesn't work, check Kubernetes installation.\nDetails: {e.message}" raise UserReportError(DEPENDENCY_ERROR, message) logging.debug(f'{":".join(p.stdout.decode().split())}') + version_data = json.loads(p.stdout.decode()) + kubectl_version = version_data["clientVersion"]["major"] + "." 
+ kubectl_version += version_data["clientVersion"]["minor"] + if is_newer_version(kubectl_version, "1.25") and shutil.which("gke-gcloud-auth-plugin") is None: + message = f"Missing dependency 'gke-gcloud-auth-plugin', " + message += "for more information, please see " + message += "https://cloud.google.com/blog/products/containers-kubernetes/kubectl-auth-changes-in-gke" + raise UserReportError(DEPENDENCY_ERROR, message) + # Check we have gsutil available try: p = safe_exec('gsutil --version') diff --git a/src/elastic_blast/jobs.py b/src/elastic_blast/jobs.py index 38dbc53..52885e9 100644 --- a/src/elastic_blast/jobs.py +++ b/src/elastic_blast/jobs.py @@ -29,7 +29,7 @@ import os import re from typing import List -from pkg_resources import resource_string +from importlib_resources import files from typing import Optional from .filehelper import open_for_read, open_for_write @@ -50,7 +50,8 @@ def read_job_template(template_name=ELB_DFLT_BLAST_JOB_TEMPLATE, cfg: Optional[E resource_prefix_len = len(resource_prefix) if template_name[:resource_prefix_len] == resource_prefix: template_name = template_name[resource_prefix_len:] - return resource_string('elastic_blast', template_name).decode() + ref = files('elastic_blast').joinpath(template_name) + return ref.read_text() with open_for_read(template_name) as f: return f.read() diff --git a/src/elastic_blast/kubernetes.py b/src/elastic_blast/kubernetes.py index d4d11d7..6abb3db 100644 --- a/src/elastic_blast/kubernetes.py +++ b/src/elastic_blast/kubernetes.py @@ -30,7 +30,7 @@ import time from tenacity import retry, stop_after_delay, stop_after_attempt, wait_random from timeit import default_timer as timer -from pkg_resources import resource_string, resource_filename, set_extraction_path +from importlib_resources import files, as_file from tempfile import TemporaryDirectory from typing import List, Optional @@ -117,6 +117,24 @@ def get_persistent_disks(k8s_ctx: str, dry_run: bool = False) -> List[str]: return list() +def 
get_volume_snapshots(k8s_ctx: str, dry_run: bool = False) -> List[str]: + """Return a list of volume snapshot ids for a kubernetes cluster. + Kubeconfig file determines the cluster that will be contacted. + + Raises: + util.SafeExecError on problems communicating with the cluster + json.decoder.JSONDecodeError on problems with parsing kubectl json output""" + cmd = f'kubectl --context={k8s_ctx} get volumesnapshot -o json' + if dry_run: + logging.info(cmd) + else: + p = safe_exec(cmd) + if p.stdout: + pds = json.loads(p.stdout.decode()) + return [f"snapshot-{i['metadata']['uid']}" for i in pds['items']] + return list() + + @retry( stop=(stop_after_delay(ELB_K8S_JOB_SUBMISSION_TIMEOUT) | stop_after_attempt(ELB_K8S_JOB_SUBMISSION_MAX_RETRIES)), wait=wait_random(min=ELB_K8S_JOB_SUBMISSION_MIN_WAIT, max=ELB_K8S_JOB_SUBMISSION_MAX_WAIT)) # type: ignore def submit_jobs_with_retries(k8s_ctx: str, path: pathlib.Path, dry_run=False) -> List[str]: """ Retry kubernetes job submissions with the parameters specified in the decorator """ @@ -482,8 +500,8 @@ def initialize_local_ssd(cfg: ElasticBlastConfig, query_files: List[str] = [], w 'TIMEOUT': str(init_blastdb_minutes_timeout*60) } with TemporaryDirectory() as d: - set_extraction_path(d) - job_cloud_split_local_ssd_tmpl = resource_string('elastic_blast', f'templates/{job_template}').decode() + ref = files('elastic_blast').joinpath(f'templates/{job_template}') + job_cloud_split_local_ssd_tmpl = ref.read_text() job_cloud_split_local_ssd = pathlib.Path(os.path.join(d, 'job-cloud-split-local-ssd.yaml')) with job_cloud_split_local_ssd.open(mode='wt') as f: f.write(substitute_params(job_cloud_split_local_ssd_tmpl, subs)) @@ -521,10 +539,10 @@ def initialize_local_ssd(cfg: ElasticBlastConfig, query_files: List[str] = [], w } logging.debug(f"Initializing local SSD: {ELB_DOCKER_IMAGE_GCP}") with TemporaryDirectory() as d: - set_extraction_path(d) start = timer() - job_init_local_ssd_tmpl = resource_string('elastic_blast', 
f'templates/{job_init_template}').decode() + ref = files('elastic_blast').joinpath(f'templates/{job_init_template}') + job_init_local_ssd_tmpl = ref.read_text() for n in range(num_nodes): job_init_local_ssd = pathlib.Path(os.path.join(d, f'job-init-local-ssd-{n}.yaml')) subs['NODE_ORDINAL'] = str(n) @@ -659,17 +677,18 @@ def initialize_persistent_disk(cfg: ElasticBlastConfig, query_files: List[str] = logging.debug(f"Initializing persistent volume: {ELB_DOCKER_IMAGE_GCP} {ELB_QS_DOCKER_IMAGE_GCP}") with TemporaryDirectory() as d: - set_extraction_path(d) - storage_gcp = resource_filename('elastic_blast', 'templates/storage-gcp-ssd.yaml') - cmd = f"kubectl --context={k8s_ctx} apply -f {storage_gcp}" - if dry_run: - logging.info(cmd) - else: - safe_exec(cmd) + ref = files('elastic_blast') / 'templates/storage-gcp-ssd.yaml' + with as_file(ref) as storage_gcp: + cmd = f"kubectl --context={k8s_ctx} apply -f {storage_gcp}" + if dry_run: + logging.info(cmd) + else: + safe_exec(cmd) pvc_yaml = os.path.join(d, 'pvc-rwo.yaml') with open(pvc_yaml, 'wt') as f: - f.write(substitute_params(resource_string('elastic_blast', 'templates/pvc-rwo.yaml.template').decode(), subs)) + ref = files('elastic_blast').joinpath('templates/pvc-rwo.yaml.template') + f.write(substitute_params(ref.read_text(), subs)) cmd = f"kubectl --context={k8s_ctx} apply -f {pvc_yaml}" if dry_run: logging.info(cmd) @@ -679,7 +698,8 @@ def initialize_persistent_disk(cfg: ElasticBlastConfig, query_files: List[str] = start = timer() job_init_pv = pathlib.Path(os.path.join(d, 'job-init-pv.yaml')) with job_init_pv.open(mode='wt') as f: - f.write(substitute_params(resource_string('elastic_blast', f'templates/{job_init_pv_template}').decode(), subs)) + ref = files('elastic_blast').joinpath(f'templates/{job_init_pv_template}') + f.write(substitute_params(ref.read_text(), subs)) cmd = f"kubectl --context={k8s_ctx} apply -f {job_init_pv}" if dry_run: logging.info(cmd) @@ -694,13 +714,14 @@ def 
initialize_persistent_disk(cfg: ElasticBlastConfig, query_files: List[str] = disks = get_persistent_disks(k8s_ctx, dry_run) if disks: logging.debug(f'GCP disk IDs {disks}') - cfg.appstate.disk_ids += disks + cfg.appstate.resources.disks += disks dest = os.path.join(cfg.cluster.results, ELB_METADATA_DIR, ELB_STATE_DISK_ID_FILE) with open_for_write_immediate(dest) as f: - f.write(json.dumps(cfg.appstate.disk_ids)) + f.write(cfg.appstate.resources.to_json()) elif not dry_run: logging.error('Failed to get disk ID') + if wait != ElbExecutionMode.WAIT: return @@ -730,25 +751,37 @@ def initialize_persistent_disk(cfg: ElasticBlastConfig, query_files: List[str] = # PVC snapshot logging.debug('Creating PVC snapshot') start = timer() - snapshot_class = resource_filename('elastic_blast', 'templates/volume-snapshot-class.yaml') - cmd = f"kubectl --context={k8s_ctx} apply -f {snapshot_class}" - if dry_run: - logging.info(cmd) - else: - safe_exec(cmd) + ref = files('elastic_blast') / 'templates/volume-snapshot-class.yaml' + with as_file(ref) as snapshot_class: + cmd = f"kubectl --context={k8s_ctx} apply -f {snapshot_class}" + if dry_run: + logging.info(cmd) + else: + safe_exec(cmd) - snapshot = resource_filename('elastic_blast', 'templates/volume-snapshot.yaml') - cmd = f"kubectl --context={k8s_ctx} apply -f {snapshot}" - if dry_run: - logging.info(cmd) - else: - safe_exec(cmd) + ref = files('elastic_blast') / 'templates/volume-snapshot.yaml' + with as_file(ref) as snapshot: + cmd = f"kubectl --context={k8s_ctx} apply -f {snapshot}" + if dry_run: + logging.info(cmd) + else: + safe_exec(cmd) # wair until snapshot is ready _wait_for_snapshot(k8s_ctx, pathlib.Path(snapshot), dry_run=dry_run) end = timer() logging.debug(f'PVC snapshot created and ready in {end - start:.2f} seconds') + snapshots = get_volume_snapshots(k8s_ctx, dry_run) + if snapshots: + logging.debug(f'GCP volume snapshot IDs {snapshots}') + cfg.appstate.resources.snapshots += snapshots + dest = 
os.path.join(cfg.cluster.results, ELB_METADATA_DIR, ELB_STATE_DISK_ID_FILE) + with open_for_write_immediate(dest) as f: + f.write(cfg.appstate.resources.to_json()) + elif not dry_run: + logging.error('Failed to get snapshot ID') + # delete the persistent disk logging.debug('Deleting writable persistent disk') cmd = f'kubectl --context={k8s_ctx} delete -f {pvc_yaml}' @@ -761,7 +794,8 @@ def initialize_persistent_disk(cfg: ElasticBlastConfig, query_files: List[str] = logging.debug('Creating ReadOnlyMany PVC from snapshot') cloned_pvc_yaml = os.path.join(d, 'pvc-rom.yaml') with open(cloned_pvc_yaml, 'wt') as f: - f.write(substitute_params(resource_string('elastic_blast', 'templates/pvc-rom.yaml.template').decode(), subs)) + ref = files('elastic_blast').joinpath('templates/pvc-rom.yaml.template') + f.write(substitute_params(ref.read_text(), subs)) cmd = f"kubectl --context={k8s_ctx} apply -f {cloned_pvc_yaml}" if dry_run: logging.info(cmd) @@ -808,7 +842,7 @@ def label_persistent_disk(cfg: ElasticBlastConfig, pv_claim: str) -> None: def check_server(k8s_ctx: str, dry_run: bool = False): """Check that server set after gcp.get_gke_credentials is alive""" - cmd = f'kubectl --context={k8s_ctx} version --short' + cmd = f'kubectl --context={k8s_ctx} version' if dry_run: logging.info(cmd) else: @@ -874,18 +908,18 @@ def enable_service_account(cfg: ElasticBlastConfig): dry_run = cfg.cluster.dry_run logging.debug(f"Enabling service account") with TemporaryDirectory() as d: - set_extraction_path(d) - rbac_yaml = resource_filename('elastic_blast', 'templates/elb-janitor-rbac.yaml') - cmd = f"kubectl --context={cfg.appstate.k8s_ctx} apply -f {rbac_yaml}" - if dry_run: - logging.info(cmd) - else: - try: - safe_exec(cmd) - except: - msg = 'ElasticBLAST is missing permissions for its auto-shutdown and cloud job submission feature. 
To provide these permissions, please run ' - msg += f'gcloud projects add-iam-policy-binding {cfg.gcp.project} --member={cfg.gcp.user} --role=roles/container.admin' - raise UserReportError(returncode=PERMISSIONS_ERROR, message=msg) + ref = files('elastic_blast') / 'templates/elb-janitor-rbac.yaml' + with as_file(ref) as rbac_yaml: + cmd = f"kubectl --context={cfg.appstate.k8s_ctx} apply -f {rbac_yaml}" + if dry_run: + logging.info(cmd) + else: + try: + safe_exec(cmd) + except: + msg = 'ElasticBLAST is missing permissions for its auto-shutdown and cloud job submission feature. To provide these permissions, please run ' + msg += f'gcloud projects add-iam-policy-binding {cfg.gcp.project} --member={cfg.gcp.user} --role=roles/container.admin' + raise UserReportError(returncode=PERMISSIONS_ERROR, message=msg) def submit_janitor_cronjob(cfg: ElasticBlastConfig): @@ -908,10 +942,10 @@ def submit_janitor_cronjob(cfg: ElasticBlastConfig): } logging.debug(f"Submitting ElasticBLAST janitor cronjob: {ELB_JANITOR_DOCKER_IMAGE_GCP}") with TemporaryDirectory() as d: - set_extraction_path(d) cronjob_yaml = os.path.join(d, 'elb-cronjob.yaml') with open(cronjob_yaml, 'wt') as f: - f.write(substitute_params(resource_string('elastic_blast', 'templates/elb-janitor-cronjob.yaml.template').decode(), subs)) + ref = files('elastic_blast').joinpath('templates/elb-janitor-cronjob.yaml.template') + f.write(substitute_params(ref.read_text(), subs)) cmd = f"kubectl --context={cfg.appstate.k8s_ctx} apply -f {cronjob_yaml}" if dry_run: logging.info(cmd) @@ -938,10 +972,10 @@ def submit_job_submission_job(cfg: ElasticBlastConfig): } logging.debug(f"Submitting job submission job: {ELB_CJS_DOCKER_IMAGE_GCP}") with TemporaryDirectory() as d: - set_extraction_path(d) job_yaml = os.path.join(d, 'job-submit-jobs.yaml') with open(job_yaml, 'wt') as f: - f.write(substitute_params(resource_string('elastic_blast', 'templates/job-submit-jobs.yaml.template').decode(), subs)) + ref = 
files('elastic_blast').joinpath('templates/job-submit-jobs.yaml.template') + f.write(substitute_params(ref.read_text(), subs)) cmd = f"kubectl --context={cfg.appstate.k8s_ctx} apply -f {job_yaml}" if dry_run: logging.info(cmd) diff --git a/src/elastic_blast/tuner.py b/src/elastic_blast/tuner.py index 6a8601b..9d3bb3f 100644 --- a/src/elastic_blast/tuner.py +++ b/src/elastic_blast/tuner.py @@ -344,7 +344,8 @@ def aws_get_machine_type(memory: MemoryStr, num_cpus: PositiveInteger, region: s # get properties of suitable instances suitable_props = get_suitable_instance_types(min_memory=memory, min_cpus=num_cpus, - instance_types=supported_offerings) + instance_types=supported_offerings, + region=region) if not suitable_props: raise UserReportError(returncode = UNKNOWN_ERROR, message = f'An AWS machine type with memory {memory.asGiB()}GB and {num_cpus} CPUs could not be found') diff --git a/src/elastic_blast/util.py b/src/elastic_blast/util.py index 20ba111..5fef96e 100644 --- a/src/elastic_blast/util.py +++ b/src/elastic_blast/util.py @@ -33,14 +33,16 @@ import datetime import json import inspect +from itertools import zip_longest from functools import reduce -from pkg_resources import resource_exists +from importlib_resources import files from typing import List, Union, Callable, Optional, Dict from .constants import MolType, GCS_DFLT_BUCKET from .constants import DEPENDENCY_ERROR, AWS_MAX_TAG_LENGTH, GCP_MAX_LABEL_LENGTH from .constants import AWS_MAX_JOBNAME_LENGTH, CSP, ELB_GCS_PREFIX from .constants import ELB_DFLT_LOGLEVEL, ELB_DFLT_LOGFILE from .constants import INPUT_ERROR +from .constants import ELB_S3_PREFIX from .base import DBSource RESOURCES = [ @@ -61,13 +63,14 @@ # Not used by elastic-blast tool: # storage-gcp.yaml # cloudformation-admin-iam.yaml -# Used directly (without pkg_resources) in aws.py +# Used directly (without importlib_resources) in aws.py # elastic-blast-cf.yaml # Used from bucket resource # elastic-blast-janitor-cf.yaml def 
validate_installation(): for r in RESOURCES: - if not resource_exists('elastic_blast', os.path.join('templates', r)): + ref = files('elastic_blast') / 'templates' / r + if not ref: raise UserReportError(DEPENDENCY_ERROR, f'Resource {r} is missing from the package. Please re-install ElasticBLAST') @@ -282,27 +285,23 @@ def get_blastdb_info(blastdb: str, gcp_prj: Optional[str] = None): return db, db_path, sanitize_for_k8s(db) -def get_blastdb_size(db: str, db_source: DBSource, gcp_prj: Optional[str] = None) -> float: - """Request blast database size from GCP using gcp module +def check_user_provided_blastdb_exists(db: str, mol_type: MolType, db_source: DBSource, gcp_prj: Optional[str] = None) -> None: + """Request blast database size from cloud service provider object storage If applied to custom db, just check the presence Returns the size in GB, if not found raises ValueError exception cfg: application configuration object """ - if db.startswith(ELB_GCS_PREFIX): - # Custom database, just check the presence - try: + try: + if db.startswith(ELB_GCS_PREFIX): + # Custom database, just check the presence prj = f'-u {gcp_prj}' if gcp_prj else '' - safe_exec(f'gsutil {prj} ls {db}.*') - except SafeExecError: - raise ValueError(f'BLAST database {db} was not found') - # TODO: find a way to check custom DB size w/o transferring it to user machine - return 1000000 - if db_source == DBSource.GCP: - return gcp_get_blastdb_size(db, gcp_prj) - elif db_source == DBSource.AWS: - return 1000000 # FIXME - raise NotImplementedError("Not implemented for sources other than GCP") + safe_exec(f'gsutil {prj} stat {db}.*') + elif db.startswith(ELB_S3_PREFIX): + cmd = f'aws s3 ls {db}' + safe_exec(cmd) + except SafeExecError: + raise ValueError(f'BLAST database {db} was not found') def gcp_get_blastdb_latest_path(gcp_prj: Optional[str]) -> str: @@ -313,22 +312,6 @@ def gcp_get_blastdb_latest_path(gcp_prj: Optional[str]) -> str: return os.path.join(GCS_DFLT_BUCKET, 
proc.stdout.decode().rstrip()) -def gcp_get_blastdb_size(db: str, gcp_prj: Optional[str]) -> float: - """Request blast database size from GCP using gsutil - Returns the size in GB, if not found raises ValueError exception - - db: database name - """ - latest_path = gcp_get_blastdb_latest_path(gcp_prj) - prj = f'-u {gcp_prj}' if gcp_prj else '' - cmd = f'gsutil {prj} cat {latest_path}/blastdb-manifest.json' - proc = safe_exec(cmd) - blastdb_metadata = json.loads(proc.stdout.decode()) - if not db in blastdb_metadata: - raise ValueError(f'BLAST database {db} was not found') - return blastdb_metadata[db]['size'] - - def check_positive_int(val: str) -> int: """Function to check the passed value is a positive integer""" try: @@ -600,6 +583,32 @@ def get_gcp_project() -> Optional[str]: return result +def is_newer_version(version1: str, version2: str) -> bool: + """ + Compare two version strings to determine if version1 is newer than version2. + + Args: + version1 (str): The first version string. + version2 (str): The second version string. + + Returns: + bool: True if version1 is newer than version2, False otherwise. 
+ """ + + # Split version strings into lists of integers + v1_parts = [int(part) for part in version1.split('.')] + v2_parts = [int(part) for part in version2.split('.')] + + # Compare each part of the version numbers + for v1, v2 in zip_longest(v1_parts, v2_parts, fillvalue=0): + if v1 > v2: + return True + elif v1 < v2: + return False + + return False + + class MetaFileName(type): """ Auxiliary class to get the source file name """ def __repr__(self): diff --git a/tests/app/data/bad_bucket_conf.ini b/tests/app/data/bad_bucket_conf.ini index 92014af..a1f33e6 100644 --- a/tests/app/data/bad_bucket_conf.ini +++ b/tests/app/data/bad_bucket_conf.ini @@ -1,7 +1,7 @@ # ElasticBLAST configuration file template to run blastn against nt [cloud-provider] -gcp-project = ncbi-sandbox-blast +gcp-project = a-gcp-project gcp-region = us-east4 gcp-zone = us-east4-b diff --git a/tests/app/data/blastdb-notfound.ini b/tests/app/data/blastdb-notfound.ini index 193a157..5a1229b 100644 --- a/tests/app/data/blastdb-notfound.ini +++ b/tests/app/data/blastdb-notfound.ini @@ -1,5 +1,5 @@ [cloud-provider] -gcp-project = ncbi-sandbox-blast +gcp-project = a-gcp-project gcp-region = us-east4 gcp-zone = us-east4-b diff --git a/tests/app/data/cleanup-error.ini b/tests/app/data/cleanup-error.ini index b792a62..5ecddd7 100644 --- a/tests/app/data/cleanup-error.ini +++ b/tests/app/data/cleanup-error.ini @@ -1,7 +1,7 @@ # ElasticBLAST configuration file template to run blastn against nt [cloud-provider] -gcp-project = ncbi-sandbox-blast +gcp-project = a-gcp-project gcp-region = us-east4 gcp-zone = us-east4-b diff --git a/tests/app/data/cluster-error.ini b/tests/app/data/cluster-error.ini index 5b18918..206ac7a 100644 --- a/tests/app/data/cluster-error.ini +++ b/tests/app/data/cluster-error.ini @@ -1,7 +1,7 @@ # ElasticBLAST configuration file template to run blastn against nt [cloud-provider] -gcp-project = ncbi-sandbox-blast +gcp-project = a-gcp-project gcp-region = us-east4 gcp-zone = us-east4-b 
diff --git a/tests/app/data/good_conf.ini b/tests/app/data/good_conf.ini index 5a87d59..763ce29 100644 --- a/tests/app/data/good_conf.ini +++ b/tests/app/data/good_conf.ini @@ -1,7 +1,7 @@ # ElasticBLAST configuration file template to run blastn against nt [cloud-provider] -gcp-project = ncbi-sandbox-blast +gcp-project = a-gcp-project gcp-region = us-east4 gcp-zone = us-east4-b diff --git a/tests/app/data/invalid-cpu-req-gcp.ini b/tests/app/data/invalid-cpu-req-gcp.ini index 788e91d..4d6843c 100644 --- a/tests/app/data/invalid-cpu-req-gcp.ini +++ b/tests/app/data/invalid-cpu-req-gcp.ini @@ -1,7 +1,7 @@ # ElasticBLAST configuration file template to run blastn against nt [cloud-provider] -gcp-project = ncbi-sandbox-blast +gcp-project = a-gcp-project gcp-region = us-east4 gcp-zone = us-east4-b diff --git a/tests/app/data/invalid-machine-type-gcp.ini b/tests/app/data/invalid-machine-type-gcp.ini index 1c67b66..2daca73 100644 --- a/tests/app/data/invalid-machine-type-gcp.ini +++ b/tests/app/data/invalid-machine-type-gcp.ini @@ -1,7 +1,7 @@ # ElasticBLAST configuration file template to run blastn against nt [cloud-provider] -gcp-project = ncbi-sandbox-blast +gcp-project = a-gcp-project gcp-region = us-east4 gcp-zone = us-east4-b diff --git a/tests/app/data/invalid-mem-req.ini b/tests/app/data/invalid-mem-req.ini index e60415e..65c65e8 100644 --- a/tests/app/data/invalid-mem-req.ini +++ b/tests/app/data/invalid-mem-req.ini @@ -1,7 +1,7 @@ # ElasticBLAST configuration file template to run blastn against nt [cloud-provider] -gcp-project = ncbi-sandbox-blast +gcp-project = a-gcp-project gcp-region = us-east4 gcp-zone = us-east4-b diff --git a/tests/app/data/too-many-k8s-jobs.ini b/tests/app/data/too-many-k8s-jobs.ini index 3e526a6..7ad8e38 100644 --- a/tests/app/data/too-many-k8s-jobs.ini +++ b/tests/app/data/too-many-k8s-jobs.ini @@ -2,7 +2,7 @@ # will lead to too many k8s jobs being created (i.e.: should fail) [cloud-provider] -gcp-project = ncbi-sandbox-blast +gcp-project 
= a-gcp-project gcp-region = us-east4 gcp-zone = us-east4-b diff --git a/tests/app/gcloud b/tests/app/gcloud new file mode 100755 index 0000000..9f473ae --- /dev/null +++ b/tests/app/gcloud @@ -0,0 +1,6 @@ +#!/bin/bash + +# This is a fake gcloud that does nothing and always succeeds. +# It is needed for test_dependency_error unit test. + +exit 0 diff --git a/tests/app/test_elasticblast.py b/tests/app/test_elasticblast.py index 6f9069e..496d2fa 100644 --- a/tests/app/test_elasticblast.py +++ b/tests/app/test_elasticblast.py @@ -124,7 +124,7 @@ def app_mocks(caplog, aws_credentials, gke_mock, mocker): mocker.patch('elastic_blast.elasticblast_factory.ElasticBlastAws', new=MagicMock(return_value=MagicMock())) mocker.patch(target='elastic_blast.tuner.aws_get_machine_properties', new=MagicMock(return_value=InstanceProperties(32, 120))) mocker.patch('elastic_blast.commands.submit.harvest_query_splitting_results', new=MagicMock(return_value=QuerySplittingResults(query_length=5, query_batches=['batch_0.fa']))) - mocker.patch('elastic_blast.commands.submit.get_blastdb_size', new=MagicMock(return_value=1.0)) + mocker.patch('elastic_blast.commands.submit.check_user_provided_blastdb_exists', new=MagicMock(return_value=1.0)) mocker.patch('elastic_blast.gcp.get_blastdb_info', new=MagicMock(return_value=('gs://test-bucket/testdb', 'gs://test-bucket/testdb.tar.gz', 'testdb'))) mocker.patch('elastic_blast.commands.submit.get_length', new=MagicMock(return_value=1)) @@ -595,10 +595,8 @@ def test_dependency_error(): print(msg) assert 'Traceback' not in msg assert "Required pre-requisite 'gcloud' doesn't work" in msg - # Eliminate gcloud, check kubectl missing - p = safe_exec('which gcloud') - exepath = p.stdout.decode() - newpath += ':' + os.path.dirname(exepath) + # Use fake gcloud, check kubectl missing + newpath += ':./tests/app' p = subprocess.run([ELB_EXENAME, 'submit', '--cfg', INI_VALID, '--dry-run'], env={'PATH': newpath}, stderr=subprocess.PIPE) assert p.returncode == 
constants.DEPENDENCY_ERROR msg = p.stderr.decode() diff --git a/tests/blastdb/data/blastdb-manifest-ncbi-1.1.json b/tests/blastdb/data/blastdb-manifest-ncbi-1.1.json deleted file mode 100644 index 857a43b..0000000 --- a/tests/blastdb/data/blastdb-manifest-ncbi-1.1.json +++ /dev/null @@ -1,78 +0,0 @@ -[ - { - "version": "1.1", - "dbname": "nr", - "dbtype": "Protein", - "description": "All non-redundant GenBank CDS translations+PDB+SwissProt+PIR+PRF excluding environmental samples from WGS projects", - "number-of-letters": "142849168103", - "number-of-sequences": "393558513", - "files": [ - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.00.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.01.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.02.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.03.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.04.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.05.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.06.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.07.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.08.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.09.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.10.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.11.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.12.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.13.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.14.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.15.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.16.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.17.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.18.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.19.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.20.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.21.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.22.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.23.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.24.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.25.tar.gz", - 
"ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.26.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.27.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.28.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.29.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.30.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.31.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.32.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.33.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.34.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.35.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.36.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.37.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.38.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.39.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.40.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.41.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.42.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.43.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.44.tar.gz" - ], - "last-updated": "2021-05-03T10:02:19.264333-04:00", - "bytes-total": "254653277435", - "bytes-to-cache": "146391203506", - "bytes-total-compressed": "133956574643", - "number-of-volumes": "45" - }, - { - "version": "1.1", - "dbname": "swissprot", - "dbtype": "Protein", - "description": "Non-redundant UniProtKB/SwissProt sequences", - "number-of-letters": "180536657", - "number-of-sequences": "476618", - "files": [ - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/swissprot.tar.gz" - ], - "last-updated": "2021-05-03T10:02:19.264333-04:00", - "bytes-total": "353180979", - "bytes-to-cache": "184826348", - "bytes-total-compressed": "186046116", - "number-of-volumes": "1" - } -] diff --git a/tests/blastdb/data/nr-aws.json b/tests/blastdb/data/nr-aws.json deleted file mode 100644 index 0bf4069..0000000 --- a/tests/blastdb/data/nr-aws.json +++ /dev/null @@ -1,382 +0,0 @@ -{ - "version": "1.1", - "dbname": "nr", - "dbtype": "Protein", - "description": "All non-redundant 
GenBank CDS translations+PDB+SwissProt+PIR+PRF excluding environmental samples from WGS projects", - "number-of-letters": "142849168103", - "number-of-sequences": "393558513", - "files": [ - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.40.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.26.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.32.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.36.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.08.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.41.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.ptf", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.05.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.21.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.02.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.00.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.03.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.43.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.28.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.29.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.33.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.43.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.24.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.07.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.25.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.39.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.37.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.24.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.13.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.36.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.12.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.pdb", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.25.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.02.phi", - 
"s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.03.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.27.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.22.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.20.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.43.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.41.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.05.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.42.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.43.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.02.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.05.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.42.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.18.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.14.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.31.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.37.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.07.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.32.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.15.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.01.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.20.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.13.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.15.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.37.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.22.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.05.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.26.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.14.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.08.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.19.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.00.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.17.phi", - 
"s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.06.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.33.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.19.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.43.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.44.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.40.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.39.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.03.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.31.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.32.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.32.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.27.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.33.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.40.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.16.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.02.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.42.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.33.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.01.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.20.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.17.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.13.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.19.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.35.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.07.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.10.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.24.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.04.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.38.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.15.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.39.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.23.pin", - 
"s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.18.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.01.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.42.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.02.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.30.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.01.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.17.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.44.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.16.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.09.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.06.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.18.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.44.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.19.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.33.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.06.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.04.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.18.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.29.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.37.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.36.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.00.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.27.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.26.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.06.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.34.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.07.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.01.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.03.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.34.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.37.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.42.phi", - 
"s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.27.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.29.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.17.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.07.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.14.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.06.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.41.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.24.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.18.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.09.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.08.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.22.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.23.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.17.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.37.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.13.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.30.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.42.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.11.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.pal", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.00.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.12.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.01.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.22.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.17.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.pos", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.35.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.08.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.02.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.08.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.26.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.42.psq", - 
"s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.32.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.14.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.40.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.30.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.00.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.11.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.37.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.09.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.05.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.44.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.07.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.19.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.40.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.24.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.15.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.01.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.14.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.19.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.21.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.15.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.02.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.16.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.00.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.35.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.39.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.41.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.30.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.03.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.23.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.30.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.13.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.pot", - 
"s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.36.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.15.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.35.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.00.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.28.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.12.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.43.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.25.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.40.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.32.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.44.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.10.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.04.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.29.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.04.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.40.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.35.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.08.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.34.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.11.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.38.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.32.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.13.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.22.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.09.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.27.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.34.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.18.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.31.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.41.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.16.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.14.ppd", - 
"s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.11.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.21.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.44.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.35.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.28.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.25.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.38.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.28.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.25.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.04.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.12.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.31.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.07.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.27.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.24.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.22.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.43.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.21.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.39.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.44.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.29.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.36.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.33.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.34.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.29.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.32.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.24.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.29.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.44.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.31.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.22.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.23.pog", - 
"s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.31.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.12.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.00.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.10.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.07.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.08.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.19.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.15.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.20.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.25.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.pto", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.06.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.36.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.38.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.04.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.23.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.22.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.35.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.09.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.36.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.03.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.16.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.01.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.12.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.38.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.28.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.10.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.11.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.12.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.25.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.06.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.05.phi", - 
"s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.26.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.38.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.16.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.04.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.34.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.28.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.11.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.10.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.09.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.39.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.30.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.31.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.14.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.31.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.41.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.38.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.43.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.11.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.34.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.10.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.03.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.09.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.21.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.23.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.21.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.14.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.23.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.42.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.16.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.06.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.10.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.28.pog", - 
"s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.27.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.20.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.20.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.26.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.10.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.24.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.02.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.05.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.03.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.33.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.04.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.37.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.41.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.40.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.30.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.17.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.39.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.39.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.41.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.30.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.09.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.33.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.12.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.28.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.08.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.11.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.21.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.05.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.21.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.25.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.29.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.19.pog", - 
"s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.34.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.20.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.27.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.20.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.17.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.36.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.13.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.15.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.35.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.13.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.23.phi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.26.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.38.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.18.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.26.phd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.16.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/nr.18.ppi", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/taxdb.btd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/taxdb.bti" - ], - "last-updated": "2021-05-03T10:02:19.264333-04:00", - "bytes-total": "254653277435", - "bytes-to-cache": "146391203506", - "number-of-volumes": "45" -} diff --git a/tests/blastdb/data/nr-gcp.json b/tests/blastdb/data/nr-gcp.json deleted file mode 100644 index 0fe9898..0000000 --- a/tests/blastdb/data/nr-gcp.json +++ /dev/null @@ -1,382 +0,0 @@ -{ - "version": "1.1", - "dbname": "nr", - "dbtype": "Protein", - "description": "All non-redundant GenBank CDS translations+PDB+SwissProt+PIR+PRF excluding environmental samples from WGS projects", - "number-of-letters": "142849168103", - "number-of-sequences": "393558513", - "files": [ - "gs://blast-db/2021-05-06-01-05-01/nr.40.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.26.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.32.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.36.psq", - 
"gs://blast-db/2021-05-06-01-05-01/nr.08.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.41.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.ptf", - "gs://blast-db/2021-05-06-01-05-01/nr.05.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.21.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.02.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.00.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.03.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.43.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.28.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.29.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.33.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.43.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.24.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.07.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.25.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.39.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.37.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.24.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.13.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.36.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.12.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.pdb", - "gs://blast-db/2021-05-06-01-05-01/nr.25.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.02.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.03.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.27.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.22.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.20.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.43.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.41.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.05.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.42.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.43.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.02.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.05.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.42.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.18.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.14.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.31.ppi", - 
"gs://blast-db/2021-05-06-01-05-01/nr.37.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.07.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.32.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.15.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.01.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.20.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.13.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.15.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.37.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.22.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.05.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.26.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.14.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.08.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.19.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.00.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.17.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.06.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.33.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.19.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.43.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.44.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.40.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.39.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.03.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.31.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.32.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.32.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.27.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.33.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.40.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.16.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.02.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.42.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.33.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.01.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.20.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.17.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.13.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.19.ppd", - 
"gs://blast-db/2021-05-06-01-05-01/nr.35.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.07.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.10.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.24.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.04.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.38.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.15.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.39.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.23.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.18.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.01.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.42.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.02.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.30.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.01.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.17.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.44.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.16.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.09.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.06.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.18.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.44.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.19.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.33.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.06.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.04.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.18.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.29.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.37.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.36.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.00.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.27.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.26.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.06.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.34.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.07.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.01.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.03.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.34.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.37.ppd", - 
"gs://blast-db/2021-05-06-01-05-01/nr.42.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.27.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.29.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.17.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.07.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.14.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.06.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.41.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.24.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.18.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.09.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.08.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.22.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.23.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.17.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.37.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.13.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.30.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.42.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.11.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.pal", - "gs://blast-db/2021-05-06-01-05-01/nr.00.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.12.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.01.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.22.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.17.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.pos", - "gs://blast-db/2021-05-06-01-05-01/nr.35.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.08.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.02.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.08.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.26.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.42.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.32.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.14.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.40.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.30.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.00.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.11.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.37.pin", - 
"gs://blast-db/2021-05-06-01-05-01/nr.09.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.05.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.44.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.07.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.19.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.40.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.24.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.15.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.01.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.14.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.19.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.21.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.15.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.02.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.16.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.00.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.35.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.39.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.41.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.30.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.03.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.23.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.30.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.13.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.pot", - "gs://blast-db/2021-05-06-01-05-01/nr.36.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.15.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.35.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.00.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.28.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.12.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.43.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.25.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.40.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.32.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.44.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.10.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.04.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.29.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.04.ppd", - 
"gs://blast-db/2021-05-06-01-05-01/nr.40.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.35.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.08.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.34.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.11.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.38.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.32.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.13.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.22.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.09.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.27.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.34.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.18.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.31.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.41.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.16.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.14.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.11.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.21.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.44.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.35.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.28.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.25.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.38.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.28.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.25.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.04.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.12.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.31.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.07.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.27.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.24.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.22.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.43.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.21.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.39.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.44.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.29.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.36.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.33.pin", - 
"gs://blast-db/2021-05-06-01-05-01/nr.34.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.29.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.32.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.24.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.29.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.44.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.31.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.22.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.23.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.31.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.12.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.00.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.10.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.07.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.08.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.19.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.15.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.20.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.25.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.pto", - "gs://blast-db/2021-05-06-01-05-01/nr.06.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.36.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.38.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.04.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.23.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.22.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.35.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.09.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.36.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.03.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.16.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.01.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.12.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.38.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.28.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.10.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.11.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.12.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.25.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.06.pog", - 
"gs://blast-db/2021-05-06-01-05-01/nr.05.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.26.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.38.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.16.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.04.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.34.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.28.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.11.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.10.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.09.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.39.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.30.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.31.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.14.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.31.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.41.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.38.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.43.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.11.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.34.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.10.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.03.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.09.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.21.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.23.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.21.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.14.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.23.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.42.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.16.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.06.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.10.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.28.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.27.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.20.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.20.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.26.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.10.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.24.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.02.pog", - 
"gs://blast-db/2021-05-06-01-05-01/nr.05.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.03.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.33.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.04.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.37.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.41.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.40.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.30.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.17.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.39.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.39.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.41.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.30.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.09.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.33.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.12.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.28.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.08.phd", - "gs://blast-db/2021-05-06-01-05-01/nr.11.psq", - "gs://blast-db/2021-05-06-01-05-01/nr.21.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.05.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.21.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.25.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.29.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.19.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.34.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.20.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.27.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.20.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.17.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.36.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.13.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.15.pin", - "gs://blast-db/2021-05-06-01-05-01/nr.35.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.13.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.23.phi", - "gs://blast-db/2021-05-06-01-05-01/nr.26.ppi", - "gs://blast-db/2021-05-06-01-05-01/nr.38.phr", - "gs://blast-db/2021-05-06-01-05-01/nr.18.pog", - "gs://blast-db/2021-05-06-01-05-01/nr.26.phd", - 
"gs://blast-db/2021-05-06-01-05-01/nr.16.ppd", - "gs://blast-db/2021-05-06-01-05-01/nr.18.ppi", - "gs://blast-db/2021-05-06-01-05-01/taxdb.btd", - "gs://blast-db/2021-05-06-01-05-01/taxdb.bti" - ], - "last-updated": "2021-05-03T10:02:19.264333-04:00", - "bytes-total": "254653277435", - "bytes-to-cache": "146391203506", - "number-of-volumes": "45" -} diff --git a/tests/blastdb/data/nr-ncbi.json b/tests/blastdb/data/nr-ncbi.json deleted file mode 100644 index 2016a8f..0000000 --- a/tests/blastdb/data/nr-ncbi.json +++ /dev/null @@ -1,60 +0,0 @@ -{ - "version": "1.1", - "dbname": "nr", - "dbtype": "Protein", - "description": "All non-redundant GenBank CDS translations+PDB+SwissProt+PIR+PRF excluding environmental samples from WGS projects", - "number-of-letters": "142849168103", - "number-of-sequences": "393558513", - "files": [ - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.00.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.01.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.02.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.03.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.04.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.05.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.06.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.07.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.08.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.09.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.10.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.11.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.12.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.13.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.14.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.15.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.16.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.17.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.18.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.19.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.20.tar.gz", - 
"ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.21.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.22.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.23.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.24.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.25.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.26.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.27.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.28.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.29.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.30.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.31.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.32.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.33.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.34.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.35.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.36.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.37.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.38.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.39.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.40.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.41.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.42.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.43.tar.gz", - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nr.44.tar.gz" - ], - "last-updated": "2021-05-03T10:02:19.264333-04:00", - "bytes-total": "254653277435", - "bytes-to-cache": "146391203506", - "bytes-total-compressed": "133956574643", - "number-of-volumes": "45" -} diff --git a/tests/blastdb/data/swissprot-aws.json b/tests/blastdb/data/swissprot-aws.json deleted file mode 100644 index 99c40c8..0000000 --- a/tests/blastdb/data/swissprot-aws.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "version": "1.1", - "dbname": "swissprot", - "dbtype": "Protein", - "description": "Non-redundant UniProtKB/SwissProt sequences", - "number-of-letters": "180536657", - "number-of-sequences": "476618", - "files": [ - "s3://ncbi-blast-databases/2021-05-06-01-05-01/swissprot.ppi", - 
"s3://ncbi-blast-databases/2021-05-06-01-05-01/swissprot.pos", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/swissprot.pog", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/swissprot.phr", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/swissprot.ppd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/swissprot.psq", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/swissprot.pto", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/swissprot.pin", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/swissprot.pot", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/swissprot.ptf", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/swissprot.pdb", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/taxdb.btd", - "s3://ncbi-blast-databases/2021-05-06-01-05-01/taxdb.bti" - ], - "last-updated": "2021-05-03T10:02:19.264333-04:00", - "bytes-total": "353180979", - "bytes-to-cache": "184826348", - "number-of-volumes": "1" -} diff --git a/tests/blastdb/data/swissprot-gcp.json b/tests/blastdb/data/swissprot-gcp.json deleted file mode 100644 index b35413e..0000000 --- a/tests/blastdb/data/swissprot-gcp.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "version": "1.1", - "dbname": "swissprot", - "dbtype": "Protein", - "description": "Non-redundant UniProtKB/SwissProt sequences", - "number-of-letters": "180536657", - "number-of-sequences": "476618", - "files": [ - "gs://blast-db/2021-05-06-01-05-01/swissprot.ppi", - "gs://blast-db/2021-05-06-01-05-01/swissprot.pos", - "gs://blast-db/2021-05-06-01-05-01/swissprot.pog", - "gs://blast-db/2021-05-06-01-05-01/swissprot.phr", - "gs://blast-db/2021-05-06-01-05-01/swissprot.ppd", - "gs://blast-db/2021-05-06-01-05-01/swissprot.psq", - "gs://blast-db/2021-05-06-01-05-01/swissprot.pto", - "gs://blast-db/2021-05-06-01-05-01/swissprot.pin", - "gs://blast-db/2021-05-06-01-05-01/swissprot.pot", - "gs://blast-db/2021-05-06-01-05-01/swissprot.ptf", - "gs://blast-db/2021-05-06-01-05-01/swissprot.pdb", - "gs://blast-db/2021-05-06-01-05-01/taxdb.btd", - 
"gs://blast-db/2021-05-06-01-05-01/taxdb.bti" - ], - "last-updated": "2021-05-03T10:02:19.264333-04:00", - "bytes-total": "353180979", - "bytes-to-cache": "184826348", - "number-of-volumes": "1" -} diff --git a/tests/blastdb/data/swissprot-ncbi.json b/tests/blastdb/data/swissprot-ncbi.json deleted file mode 100644 index e831a42..0000000 --- a/tests/blastdb/data/swissprot-ncbi.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "version": "1.1", - "dbname": "swissprot", - "dbtype": "Protein", - "description": "Non-redundant UniProtKB/SwissProt sequences", - "number-of-letters": "180536657", - "number-of-sequences": "476618", - "files": [ - "ftp://ftp.ncbi.nlm.nih.gov/blast/db/swissprot.tar.gz" - ], - "last-updated": "2021-05-03T10:02:19.264333-04:00", - "bytes-total": "353180979", - "bytes-to-cache": "184826348", - "bytes-total-compressed": "186046116", - "number-of-volumes": "1" -} diff --git a/tests/cost/__init__.py b/tests/cost/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/cost/data/aws-run-summary.json b/tests/cost/data/aws-run-summary.json new file mode 100644 index 0000000..a2bb459 --- /dev/null +++ b/tests/cost/data/aws-run-summary.json @@ -0,0 +1,30 @@ +{ + "version": "1.0", + "clusterInfo": { + "provider": "AWS", + "numMachines": 10, + "numVCPUsPerMachine": 32, + "RamPerMachine": 120, + "machineType": "m5.8xlarge", + "region": "us-east-1", + "zone": "us-east-1b", + "storageType": "persistentDisk" + }, + "runtime": { + "wallClock": 54000, + "blastdbSetup": { + "startTime": 2147483647, + "endTime": 2147489647 + }, + "blast": { + "startTime": 2147489747, + "endTime": 2147491747 + }, + "blastData": { + "queryLength": 53353969, + "databaseLength": 399515 + }, + "lettersPerSecondPerCpu": 0, + "exitCode": 0 + } +} diff --git a/tests/cost/pytest.ini b/tests/cost/pytest.ini new file mode 100644 index 0000000..3697176 --- /dev/null +++ b/tests/cost/pytest.ini @@ -0,0 +1,4 @@ +# This file is here to provide selective pytest in presence of tox.ini at 
the root +# It allows run only this test suite as: +# pytest tests/fasta_split +# See https://docs.pytest.org/en/latest/customize.html for description how test root is determined \ No newline at end of file diff --git a/tests/filehelper/test_filesystem_checks.py b/tests/filehelper/test_filesystem_checks.py index f0b4271..272a90e 100644 --- a/tests/filehelper/test_filesystem_checks.py +++ b/tests/filehelper/test_filesystem_checks.py @@ -29,35 +29,34 @@ from elastic_blast import filehelper from tempfile import TemporaryDirectory import pytest +from tests.utils import gke_mock, NOT_WRITABLE_BUCKET TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), 'data') -WRITEABLE_BUCKET = 'gs://blast-test' +WRITEABLE_BUCKET = 'gs://test-bucket' -GCP_PRJ = "ncbi-sandbox-blast" - -def test_check_for_read_success(): - filehelper.check_for_read('gs://blast-db/latest-dir', gcp_prj=GCP_PRJ) - #filehelper.check_for_read('s3://ncbi-blast-databases/latest-dir') +def test_check_for_read_success(gke_mock): + filehelper.check_for_read('gs://blast-db/latest-dir') + filehelper.check_for_read('s3://ncbi-blast-databases/latest-dir') filehelper.check_for_read(os.path.join(TEST_DATA_DIR, 'test.tar')) -def test_check_for_read_failure(): +def test_check_for_read_failure(gke_mock): with pytest.raises(FileNotFoundError): - filehelper.check_for_read('gs://blast-db/non-existent-file', gcp_prj=GCP_PRJ) + filehelper.check_for_read('gs://blast-db/non-existent-file') with pytest.raises(FileNotFoundError): filehelper.check_for_read(os.path.join(TEST_DATA_DIR, 'non-existent-file')) with pytest.raises(FileNotFoundError): filehelper.check_for_read('https://storage.googleapis.com/blast-db/invalid-file') -def test_check_for_write_success(): +def test_check_for_write_success(gke_mock): filehelper.check_dir_for_write(WRITEABLE_BUCKET) with TemporaryDirectory() as d: filehelper.check_dir_for_write(d) -def test_check_for_write_failure(): +def test_check_for_write_failure(gke_mock): with 
pytest.raises(PermissionError): - filehelper.check_dir_for_write('gs://arbitrary-non-existent-bucket-test') + filehelper.check_dir_for_write(f'gs://{NOT_WRITABLE_BUCKET}') with pytest.raises(PermissionError): filehelper.check_dir_for_write('/home/') diff --git a/tests/util/test_util.py b/tests/util/test_util.py index 2a31cef..b9bff60 100644 --- a/tests/util/test_util.py +++ b/tests/util/test_util.py @@ -27,15 +27,15 @@ import os import unittest from unittest.mock import patch, MagicMock -import re from elastic_blast import util from elastic_blast.constants import ELB_DFLT_GCP_MACHINE_TYPE from elastic_blast.constants import ElbCommand, MolType from elastic_blast.util import get_query_batch_size -from elastic_blast.util import get_blastdb_size, sanitize_aws_batch_job_name +from elastic_blast.util import check_user_provided_blastdb_exists, sanitize_aws_batch_job_name from elastic_blast.util import safe_exec, SafeExecError from elastic_blast.util import sanitize_for_k8s +from elastic_blast.util import is_newer_version from elastic_blast.util import validate_gcp_string, convert_labels_to_aws_tags from elastic_blast.util import validate_gcp_disk_name, gcp_get_regions from elastic_blast.util import ElbSupportedPrograms, UserReportError @@ -44,7 +44,26 @@ from elastic_blast.base import InstanceProperties from elastic_blast.db_metadata import DbMetadata import pytest -from tests.utils import MockedCompletedProcess, gke_mock, GCP_REGIONS, gcp_env_vars +from tests.utils import MockedCompletedProcess, gke_mock, GCP_REGIONS + + +def test_is_newer_version(): + assert is_newer_version("1.5", "1.0") + assert is_newer_version("1.0.1", "1.0") + assert is_newer_version("0.3", "1.0") is False + assert is_newer_version("1.5", "1.2.3") is True + assert is_newer_version("1.5", "1.7.3") is False + + assert is_newer_version("1.0", "1.0") is False + assert is_newer_version("1.0", "1.0.0") is False + assert is_newer_version("1.0.0", "1.0") is False + +def test_get_db_moltype_for_program(): 
+ assert(ElbSupportedPrograms().get_db_mol_type('blastp') == MolType.PROTEIN) + assert(ElbSupportedPrograms().get_db_mol_type('blastx') == MolType.PROTEIN) + assert(ElbSupportedPrograms().get_db_mol_type('blastn') == MolType.NUCLEOTIDE) + assert(ElbSupportedPrograms().get_db_mol_type('tblastn') == MolType.NUCLEOTIDE) + assert(ElbSupportedPrograms().get_db_mol_type('tblastx') == MolType.NUCLEOTIDE) DB_METADATA = DbMetadata(version = '1', @@ -162,20 +181,15 @@ def test_sanitize_aws_batch_job_name(self): def test_sanitize_aws_user_name(self): self.assertEqual('user-name', sanitize_aws_batch_job_name('user.name')) -@patch(target='elastic_blast.elb_config.gcp_get_regions', new=MagicMock(return_value=GCP_REGIONS)) -def test_get_blastdb_size(gcp_env_vars): - cfg = create_config_for_db('nr') - gcp_prj = os.environ.get('CLOUDSDK_CORE_PROJECT', "ncbi-sandbox-blast") - dbsize = get_blastdb_size(cfg.blast.db, cfg.cluster.db_source, gcp_prj) - assert dbsize >= 227.4 +def test_check_user_provided_blastdb_exists(gke_mock): + cfg = create_config_for_db('gs://test-bucket/testdb') + check_user_provided_blastdb_exists(cfg.blast.db, MolType.PROTEIN, cfg.cluster.db_source) -@patch(target='elastic_blast.elb_config.gcp_get_regions', new=MagicMock(return_value=GCP_REGIONS)) -def test_get_blastdb_size_invalid_database(gcp_env_vars): - cfg = create_config_for_db('non_existent_blast_database') +def test_check_user_provided_blastdb_exists_invalid_database(gke_mock): + cfg = create_config_for_db('gs://test-bucket/non_existent_blast_database') with pytest.raises(ValueError): - gcp_prj = os.environ.get('CLOUDSDK_CORE_PROJECT', "ncbi-sandbox-blast") - get_blastdb_size(cfg.blast.db, cfg.cluster.db_source, gcp_prj) + check_user_provided_blastdb_exists(cfg.blast.db, MolType.PROTEIN, cfg.cluster.db_source) @patch(target='elastic_blast.elb_config.get_db_metadata', new=MagicMock(return_value=DB_METADATA)) @@ -349,24 +363,22 @@ def safe_exec_gsutil_ls(cmd): return orig_safe_exec(cmd) 
mocker.patch('elastic_blast.util.safe_exec', side_effect=safe_exec_gsutil_ls) - gcp_prj = os.environ.get('CLOUDSDK_CORE_PROJECT', "ncbi-sandbox-blast") - # tar.gz file, db_path should explicitely mention it - db, db_path, k8sdblabel = util.get_blastdb_info(DB, gcp_prj) + db, db_path, k8sdblabel = util.get_blastdb_info(DB) assert(db_path == DB+'.tar.gz') assert(k8sdblabel == DB_LABEL) print(db, db_path, k8sdblabel) # no tar.gz file, db_path should have .* response = DB_NAME+'tar.gz.md5' - db, db_path, k8sdblabel = util.get_blastdb_info(DB, gcp_prj) + db, db_path, k8sdblabel = util.get_blastdb_info(DB) assert(db_path == DB+'.*') assert(k8sdblabel == DB_LABEL) print(db, db_path, k8sdblabel) # tar.gz file, db_path should explicitely mention it response = DB_NAME+'tar.gz'+'\n'+DB_NAME+'.ndb' - db, db_path, k8sdblabel = util.get_blastdb_info(DB, gcp_prj) + db, db_path, k8sdblabel = util.get_blastdb_info(DB) assert(db_path == DB+'.tar.gz') assert(k8sdblabel == DB_LABEL) print(db, db_path, k8sdblabel) @@ -374,7 +386,7 @@ def safe_exec_gsutil_ls(cmd): # empty result, should throw an exception response = '' with pytest.raises(ValueError): - util.get_blastdb_info(DB, gcp_prj) + util.get_blastdb_info(DB) # error executing gsutil, should throw an exception def safe_exec_gsutil_ls_exception(cmd): @@ -382,4 +394,4 @@ def safe_exec_gsutil_ls_exception(cmd): raise SafeExecError(1, 'CommandException: One or more URLs matched no objects.') mocker.patch('elastic_blast.util.safe_exec', side_effect=safe_exec_gsutil_ls_exception) with pytest.raises(ValueError): - util.get_blastdb_info(DB, gcp_prj) + util.get_blastdb_info(DB) diff --git a/tests/utils.py b/tests/utils.py index 131d8fd..aaad3ab 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -162,10 +162,12 @@ def gke_mock(mocker): mock.cloud.conf['project'] = GCP_PROJECT + mock.cloud.storage['gs://test-bucket'] = 0 mock.cloud.storage['gs://test-bucket/test-query.fa'] = '>query\nACTGGAGATGAC' mock.cloud.storage['gs://test-results'] = '' 
mock.cloud.storage[f'gs://{NOT_WRITABLE_BUCKET}'] = '' mock.cloud.storage['s3://test-bucket/test-query.fa'] = '>query\nACTGGAGATGAC' + mock.cloud.storage['s3://test-bucket'] = 0 mock.cloud.storage['s3://test-results'] = '' mock.cloud.storage[f's3://{NOT_WRITABLE_BUCKET}'] = '' @@ -180,8 +182,10 @@ def gke_mock(mocker): # User database metadata mock.cloud.storage[f'gs://test-bucket/{DB_METADATA_PROT_FILE_NAME}'] = DB_METADATA_PROT mock.cloud.storage[f'gs://test-bucket/{DB_METADATA_NUCL_FILE_NAME}'] = DB_METADATA_NUCL + mock.cloud.storage['gs://test-bucket/testdb.pal'] = 'A fake user database' mock.cloud.storage[f's3://test-bucket/{DB_METADATA_PROT_FILE_NAME}'] = DB_METADATA_PROT mock.cloud.storage[f's3://test-bucket/{DB_METADATA_NUCL_FILE_NAME}'] = DB_METADATA_NUCL + mock.cloud.storage['s3://test-bucket/testdb.pal'] = 'A fake user database' # we need gcp.safe_exec instead of util.safe exec here, because # safe_exec is imported in gcp.py with 'from util import safe_exec' @@ -199,6 +203,7 @@ def gke_mock(mocker): mocker.patch('boto3.client', side_effect=mock.mocked_client) mocker.patch('botocore.exceptions.ClientError.__init__', new=MagicMock(return_value=None)) mocker.patch.dict(os.environ, {'ELB_PAUSE_AFTER_INIT_PV': '1'}) + mocker.patch('shutil.which', side_effect=MagicMock(return_value='.')) yield mock del mock @@ -290,6 +295,15 @@ def mocked_safe_exec(cmd: Union[List[str], str], env: Optional[Dict[str, str]] = elif ' '.join(cmd).startswith('gcloud compute disks delete'): return MockedCompletedProcess() + # get a list of volume snapshots + elif ' '.join(cmd).startswith('gcloud compute snapshots list --format json'): + result = [{'name': 'snapshot-12345'}] + return MockedCompletedProcess(json.dumps(result)) + + # delete a volume snapshot + elif ' '.join(cmd).startswith('gcloud compute snapshots delete'): + return MockedCompletedProcess() + # GKE cluster status elif ' '.join(cmd).startswith('gcloud container clusters list --format=value(status) --filter name'): 
return MockedCompletedProcess('RUNNING\n') @@ -324,6 +338,11 @@ def mocked_safe_exec(cmd: Union[List[str], str], env: Optional[Dict[str, str]] = result['items'].append({'spec': {'csi': {'volumeHandle': f'/test-project/test-region/{i}'}}}) # type: ignore return MockedCompletedProcess(json.dumps(result)) + # get volume snapshots + elif cmd[0] == 'kubectl' and 'get volumesnapshot' in ' '.join(cmd): + result = {'items': [{'metadata': {'uid': '12345'}}]} + return MockedCompletedProcess(json.dumps(result)) + # get kubernetes jobs elif cmd[0] == 'kubectl' and 'get jobs -o json' in ' '.join(cmd): result = {'items': []} @@ -372,7 +391,7 @@ def mocked_safe_exec(cmd: Union[List[str], str], env: Optional[Dict[str, str]] = # check if kubernetes client is installed or cluster is alive elif ' '.join(cmd).startswith('kubectl') and 'version' in ' '.join(cmd): - return MockedCompletedProcess() + return MockedCompletedProcess('{ "clientVersion": { "major": "1", "minor": "27", "gitVersion": "v1.27.4", "gitCommit": "286cfa5f978c4a89c776347c82fa09a232eef144", "gitTreeState": "clean", "buildDate": "2024-03-06T00:56:29Z", "goVersion": "go1.20.12 X:strictfipsruntime", "compiler": "gc", "platform": "linux/amd64" }, "kustomizeVersion": "v5.0.1" }') # delete a kubernetes resopurce by file elif cmd[0] == 'kubectl' and 'delete -f' in ' '.join(cmd): @@ -393,6 +412,14 @@ def mocked_safe_exec(cmd: Union[List[str], str], env: Optional[Dict[str, str]] = # Check whether a file exists in GCS elif ' '.join(cmd).startswith('gsutil') and 'stat' in cmd: if cloud_state: + # handle a wildcard '*' + if cmd[-1].rstrip().endswith('*'): + for key in cloud_state.storage: + if key.startswith(cmd[-1][:-1]): + return MockedCompletedProcess() + raise SafeExecError(returncode=1, message=f'File {cmd[-1]} was not found') + + # handle an exact name if cmd[-1] in cloud_state.storage: return MockedCompletedProcess() else: @@ -406,7 +433,7 @@ def mocked_safe_exec(cmd: Union[List[str], str], env: Optional[Dict[str, str]] = 
# simulate reading NCBI database manifest if cmd.endswith('latest-dir'): return MockedCompletedProcess(stdout='xxxx') - elif cmd.endswith('blastdb-manifest.json'): + elif cmd.endswith('blastdb-metadata-1-1.json'): manifest = {'nr': {'size': 25}, 'nt': {'size': 25}, 'pdbnt': {'size': 25}, 'testdb': {'size': 25}} return MockedCompletedProcess(stdout=json.dumps(manifest)) else: @@ -549,29 +576,6 @@ def mocked_client(self, client, config=None): raise NotImplementedError(f'boto3 mock for {client} client is not implemented') -@pytest.fixture() -def gcp_env_vars(): - env = { 'CLOUDSDK_CORE_PROJECT': 'ncbi-sandbox-blast' } - orig_env = {} - - if 'TEAMCITY_VERSION' in os.environ: - for var_name in env: - if var_name in os.environ: - orig_env[var_name] = os.environ[var_name] - os.environ[var_name] = str(env[var_name]) - - yield env - - # cleanup - for var_name in env: - if var_name in orig_env: - os.environ[var_name] = orig_env[var_name] - else: - # os.unsetenv does not work on every system - del os.environ[var_name] - else: - yield orig_env - @pytest.fixture def aws_credentials(): """Credentials for mocked AWS services. 
This fixture ensures that we are diff --git a/tox.ini b/tox.ini index 105226f..99c0024 100644 --- a/tox.ini +++ b/tox.ini @@ -1,6 +1,6 @@ [tox] ; put list of your test environments here: -envlist = py39 +envlist = py39, py311 ; this parameter should be used if your project ; doesn't have setup.py file: http://stackoverflow.com/questions/18962403/how-do-i-run-tox-in-a-project-that-has-no-setup-py @@ -35,4 +35,4 @@ commands = ; put here your tests folder and module(s) to test ; for example: addopts = tests/ --cov my_module1 --cov my_module2 --cov-report term --cov-report html ; for more information see: https://pypi.python.org/pypi/pytest-cov -addopts = tests/ --cov elb --cov-report term --cov-report html -x +addopts = tests/ --cov=elastic_blast --cov-report term --cov-report html -x diff --git a/validate-pex-cloudbuild.yaml b/validate-pex-cloudbuild.yaml deleted file mode 100644 index 7ae5ed1..0000000 --- a/validate-pex-cloudbuild.yaml +++ /dev/null @@ -1,17 +0,0 @@ -steps: -- name: 'python:3.7-alpine3.14' - entrypoint: sh - args: - - -c - - | - apk add curl && - curl -sO https://storage.googleapis.com/elastic-blast/release/${_VERSION}/elastic-blast && - curl -sO https://storage.googleapis.com/elastic-blast/release/${_VERSION}/elastic-blast.md5 && - md5sum -c elastic-blast.md5 && - chmod +x elastic-blast && - ./elastic-blast --version && \ - ./elastic-blast --help && \ - ./elastic-blast submit --help && \ - ./elastic-blast status --help && \ - ./elastic-blast delete --help && \ - ./elastic-blast run-summary --help