diff --git a/.github/workflows/python-ci-minimal.yml b/.github/workflows/python-ci-minimal.yml index eae233e2f3..080a581d19 100644 --- a/.github/workflows/python-ci-minimal.yml +++ b/.github/workflows/python-ci-minimal.yml @@ -36,6 +36,6 @@ jobs: python_version: ${{ matrix.python-version }} cc: ${{ matrix.cc }} cxx: ${{ matrix.cxx }} - report_codecov: ${{ matrix.python-version == '3.10' }} - run_lint: ${{ matrix.python-version == '3.10' }} + report_codecov: ${{ matrix.python-version == '3.11' }} + run_lint: ${{ matrix.python-version == '3.11' }} secrets: inherit diff --git a/apis/python/tests/test_io.py b/apis/python/tests/test_io.py index 6e4639384a..d466a1d663 100644 --- a/apis/python/tests/test_io.py +++ b/apis/python/tests/test_io.py @@ -184,8 +184,10 @@ def test_add_matrices(tmp_path): See https://github.com/single-cell-data/TileDB-SOMA/issues/1565.""" # Create a soma object from an anndata object soma_path = tmp_path.as_posix() - h5ad_path = HERE.parent / 'testdata/pbmc-small.h5ad' - soma_uri = soma.io.from_h5ad(soma_path, input_path=h5ad_path, measurement_name="RNA") + h5ad_path = HERE.parent / "testdata/pbmc-small.h5ad" + soma_uri = soma.io.from_h5ad( + soma_path, input_path=h5ad_path, measurement_name="RNA" + ) # Synthesize some new data to be written into two matrices within the soma object (ensuring it's different from the # original data, so that writes must be performed) diff --git a/profiler/src/profiler/data.py b/profiler/src/profiler/data.py index 0649ca642a..10fec9c4e8 100644 --- a/profiler/src/profiler/data.py +++ b/profiler/src/profiler/data.py @@ -3,13 +3,14 @@ import hashlib import json import os +import uuid from abc import ABC, abstractmethod from typing import Any, Dict, List, Optional -from botocore.client import ClientError -import tempfile + import attr import boto3 -import uuid +from botocore.client import ClientError + @attr.define class ProfileData: @@ -134,6 +135,7 @@ def add(self, data: ProfileData) -> str: def close(self): pass + class S3ProfileDB(ProfileDB): """Represents a S3-based implementation of a ProfileDB database. Each run is stored as a separate S3 object under a key with the structure `//`. @@ -143,8 +145,8 @@ def read_object_keys(self, prefix: str, suffix: str) -> List[str]: # return all the objects kets starting with prefix and ending with suffix result = self.s3.list_objects(Bucket=self.bucket_name, Prefix=prefix) keys: List[str] = [] - for o in result.get('Contents'): - object_key = o.get('Key') + for o in result.get("Contents"): + object_key = o.get("Key") if object_key.endswith(suffix): keys.append(object_key) return keys @@ -152,12 +154,11 @@ def read_object_keys(self, prefix: str, suffix: str) -> List[str]: def read_s3_text(self, key: str) -> str: # Assume the key is associated with one object. Otherwise, return the first object result = self.s3.list_objects(Bucket=self.bucket_name, Prefix=key) - for o in result.get('Contents'): - data = self.s3.get_object(Bucket=self.bucket_name, Key=o.get('Key')) - contents = data['Body'].read().decode("utf-8") + for o in result.get("Contents"): + data = self.s3.get_object(Bucket=self.bucket_name, Key=o.get("Key")) + contents = data["Body"].read().decode("utf-8") return contents - def bucket_exist_and_accessible(self): try: self.s3.head_bucket(Bucket=self.bucket.name) @@ -167,13 +168,16 @@ def bucket_exist_and_accessible(self): def __init__(self, bucket_name: str): # Initialize bucket's info - self.s3 = boto3.client('s3') + self.s3 = boto3.client("s3") self.bucket_name = bucket_name - self.bucket = boto3.resource('s3').Bucket(self.bucket_name) + self.bucket = boto3.resource("s3").Bucket(self.bucket_name) # Check if the bucket exists if not self.bucket_exist_and_accessible(): - raise( - Exception(f"Bucket {self.bucket_name} does not exist or access is not granted.")) + raise ( + Exception( + f"Bucket {self.bucket_name} does not exist or access is not granted." + ) + ) def __str__(self): result = "" @@ -186,9 +190,11 @@ def __str__(self): command = self.read_s3_text(command_file_key) # Get the number of runs. n_runs = len(self.read_object_keys(runs_prefix, ".json")) - result =+ f"[{command_file_key}] \"{command}\": {n_runs} runs\n" + result = +f'[{command_file_key}] "{command}": {n_runs} runs\n' return result - return Exception(f"Bucket {self.bucket_name} does not exist or access is not granted.") + return Exception( + f"Bucket {self.bucket_name} does not exist or access is not granted." + ) def find(self, command) -> List[ProfileData]: key = _command_key(command) @@ -208,7 +214,9 @@ def add(self, data: ProfileData) -> str: with open(filename, "w") as fp: fp.write(data.command.strip()) - self.s3.upload_file(os.path.abspath(str(fp.name)), self.bucket_name, s3_command_key) + self.s3.upload_file( + os.path.abspath(str(fp.name)), self.bucket_name, s3_command_key + ) os.unlink(filename) key2 = data.timestamp diff --git a/profiler/src/profiler/profiler.py b/profiler/src/profiler/profiler.py index 3716068af3..6ea571abb7 100644 --- a/profiler/src/profiler/profiler.py +++ b/profiler/src/profiler/profiler.py @@ -16,7 +16,7 @@ from .context_generator import host_context # import context_generator -from .data import FileBasedProfileDB, ProfileData, ProfileDB, S3ProfileDB +from .data import ProfileData, ProfileDB, S3ProfileDB GNU_TIME_FORMAT = ( 'Command being timed: "%C"\n'