From fe114ac2304315db8ec6ad99043da5e6ae3bef65 Mon Sep 17 00:00:00 2001 From: David Gault Date: Sun, 10 Mar 2024 20:03:47 +0000 Subject: [PATCH] Reintroduce S3FileSystemStore --- pom.xml | 8 +- src/loci/formats/S3FileSystemStore.java | 306 ++++++++++++++++++ src/loci/formats/in/ZarrReader.java | 17 +- .../formats/services/JZarrServiceImpl.java | 48 +-- 4 files changed, 357 insertions(+), 22 deletions(-) create mode 100644 src/loci/formats/S3FileSystemStore.java diff --git a/pom.xml b/pom.xml index ca08d16..dadfe03 100644 --- a/pom.xml +++ b/pom.xml @@ -91,9 +91,11 @@ 2.7.2 runtime - - - + + software.amazon.awssdk + s3 + 2.13.8 + diff --git a/src/loci/formats/S3FileSystemStore.java b/src/loci/formats/S3FileSystemStore.java new file mode 100644 index 0000000..48cc817 --- /dev/null +++ b/src/loci/formats/S3FileSystemStore.java @@ -0,0 +1,306 @@ +package loci.formats; + +/*- + * #%L + * Implementation of Bio-Formats readers for the next-generation file formats + * %% + * Copyright (C) 2020 - 2022 Open Microscopy Environment + * %% + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * #L% + */ + +import com.bc.zarr.ZarrConstants; +import com.bc.zarr.ZarrUtils; +import com.bc.zarr.storage.Store; + +import software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider; +import software.amazon.awssdk.auth.credentials.AwsCredentials; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.core.ResponseInputStream; +import software.amazon.awssdk.core.sync.ResponseTransformer; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.S3Configuration; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; +import software.amazon.awssdk.services.s3.model.ListObjectsRequest; +import software.amazon.awssdk.services.s3.model.ListObjectsResponse; +import software.amazon.awssdk.services.s3.model.S3Object; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URI; +import java.net.URISyntaxException; +import java.nio.file.FileSystem; +import java.nio.file.Files; +import java.nio.file.LinkOption; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; +import java.util.ListIterator; +import java.util.TreeSet; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class S3FileSystemStore implements Store { + + private Path root; + S3Client client; + protected static final Logger LOGGER = + LoggerFactory.getLogger(S3FileSystemStore.class); + + public S3FileSystemStore(String path, FileSystem fileSystem) { + if (fileSystem == null) { + root = Paths.get(path); + } else { + root = fileSystem.getPath(path); + } + setupClient(); + } + + public void updateRoot(String path) { + root = Paths.get(path); + } + + public String getRoot() { + return root.toString(); + } + + private void setupClient() { + String[] pathSplit = root.toString().split(File.separator); + String endpoint = "https://" + pathSplit[1] + File.separator; + URI endpoint_uri; + try { + endpoint_uri = new URI(endpoint); + final S3Configuration config = S3Configuration.builder() + .pathStyleAccessEnabled(true) + .build(); + AwsCredentials credentials = AnonymousCredentialsProvider.create().resolveCredentials(); + client = S3Client.builder() + .endpointOverride(endpoint_uri) + .serviceConfiguration(config) + .region(Region.EU_WEST_1) // Ignored but required by the client + .credentialsProvider(StaticCredentialsProvider.create(credentials)).build(); + + } catch (URISyntaxException e) { + LOGGER.info( "Syntax error generating URI from endpoint: " + endpoint); + e.printStackTrace(); + } catch (Exception e) { + LOGGER.info("Exception caught while constructing S3 client"); + e.printStackTrace(); + } + + } + + public void close() { + if (client != null) { + client.close(); + } + } + + public S3FileSystemStore(Path rootPath) { + root = rootPath; + setupClient(); + } + + @Override + public InputStream getInputStream(String key) throws IOException { + String[] pathSplit = root.toString().split(File.separator); + String bucketName = pathSplit[2]; + String key2 = root.toString().substring(root.toString().indexOf(pathSplit[3]), root.toString().length()) + File.separator + key; + + try { + GetObjectRequest getRequest = GetObjectRequest.builder().bucket(bucketName).key(key2).build(); + ResponseInputStream responseStream = client.getObject(getRequest, ResponseTransformer.toInputStream()); + return responseStream; + } catch (Exception e) { + LOGGER.info( "Unable to locate or access key: " + key2); + e.printStackTrace(); + } + + return null; + } + + @Override + public OutputStream getOutputStream(String key) throws IOException { + final Path filePath = root.resolve(key); + final Path dir = filePath.getParent(); + Files.createDirectories(dir); + return Files.newOutputStream(filePath); + } + + @Override + public void delete(String key) throws IOException { + final Path toBeDeleted = root.resolve(key); + if (Files.isDirectory(toBeDeleted)) { + ZarrUtils.deleteDirectoryTreeRecursively(toBeDeleted); + } + if (Files.exists(toBeDeleted)){ + Files.delete(toBeDeleted); + } + if (Files.exists(toBeDeleted)|| Files.isDirectory(toBeDeleted)) { + throw new IOException("Unable to initialize " + toBeDeleted.toAbsolutePath().toString()); + } + } + + @Override + public TreeSet getArrayKeys() throws IOException { + return getKeysFor(ZarrConstants.FILENAME_DOT_ZARRAY); + } + + @Override + public TreeSet getGroupKeys() throws IOException { + return getKeysFor(ZarrConstants.FILENAME_DOT_ZGROUP); + } + + /** + * Copied from {@com.bc.zarr.storage.FileSystemStorage#getKeysEndingWith(String). + * + * @param suffix + * @return + * @throws IOException + */ + public TreeSet getKeysEndingWith(String suffix) throws IOException { + return (TreeSet)Files.walk(this.root).filter((path) -> { + return path.toString().endsWith(suffix); + }).map((path) -> { + return this.root.relativize(path).toString(); + }).collect(Collectors.toCollection(TreeSet::new)); + } + + /** + * Copied from {@com.bc.zarr.storage.FileSystemStorage#getRelativeLeafKeys(String). + * + * @param key + * @return + * @throws IOException + */ + public Stream getRelativeLeafKeys(String key) throws IOException { + Path walkingRoot = this.root.resolve(key); + return Files.walk(walkingRoot).filter((path) -> { + return !Files.isDirectory(path, new LinkOption[0]); + }).map((path) -> { + return walkingRoot.relativize(path).toString(); + }).map(ZarrUtils::normalizeStoragePath).filter((s) -> { + return s.trim().length() > 0; + }); + } + + private TreeSet getKeysFor(String suffix) throws IOException { + TreeSet keys = new TreeSet(); + + String[] pathSplit = root.toString().split(File.separator); + + String bucketName = pathSplit[2]; + String key2 = root.toString().substring(root.toString().indexOf(pathSplit[3]), root.toString().length()); + + ListObjectsRequest listObjectsRequest = ListObjectsRequest + .builder() + .bucket(bucketName) + .prefix(key2) + .build() + ; + + ListObjectsResponse listObjectsResponse = null; + String lastKey = null; + + do { + if ( listObjectsResponse != null ) { + listObjectsRequest = listObjectsRequest.toBuilder() + .marker(lastKey) + .build() + ; + } + + listObjectsResponse = client.listObjects(listObjectsRequest); + List objects = listObjectsResponse.contents(); + + // Iterate over results + ListIterator iterVals = objects.listIterator(); + while (iterVals.hasNext()) { + S3Object object = (S3Object) iterVals.next(); + String k = object.key(); + if (k.contains(suffix)) { + String key = k.substring(k.indexOf(key2) + key2.length() + 1, k.indexOf(suffix)); + if (!key.isEmpty()) { + keys.add(key.substring(0, key.length()-1)); + } + } + lastKey = k; + } + } while ( listObjectsResponse.isTruncated() ); + + return keys; + } + + public ArrayList getFiles() throws IOException { + ArrayList keys = new ArrayList(); + + String[] pathSplit = root.toString().split(File.separator); + String bucketName = pathSplit[2]; + String key2 = root.toString().substring(root.toString().indexOf(pathSplit[3]), root.toString().length()); + + ListObjectsRequest listObjectsRequest = ListObjectsRequest + .builder() + .bucket(bucketName) + .prefix(key2) + .build() + ; + + ListObjectsResponse listObjectsResponse = null; + String lastKey = null; + + do { + if ( listObjectsResponse != null ) { + listObjectsRequest = listObjectsRequest.toBuilder() + .marker(lastKey) + .build() + ; + } + + listObjectsResponse = client.listObjects(listObjectsRequest); + List objects = listObjectsResponse.contents(); + + // Iterate over results + ListIterator iterVals = objects.listIterator(); + while (iterVals.hasNext()) { + S3Object object = (S3Object) iterVals.next(); + String k = object.key(); + String key = k.substring(k.indexOf(key2) + key2.length() + 1, k.length()); + if (!key.isEmpty()) { + keys.add(key.substring(0, key.length()-1)); + } + lastKey = k; + } + } while ( listObjectsResponse.isTruncated() ); + return keys; + } + + +} \ No newline at end of file diff --git a/src/loci/formats/in/ZarrReader.java b/src/loci/formats/in/ZarrReader.java index 5bfc736..19c153c 100644 --- a/src/loci/formats/in/ZarrReader.java +++ b/src/loci/formats/in/ZarrReader.java @@ -97,6 +97,8 @@ public class ZarrReader extends FormatReader { public static final String LIST_PIXELS_ENV_KEY = "OME_ZARR_LIST_PIXELS"; public static final String INCLUDE_LABELS_KEY = "omezarr.include_labels"; public static final boolean INCLUDE_LABELS_DEFAULT = false; + public static final String ALT_STORE_KEY = "omezarr.alt_store"; + public static final String ALT_STORE_DEFAULT = null; protected transient ZarrService zarrService; private ArrayList arrayPaths = new ArrayList(); @@ -468,7 +470,7 @@ public void reopenFile() throws IOException { } protected void initializeZarrService(String rootPath) throws IOException, FormatException { - zarrService = new JZarrServiceImpl(rootPath); + zarrService = new JZarrServiceImpl(altStore()); openZarr(); } @@ -1200,6 +1202,19 @@ public boolean includeLabels() { } return INCLUDE_LABELS_DEFAULT; } + + /** + * Used to provide the location of an alternative file store where the data is located + * @return String representing the root path of the alternative file store or null if no alternative location exist + */ + public String altStore() { + MetadataOptions options = getMetadataOptions(); + if (options instanceof DynamicMetadataOptions) { + return ((DynamicMetadataOptions) options).get( + ALT_STORE_KEY, ALT_STORE_DEFAULT); + } + return ALT_STORE_DEFAULT; + } private boolean systemEnvListPixels() { String value = System.getenv(LIST_PIXELS_ENV_KEY); diff --git a/src/loci/formats/services/JZarrServiceImpl.java b/src/loci/formats/services/JZarrServiceImpl.java index dce0186..482315f 100644 --- a/src/loci/formats/services/JZarrServiceImpl.java +++ b/src/loci/formats/services/JZarrServiceImpl.java @@ -54,6 +54,7 @@ import loci.common.services.AbstractService; import loci.formats.FormatException; import loci.formats.FormatTools; +import loci.formats.S3FileSystemStore; import loci.formats.meta.IPyramidStore; import loci.formats.meta.MetadataRetrieve; import ucar.ma2.InvalidRangeException; @@ -65,6 +66,7 @@ public class JZarrServiceImpl extends AbstractService public static final String NO_ZARR_MSG = "JZARR is required to read Zarr files."; // -- Fields -- + S3FileSystemStore s3fs; ZarrArray zarrArray; String currentId; Compressor zlibComp = CompressorFactory.create("zlib", "level", 8); // 8 = compression level .. valid values 0 .. 9 @@ -76,20 +78,20 @@ public class JZarrServiceImpl extends AbstractService */ public JZarrServiceImpl(String root) { checkClassDependency(com.bc.zarr.ZarrArray.class); - if (root != null && root.toLowerCase().contains("s3:")) { - LOGGER.warn("S3 access currently not supported"); + if (root != null && (root.toLowerCase().contains("s3:") || root.toLowerCase().contains("s3."))) { + s3fs = new S3FileSystemStore(Paths.get(root)); } } @Override public void open(String file) throws IOException, FormatException { currentId = file; - // TODO: Update s3 location identification - if (!file.toLowerCase().contains("s3:")) { + if (s3fs == null) { zarrArray = ZarrArray.open(file); } - else { - LOGGER.warn("S3 access currently not supported"); + else { + s3fs.updateRoot(getZarrRoot(s3fs.getRoot()) + stripZarrRoot(file)); + zarrArray = ZarrArray.open(s3fs); } } @@ -100,48 +102,48 @@ public void open(String id, ZarrArray array) { public Map getGroupAttr(String path) throws IOException, FormatException { ZarrGroup group = null; - if (!path.toLowerCase().contains("s3:")) { + if (s3fs == null) { group = ZarrGroup.open(path); } else { - LOGGER.warn("S3 access currently not supported"); - return null; + s3fs.updateRoot(getZarrRoot(s3fs.getRoot()) + stripZarrRoot(path)); + group = ZarrGroup.open(s3fs); } return group.getAttributes(); } public Map getArrayAttr(String path) throws IOException, FormatException { ZarrArray array = null; - if (!path.toLowerCase().contains("s3:")) { + if (s3fs == null) { array = ZarrArray.open(path); } else { - LOGGER.warn("S3 access currently not supported"); - return null; + s3fs.updateRoot(getZarrRoot(s3fs.getRoot()) + stripZarrRoot(path)); + array = ZarrArray.open(s3fs); } return array.getAttributes(); } public Set getGroupKeys(String path) throws IOException, FormatException { ZarrGroup group = null; - if (!path.toLowerCase().contains("s3:")) { + if (s3fs == null) { group = ZarrGroup.open(path); } else { - LOGGER.warn("S3 access currently not supported"); - return null; + s3fs.updateRoot(getZarrRoot(s3fs.getRoot()) + stripZarrRoot(path)); + group = ZarrGroup.open(s3fs); } return group.getGroupKeys(); } public Set getArrayKeys(String path) throws IOException, FormatException { ZarrGroup group = null; - if (!path.toLowerCase().contains("s3:")) { + if (s3fs == null) { group = ZarrGroup.open(path); } else { - LOGGER.warn("S3 access currently not supported"); - return null; + s3fs.updateRoot(getZarrRoot(s3fs.getRoot()) + stripZarrRoot(path)); + group = ZarrGroup.open(s3fs); } return group.getArrayKeys(); } @@ -247,6 +249,9 @@ public boolean isLittleEndian() { public void close() throws IOException { zarrArray = null; currentId = null; + if (s3fs != null) { + s3fs.close(); + } } @Override @@ -358,5 +363,12 @@ public void create(String id, MetadataRetrieve meta, int[] chunks) throws IOExce create(id, meta, chunks, Compression.NONE); } + private String stripZarrRoot(String path) { + return path.substring(path.indexOf(".zarr")+5); + } + + private String getZarrRoot(String path) { + return path.substring(0, path.indexOf(".zarr")+5); + } }