Skip to content

Commit

Permalink
Merge branch 'develop' into cantaloup-5.0.6
Browse files Browse the repository at this point in the history
  • Loading branch information
glenrobson committed May 8, 2024
2 parents f95a1a3 + 21057cf commit 2841244
Show file tree
Hide file tree
Showing 140 changed files with 3,915 additions and 1,324 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Thumbs.db
.settings/*

# IntelliJ stuff
/.idea/*
/.idea
/*.iml
/*.ipr
/*.iws
Expand Down
34 changes: 34 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,39 @@
# Change Log

## 6.0

### Endpoints

* Image and information responses include a `Last-Modified` header when
possible.
* The health endpoint is enabled via `endpoint.health.enabled` rather than
`endpoint.api.enabled`.
* Added an HTTP API method to purge all infos from the derivative cache.
* Added a configuration option to automatically purge source-cached images
whose format cannot be inferred.

### Sources

* HttpSource supports a client HTTP proxy. (Thanks to @mightymax and
@mlindeman)
* HttpSource can be configured to send a ranged GET request instead of a HEAD
request, enabling it to work with pre-signed URLs that do not allow HEAD
requests.
* S3Source supports multiple endpoints when using ScriptLookupStrategy.

### Caches

* S3Cache uses multipart uploads, which reduces memory usage when caching
derivatives larger than 5 MB.

### Delegate Script

* The delegate script pathname can be set using the
`-Dcantaloupe.delegate_script` VM argument, which takes precedence over the
`delegate_script.pathname` configuration key.
* The delegate script's `metadata` context key contains a new field,
`xmp_elements`, that provides a high-level key-value view of the XMP data.

## 5.0.6

* IIIF information endpoints always return JSON in HTTP 4xx responses.
Expand Down
14 changes: 14 additions & 0 deletions UPGRADING.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,20 @@
If you are skipping versions, work through these sections backwards from your
current version.

## 5.0 → 6.0

1. Add the following keys from the sample configuration:
* `endpoint.health.enabled`
* `HttpSource.proxy.http.host`
* `HttpSource.proxy.http.port`
* `HttpSource.BasicLookupStrategy.send_head_requests`
* `processor.purge_incompatible_from_source_cache`
2. Add the following methods from the sample delegate script:
* `jdbcsource_last_modified()`
3. If you are using a Java delegate, add the following method to your delegate
class:
* `getJDBCSourceLastModified()`

## 4.1.x → 5.0

1. Note that the application is now packaged as a JAR file which can no longer
Expand Down
19 changes: 19 additions & 0 deletions cantaloupe.properties.sample
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ delegate_script.enabled = false
# !! This can be an absolute path, or a filename; if only a filename is
# specified, it will be searched for in the same folder as this file, and
# then the current working directory.
# The delegate script pathname can also be set using the
# -Dcantaloupe.delegate_script VM argument, which overrides this value.
delegate_script.pathname = delegates.rb

###########################################################################
Expand Down Expand Up @@ -126,6 +128,9 @@ endpoint.api.enabled = false
endpoint.api.username =
endpoint.api.secret =

# Enables the health check endpoint, at /health.
endpoint.health.enabled = true

# If true, sources and caches will be checked, resulting in a more robust
# but slower health check. Set this to false if these services already have
# their own health checks.
Expand Down Expand Up @@ -170,6 +175,10 @@ HttpSource.allow_insecure = false
# Request timeout in seconds.
HttpSource.request_timeout =

# !! Client HTTP proxy.
HttpSource.proxy.http.host =
HttpSource.proxy.http.port =

# Tells HttpSource how to look up resources. Allowed values are
# `BasicLookupStrategy` and `ScriptLookupStrategy`. ScriptLookupStrategy
# uses a delegate method for dynamic lookups; see the user manual.
Expand All @@ -187,6 +196,12 @@ HttpSource.BasicLookupStrategy.url_suffix =
HttpSource.BasicLookupStrategy.auth.basic.username =
HttpSource.BasicLookupStrategy.auth.basic.secret =

# Before an image is retrieved, a preliminary request is sent to check
# various characteristics. Typically this is a HEAD request, but some
# resources, such as those using pre-signed URLs, may not support HEAD
# requests. Set this key to false to send a ranged GET request instead.
HttpSource.BasicLookupStrategy.send_head_requests = true

# Read data in chunks when it may be more efficient. (This also may end up
# being less efficient, depending on many variables; see the user manual.)
HttpSource.chunking.enabled = true
Expand Down Expand Up @@ -359,6 +374,10 @@ processor.stream_retrieval_strategy = StreamStrategy
# * `AbortStrategy` causes the request to fail.
processor.fallback_retrieval_strategy = DownloadStrategy

# If true, images stored in the source cache for which no format can be
# inferred will be purged.
processor.purge_incompatible_from_source_cache = false

# Resolution of vector rasterization (of e.g. PDFs) at a scale of 1.
processor.dpi = 150

Expand Down
65 changes: 50 additions & 15 deletions delegates.rb.sample
Original file line number Diff line number Diff line change
Expand Up @@ -221,10 +221,17 @@ class CustomDelegate
#
# 1. String URI
# 2. Hash with the following keys:
# * `uri` [String] (required)
# * `username` [String] For HTTP Basic authentication (optional).
# * `secret` [String] For HTTP Basic authentication (optional).
# * `headers` [Hash<String,String>] Hash of request headers (optional).
# * `uri` [String] (required)
# * `username` [String] For HTTP Basic authentication
# (optional).
# * `secret` [String] For HTTP Basic authentication
# (optional).
# * `headers` [Hash<String,String>] Hash of request headers
# (optional).
# * `send_head_request` [Boolean] Optional; defaults to `true`. See the
# documentation of the
# `HttpSource.BasicLookupStrategy.send_head_requests`
# configuration key.
# 3. nil if not found.
#
# N.B.: this method should not try to perform authorization. `authorize()`
Expand All @@ -241,18 +248,33 @@ class CustomDelegate
# should be used instead.
#
# @param options [Hash] Empty hash.
# @return [String] Identifier of the image corresponding to the given
# identifier in the database.
# @return [String, nil] Database identifier of the image corresponding to the
# identifier in the context, or nil if not found.
#
def jdbcsource_database_identifier(options = {})
  # Intentionally empty in this sample; an empty Ruby method body returns nil.
end

##
# Returns either the last-modified timestamp of an image in ISO 8601 format,
# or an SQL statement that can be used to retrieve it from a `TIMESTAMP`-type
# column in the database. In the latter case, the "SELECT" and "FROM" clauses
# should be in uppercase in order to be autodetected.
#
# Implementing this method is optional, but may be necessary for certain
# features (like `Last-Modified` response headers) to work.
#
# @param options [Hash] Empty hash.
# @return [String, nil] ISO 8601 timestamp string or SQL statement, as
#                       described above. The empty sample body below
#                       returns nil.
#
def jdbcsource_last_modified(options = {})
end

##
# Returns either the media (MIME) type of an image, or an SQL statement that
# can be used to retrieve it, if it is stored in the database. In the latter
# case, the "SELECT" and "FROM" clauses should be in uppercase in order to
# be autodetected. If nil is returned, the media type will be inferred some
# other way, such as by identifier extension or magic bytes.
# can be used to retrieve it from a `CHAR`-type column in the database. In
# the latter case, the "SELECT" and "FROM" clauses should be in uppercase in
# order to be autodetected. If nil is returned, the media type will be
# inferred some other way, such as by identifier extension or magic bytes.
#
# @param options [Hash] Empty hash.
# @return [String, nil]
Expand All @@ -273,8 +295,11 @@ class CustomDelegate
# should be used instead.
#
# @param options [Hash] Empty hash.
# @return [Hash<String,Object>,nil] Hash containing `bucket` and `key` keys;
# or nil if not found.
# @return [Hash<String,Object>,nil] Hash containing `bucket` and `key` keys.
# It may also contain an `endpoint` key, indicating that the endpoint
# is different from the one set in the configuration. In that case,
# it may also contain `region`, `access_key_id`, and/or
# `secret_access_key` keys.
#
def s3source_object_info(options = {})
  # Intentionally empty in this sample; an empty Ruby method body returns nil.
end
Expand Down Expand Up @@ -356,7 +381,14 @@ class CustomDelegate
# "Field2Name": value
# ],
# "xmp_string": "<rdf:RDF>...</rdf:RDF>",
# "xmp_model": https://jena.apache.org/documentation/javadoc/jena/org/apache/jena/rdf/model/Model.html
# "xmp_model": See https://jena.apache.org/documentation/javadoc/jena/org/apache/jena/rdf/model/Model.html,
# "xmp_elements": {
# "Field1Name": "value",
# "Field2Name": [
# "value1",
# "value2"
# ]
# },
# "native": {
# # structure varies
# }
Expand All @@ -366,10 +398,13 @@ class CustomDelegate
# * The `exif` key refers to embedded EXIF data. This also includes IFD0
# metadata from source TIFFs, whether or not an EXIF IFD is present.
# * The `iptc` key refers to embedded IPTC IIM data.
# * The `xmp_string` key refers to raw embedded XMP data, which may or may
# not contain EXIF and/or IPTC information.
# * The `xmp_string` key refers to raw embedded XMP data.
# * The `xmp_model` key contains a Jena Model object pre-loaded with the
# contents of `xmp_string`.
# * The `xmp_elements` key contains a view of the embedded XMP data as
# key-value pairs. This is convenient to use, but may not work correctly
# with all XMP fields--in particular, those that cannot be expressed as
# key-value pairs.
# * The `native` key refers to format-specific metadata.
#
# Any combination of the above keys may be present or missing depending on
Expand Down
8 changes: 5 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<groupId>edu.illinois.library.cantaloupe</groupId>
<artifactId>cantaloupe</artifactId>
<packaging>jar</packaging>
<version>5.0.5</version>
<version>6.0-SNAPSHOT</version>
<name>Cantaloupe</name>
<url>https://cantaloupe-project.github.io/</url>
<inceptionYear>2015</inceptionYear>
Expand All @@ -17,6 +17,7 @@
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<aws-sdk.version>2.21.4</aws-sdk.version>
<jackson.version>2.15.2</jackson.version>
<!-- develop version 11.0.5 -->
<jetty.version>9.4.53.v20231009</jetty.version>
<jruby.version>9.4.3.0</jruby.version>
<surefire.version>3.0.0-M5</surefire.version>
Expand Down Expand Up @@ -423,8 +424,9 @@
<configuration>
<runOrder>random</runOrder>
<reuseForks>false</reuseForks>
<!-- JAI requires this in JDK 16+. It should be removed when JAI is. -->
<argLine>--add-opens java.desktop/sun.awt.image=ALL-UNNAMED</argLine>
<!-- JAI requires add-opens in JDK 16+. It should be removed when JAI is. -->
<!-- JAI also relies on illegal-access=permit, which only has an effect in JDK 16; JDK 17+ ignores the option with a warning (JEP 403). -->
<argLine>--add-opens java.desktop/sun.awt.image=ALL-UNNAMED --illegal-access=permit</argLine>
</configuration>
</plugin>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import jakarta.servlet.ServletContextEvent;
import jakarta.servlet.ServletContextListener;
import javax.script.ScriptEngineManager;
import javax.servlet.ServletContextEvent;
import javax.servlet.ServletContextListener;
import java.util.stream.Collectors;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import edu.illinois.library.cantaloupe.resource.FileServlet;
import edu.illinois.library.cantaloupe.resource.HandlerServlet;
import org.eclipse.jetty.alpn.server.ALPNServerConnectionFactory;
import org.eclipse.jetty.http.UriCompliance;
import org.eclipse.jetty.http2.HTTP2Cipher;
import org.eclipse.jetty.http2.server.HTTP2CServerConnectionFactory;
import org.eclipse.jetty.http2.server.HTTP2ServerConnectionFactory;
Expand Down Expand Up @@ -265,7 +266,8 @@ public void start() throws Exception {
// HTTP/2.
if (isHTTPEnabled()) {
HttpConfiguration config = new HttpConfiguration();
HttpConnectionFactory http1 = new HttpConnectionFactory();
config.setUriCompliance(UriCompliance.LEGACY);
HttpConnectionFactory http1 = new HttpConnectionFactory(config);

HTTP2CServerConnectionFactory http2 =
new HTTP2CServerConnectionFactory(config);
Expand All @@ -280,11 +282,12 @@ public void start() throws Exception {
// Initialize the HTTPS server.
if (isHTTPSEnabled()) {
HttpConfiguration config = new HttpConfiguration();
config.setUriCompliance(UriCompliance.LEGACY);
config.setSecureScheme("https");
config.setSecurePort(getHTTPSPort());
config.addCustomizer(new SecureRequestCustomizer());

final SslContextFactory contextFactory = new SslContextFactory.Server();
final SslContextFactory.Server contextFactory = new SslContextFactory.Server();
contextFactory.setKeyStorePath(getHTTPSKeyStorePath());
if (getHTTPSKeyStorePassword() != null) {
contextFactory.setKeyStorePassword(getHTTPSKeyStorePassword());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ private static class CustomBlobOutputStream

/**
* Constructor for an instance that writes into the given temporary
* blob. Upon closure, if the stream is {@link #isCompletelyWritten()
* blob. Upon closure, if the stream is {@link #isComplete()
* completely written}, the temporary blob is copied into place and
* deleted. Otherwise, the temporary blob is deleted.
*
Expand Down Expand Up @@ -96,7 +96,7 @@ public void close() throws IOException {
blobOutputStream.flush();
blobOutputStream.close();
if (container != null) {
if (isCompletelyWritten()) {
if (isComplete()) {
// Copy the temporary blob into place.
CloudBlockBlob destBlob =
container.getBlockBlobReference(blobKey);
Expand Down Expand Up @@ -144,6 +144,8 @@ public void write(byte[] b, int off, int len) throws IOException {
private static final Logger LOGGER =
LoggerFactory.getLogger(AzureStorageCache.class);

private static final String INFO_EXTENSION = ".json";

private static CloudBlobClient client;

/**
Expand Down Expand Up @@ -197,7 +199,7 @@ private static Instant getEarliestValidInstant() {

@Override
public Optional<Info> getInfo(Identifier identifier) throws IOException {
final String containerName = getContainerName();
final String containerName = getContainerName();
final CloudBlobClient client = getClientInstance();

try {
Expand All @@ -211,6 +213,12 @@ public Optional<Info> getInfo(Identifier identifier) throws IOException {
if (isValid(blob)) {
try (InputStream is = blob.openInputStream()) {
Info info = Info.fromJSON(is);
// Populate the serialization timestamp if it is not
// already, as suggested by the method contract.
if (info.getSerializationTimestamp() == null) {
info.setSerializationTimestamp(
blob.getProperties().getLastModified().toInstant());
}
LOGGER.debug("getInfo(): read {} from container {} in {}",
objectKey, containerName, watch);
return Optional.of(info);
Expand Down Expand Up @@ -290,7 +298,7 @@ public InputStream newDerivativeImageInputStream(OperationList opList)
*/
String getObjectKey(Identifier identifier) {
return getObjectKeyPrefix() + "info/" +
StringUtils.md5(identifier.toString()) + ".json";
StringUtils.md5(identifier.toString()) + INFO_EXTENSION;
}

/**
Expand Down Expand Up @@ -390,6 +398,32 @@ private void purgeAsync(CloudBlob blob) {
});
}

/**
 * Deletes all cached infos from the configured container.
 *
 * <p>Only blobs beneath the {@code info/} sub-prefix are listed, since that
 * is where {@code getObjectKey(Identifier)} places infos. This avoids
 * enumerating every other blob under the cache's key prefix just to skip
 * it. The {@link #INFO_EXTENSION} check is retained as a guard so that only
 * info blobs are ever deleted.</p>
 *
 * @throws IOException if the container reference cannot be obtained or a
 *         storage operation fails; the underlying exception is preserved
 *         as the cause.
 */
@Override
public void purgeInfos() throws IOException {
    final String containerName   = getContainerName();
    final CloudBlobClient client = getClientInstance();
    // Must stay in sync with the key layout used by
    // getObjectKey(Identifier): <prefix>info/<md5><INFO_EXTENSION>.
    final String infoKeyPrefix   = getObjectKeyPrefix() + "info/";
    try {
        final CloudBlobContainer container =
                client.getContainerReference(containerName);
        // count = info-prefixed blobs examined; deletedCount = blobs
        // actually removed.
        int count = 0, deletedCount = 0;
        // A flat listing (second argument true) is requested, as in the
        // original code.
        for (ListBlobItem item : container.listBlobs(infoKeyPrefix, true)) {
            if (item instanceof CloudBlob) {
                CloudBlob blob = (CloudBlob) item;
                count++;
                if (blob.getName().endsWith(INFO_EXTENSION) &&
                        blob.deleteIfExists()) {
                    deletedCount++;
                }
            }
        }
        LOGGER.debug("purgeInfos(): deleted {} of {} items",
                deletedCount, count);
    } catch (URISyntaxException | StorageException e) {
        throw new IOException(e.getMessage(), e);
    }
}

@Override
public void purgeInvalid() throws IOException {
final String containerName = getContainerName();
Expand Down
Loading

0 comments on commit 2841244

Please sign in to comment.