diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index aa82ab06aa770..4219177f8a409 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -205,7 +205,7 @@ 1.0-beta-1 900 1.12.720 - 2.29.52 + 2.31.12 3.1.1 1.0.0 1.0.1 diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java index 98745b295b5d3..09a5f7154083a 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java @@ -22,12 +22,22 @@ import software.amazon.awssdk.arns.Arn; +import java.util.Optional; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + /** * Represents an Arn Resource, this can be an accesspoint or bucket. */ public final class ArnResource { private final static String S3_ACCESSPOINT_ENDPOINT_FORMAT = "s3-accesspoint.%s.amazonaws.com"; private final static String S3_OUTPOSTS_ACCESSPOINT_ENDPOINT_FORMAT = "s3-outposts.%s.amazonaws.com"; + private final static String S3_EXPRESS_ACCESSPOINT_ENDPOINT_FORMAT = "s3express-%s.%s.amazonaws.com"; + // bucket example: mybucket--usw2-az1--x-s3 + // access point example: myaccesspoint--usw2-az1--xa-s3 + public final static Pattern S3_EXPRESS_RESOURCE_FORMAT_REGEX = Pattern.compile( + String.format("^(?<resourceName>[a-z0-9]([a-z0-9\\-]*[a-z0-9])?)--(?<zoneId>[a-z0-9\\-]+)--(?<resourceType>x|xa)-s3$") + ); /** * Resource name. @@ -54,6 +64,11 @@ public final class ArnResource { */ private final String partition; + /** + * Service for the resource. Allowed services: s3, s3-outposts, s3express + */ + private final String service; + /** * Because of the different ways an endpoint can be constructed depending on partition we're * relying on the AWS SDK to produce the endpoint. 
In this case we need a region key of the form @@ -61,12 +76,13 @@ public final class ArnResource { */ private final String accessPointRegionKey; - private ArnResource(String name, String owner, String region, String partition, String fullArn) { + private ArnResource(String name, String owner, String region, String partition, String fullArn, String service) { this.name = name; this.ownerAccountId = owner; this.region = region; this.partition = partition; this.fullArn = fullArn; + this.service = service; this.accessPointRegionKey = String.format("accesspoint-%s", region); } @@ -74,6 +90,10 @@ private boolean isOutposts(){ return fullArn.contains("s3-outposts"); } + private boolean isExpress(){ + return fullArn.contains("s3express"); + } + /** * Resource name. * @return resource name. @@ -106,13 +126,35 @@ public String getFullArn() { return fullArn; } + /** + * Service for resource. + * @return service for resource. + */ + public String getService() { + return service; + } + /** * Formatted endpoint for the resource. * @return resource endpoint. */ public String getEndpoint() { - String format = isOutposts() ? 
S3_OUTPOSTS_ACCESSPOINT_ENDPOINT_FORMAT : S3_ACCESSPOINT_ENDPOINT_FORMAT; - return String.format(format, region); + String format; + if(isExpress()) { + Optional<String> zoneId = getZoneIdFromResourceName(name); + if(!zoneId.isPresent()) { + throw new IllegalArgumentException("Zone ID could not be extracted from S3Express resource name: " + name); + } + + format = S3_EXPRESS_ACCESSPOINT_ENDPOINT_FORMAT; + return String.format(format, zoneId.get(), region); + } else if (isOutposts()) { + format = S3_OUTPOSTS_ACCESSPOINT_ENDPOINT_FORMAT; + return String.format(format, region); + } else { + format = S3_ACCESSPOINT_ENDPOINT_FORMAT; + return String.format(format, region); + } } /** @@ -134,6 +176,14 @@ public static ArnResource accessPointFromArn(String arn) throws IllegalArgumentE String resourceName = parsed.resource().resource(); return new ArnResource(resourceName, parsed.accountId().get(), parsed.region().get(), - parsed.partition(), arn); + parsed.partition(), arn, parsed.service()); + } + + private static Optional<String> getZoneIdFromResourceName(final String resourceName) { + return Optional.ofNullable(resourceName) + .map(name -> { + Matcher matcher = S3_EXPRESS_RESOURCE_FORMAT_REGEX.matcher(name); + return matcher.matches() ? 
matcher.group("zoneId") : null; + }); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 304ba032b416a..6ad3bc5bacc61 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -598,7 +598,13 @@ public void initialize(URI name, Configuration originalConf) if (!configuredArn.isEmpty()) { accessPoint = ArnResource.accessPointFromArn(configuredArn); LOG.info("Using AccessPoint ARN \"{}\" for bucket {}", configuredArn, bucket); - bucket = accessPoint.getFullArn(); + + // s3express does not support ARNs in requests, but instead takes in access point name as bucket parameter + if(accessPoint.getService().equals("s3express")) { + bucket = accessPoint.getName(); + } else { + bucket = accessPoint.getFullArn(); + } } else if (conf.getBoolean(AWS_S3_ACCESSPOINT_REQUIRED, false)) { LOG.warn("Access Point usage is required because \"{}\" is enabled," + " but not configured for the bucket: {}", AWS_S3_ACCESSPOINT_REQUIRED, bucket); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java index 3f2852181f4b2..df77f13823506 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestArnResource.java @@ -61,6 +61,7 @@ public void parseAccessPointFromArn() throws IllegalArgumentException { assertEquals(accessPoint, resource.getName(), "Access Point name does not match"); assertEquals(MOCK_ACCOUNT, resource.getOwnerAccountId(), "Account Id does not match"); assertEquals(region, resource.getRegion(), "Region does not match"); + assertEquals("s3", resource.getService(), "Service does not 
match"); } } @@ -90,6 +91,27 @@ public void makeSureS3OutpostsEndpointHasTheCorrectFormat() { .isEqualTo(expected); } + @Test + public void makeSureS3ExpressEndpointHasTheCorrectFormat() { + ArnResource accessPoint = getArnResourceFrom("aws", "s3express", "us-west-2", MOCK_ACCOUNT, + "test--usw2-az1--xa-s3"); + String expected = "s3express-usw2-az1.us-west-2.amazonaws.com"; + + assertThat(accessPoint.getEndpoint()) + .describedAs("Endpoint has invalid format. Access Point requests will not work") + .isEqualTo(expected); + } + + @Test + public void getEndpointFromInvalidS3ExpressAccessPointNameMustThrow() throws Exception { + ArnResource accessPoint = getArnResourceFrom("aws", "s3express", "us-west-2", MOCK_ACCOUNT, + "test"); + describe("Using an invalid access point name format must throw when getting an endpoint."); + + intercept(IllegalArgumentException.class, () -> + accessPoint.getEndpoint()); + } + @Test public void invalidARNsMustThrow() throws Exception { describe("Using an invalid ARN format must throw when initializing an ArnResource.");