Skip to content

HADOOP-19588. S3A support for S3 express access points #7746

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: trunk
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion hadoop-project/pom.xml
Original file line number Diff line number Diff line change
@@ -205,7 +205,7 @@
<make-maven-plugin.version>1.0-beta-1</make-maven-plugin.version>
<surefire.fork.timeout>900</surefire.fork.timeout>
<aws-java-sdk.version>1.12.720</aws-java-sdk.version>
<aws-java-sdk-v2.version>2.29.52</aws-java-sdk-v2.version>
<aws-java-sdk-v2.version>2.31.12</aws-java-sdk-v2.version>
<amazon-s3-encryption-client-java.version>3.1.1</amazon-s3-encryption-client-java.version>
<amazon-s3-analyticsaccelerator-s3.version>1.0.0</amazon-s3-analyticsaccelerator-s3.version>
<aws.eventstream.version>1.0.1</aws.eventstream.version>
Original file line number Diff line number Diff line change
@@ -22,12 +22,22 @@

import software.amazon.awssdk.arns.Arn;

import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* Represents an ARN resource; this can be an access point or a bucket.
*/
public final class ArnResource {
private final static String S3_ACCESSPOINT_ENDPOINT_FORMAT = "s3-accesspoint.%s.amazonaws.com";
private final static String S3_OUTPOSTS_ACCESSPOINT_ENDPOINT_FORMAT = "s3-outposts.%s.amazonaws.com";
private final static String S3_EXPRESS_ACCESSPOINT_ENDPOINT_FORMAT = "s3express-%s.%s.amazonaws.com";
/**
* Pattern for S3 Express resource names of the form
* {@code <name>--<zoneId>--<x|xa>-s3}; exposes the named groups
* {@code apname}, {@code zoneId} and {@code resource}.
* Bucket example: mybucket--usw2-az1--x-s3
* Access point example: myaccesspoint--usw2-az1--xa-s3
*/
// The regex is compiled directly: routing it through String.format() with no
// arguments added nothing and would misinterpret any '%' added to the pattern later.
public final static Pattern S3_EXPRESS_RESOURCE_FORMAT_REGEX = Pattern.compile(
    "^(?<apname>[a-z0-9]([a-z0-9\\-]*[a-z0-9])?)--(?<zoneId>[a-z0-9\\-]+)--(?<resource>x|xa)-s3$"
);

/**
* Resource name.
@@ -54,26 +64,36 @@
*/
private final String partition;

/**
* Service for the resource. Allowed services: s3, s3-outposts, s3express.
*/
private final String service;

/**
* Because of the different ways an endpoint can be constructed depending on partition we're
* relying on the AWS SDK to produce the endpoint. In this case we need a region key of the form
* {@code String.format("accesspoint-%s", awsRegion)}
*/
private final String accessPointRegionKey;

/**
* Creates an ArnResource from the individual components of an ARN.
* Also derives the access point region key from the region.
* @param name resource name.
* @param owner account id of the resource owner.
* @param region region of the resource.
* @param partition partition of the resource.
* @param fullArn the full ARN string.
* @param service service of the resource.
*/
private ArnResource(String name, String owner, String region, String partition, String fullArn) {
private ArnResource(String name, String owner, String region, String partition, String fullArn, String service) {
this.name = name;
this.ownerAccountId = owner;
this.region = region;
this.partition = partition;
this.fullArn = fullArn;
this.service = service;
// Region key of the form "accesspoint-<region>".
this.accessPointRegionKey = String.format("accesspoint-%s", region);
}

/**
* Whether this resource belongs to the S3 on Outposts service.
* The decision is made on the ARN's service field rather than a substring
* search of the full ARN, so a resource whose name merely contains
* "s3-outposts" is not misclassified.
* @return true if the ARN service is s3-outposts.
*/
private boolean isOutposts() {
  return "s3-outposts".equals(service);
}

/**
* Whether this resource belongs to the S3 Express service.
* The decision is made on the ARN's service field rather than a substring
* search of the full ARN, so a standard S3 access point whose name merely
* contains "s3express" is not routed to an S3 Express endpoint.
* @return true if the ARN service is s3express.
*/
private boolean isExpress() {
  return "s3express".equals(service);
}

/**
* Resource name.
* @return resource name.
@@ -106,13 +126,35 @@
return fullArn;
}

/**
* Service for the resource, as supplied when this ArnResource was created.
* @return service for resource.
*/
public String getService() {
return service;
}

/**
* Formatted endpoint for the resource.
* The endpoint format is chosen by resource type: S3 Express, S3 Outposts,
* or a standard S3 access point.
* @return resource endpoint.
* @throws IllegalArgumentException if the resource is S3 Express and no zone ID
* can be extracted from its name.
*/
public String getEndpoint() {
String format = isOutposts() ? S3_OUTPOSTS_ACCESSPOINT_ENDPOINT_FORMAT : S3_ACCESSPOINT_ENDPOINT_FORMAT;
return String.format(format, region);
String format;
if(isExpress()) {
// S3 Express endpoints embed the zone ID, which is parsed out of the resource name.
Optional<String> zoneId = getZoneIdFromResourceName(name);
if(!zoneId.isPresent()) {
throw new IllegalArgumentException("Zone ID could not be extracted from S3Express resource name: " + name);
}

format = S3_EXPRESS_ACCESSPOINT_ENDPOINT_FORMAT;
// Format arguments: zone ID first, then region.
return String.format(format, zoneId.get(), region);
} else if (isOutposts()) {
format = S3_OUTPOSTS_ACCESSPOINT_ENDPOINT_FORMAT;
return String.format(format, region);
} else {
format = S3_ACCESSPOINT_ENDPOINT_FORMAT;
return String.format(format, region);
}
}

/**
@@ -134,6 +176,14 @@

String resourceName = parsed.resource().resource();
return new ArnResource(resourceName, parsed.accountId().get(), parsed.region().get(),
parsed.partition(), arn);
parsed.partition(), arn, parsed.service());
}

/**
* Extracts the zone ID from an S3 Express resource name.
* @param resourceName resource name to parse; may be null.
* @return the value of the {@code zoneId} group if the whole name matches
* {@code S3_EXPRESS_RESOURCE_FORMAT_REGEX}; an empty Optional if the name is null
* or does not match.
*/
private static Optional<String> getZoneIdFromResourceName(final String resourceName) {

Check failure on line 182 in hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java

ASF Cloudbees Jenkins ci-hadoop / Apache Yetus

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ArnResource.java#L182

blanks: end of line
return Optional.ofNullable(resourceName)
.map(name -> {
Matcher matcher = S3_EXPRESS_RESOURCE_FORMAT_REGEX.matcher(name);
// A null result from the mapping function yields an empty Optional.
return matcher.matches() ? matcher.group("zoneId") : null;
});
}
}
Original file line number Diff line number Diff line change
@@ -598,7 +598,13 @@ public void initialize(URI name, Configuration originalConf)
if (!configuredArn.isEmpty()) {
accessPoint = ArnResource.accessPointFromArn(configuredArn);
LOG.info("Using AccessPoint ARN \"{}\" for bucket {}", configuredArn, bucket);
bucket = accessPoint.getFullArn();

// s3express does not support ARNs in requests; it takes the access point name as the bucket parameter instead
if(accessPoint.getService().equals("s3express")) {
bucket = accessPoint.getName();
} else {
bucket = accessPoint.getFullArn();
}
} else if (conf.getBoolean(AWS_S3_ACCESSPOINT_REQUIRED, false)) {
LOG.warn("Access Point usage is required because \"{}\" is enabled," +
" but not configured for the bucket: {}", AWS_S3_ACCESSPOINT_REQUIRED, bucket);
Original file line number Diff line number Diff line change
@@ -61,6 +61,7 @@ public void parseAccessPointFromArn() throws IllegalArgumentException {
assertEquals(accessPoint, resource.getName(), "Access Point name does not match");
assertEquals(MOCK_ACCOUNT, resource.getOwnerAccountId(), "Account Id does not match");
assertEquals(region, resource.getRegion(), "Region does not match");
assertEquals("s3", resource.getService(), "Service does not match");
}
}

@@ -90,6 +91,27 @@ public void makeSureS3OutpostsEndpointHasTheCorrectFormat() {
.isEqualTo(expected);
}

/**
* An S3 Express access point ARN must produce an endpoint of the form
* s3express-[zoneId].[region].amazonaws.com.
*/
@Test
public void makeSureS3ExpressEndpointHasTheCorrectFormat() {
  final String wantedEndpoint = "s3express-usw2-az1.us-west-2.amazonaws.com";
  final ArnResource expressResource = getArnResourceFrom(
      "aws", "s3express", "us-west-2", MOCK_ACCOUNT, "test--usw2-az1--xa-s3");
  final String actualEndpoint = expressResource.getEndpoint();

  assertThat(actualEndpoint)
      .describedAs("Endpoint has invalid format. Access Point requests will not work")
      .isEqualTo(wantedEndpoint);
}

/**
* An S3 Express resource whose name does not follow the S3 Express naming
* format must fail with an IllegalArgumentException when its endpoint is requested.
*/
@Test
public void getEndpointFromInvalidS3ExpressAccessPointNameMustThrow() throws Exception {
  final ArnResource malformedResource = getArnResourceFrom(
      "aws", "s3express", "us-west-2", MOCK_ACCOUNT, "test");
  describe("Using an invalid access point name format must throw when getting an endpoint.");

  // Kept as a lambda, not a method reference, so the choice of intercept() overload is unchanged.
  intercept(IllegalArgumentException.class,
      () -> malformedResource.getEndpoint());
}

@Test
public void invalidARNsMustThrow() throws Exception {
describe("Using an invalid ARN format must throw when initializing an ArnResource.");