Skip to content

Commit c9ba0c7

Browse files
committed
fix: allow Unicode object names
This allows all Unicode characters that are also valid in XML 1.0 documents. See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html See: https://www.w3.org/TR/REC-xml/#charsets
1 parent 6f58fe2 commit c9ba0c7

File tree

12 files changed

+481
-9
lines changed

12 files changed

+481
-9
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
ALTER TABLE "storage"."objects"
2+
ADD CONSTRAINT objects_name_check
3+
CHECK (name SIMILAR TO '[\x09\x0A\x0D\x20-\xD7FF\xE000-\xFFFD\x00010000-\x0010ffff]+');
4+

src/http/plugins/xml.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ export const xmlParser = fastifyPlugin(
2121
isArray: (_: string, jpath: string) => {
2222
return opts.parseAsArray?.includes(jpath)
2323
},
24+
tagValueProcessor: (name: string, value: string) =>
25+
value.replace(/&#x([0-9a-fA-F]{1,6});/g, (_, str: string) =>
26+
String.fromCharCode(Number.parseInt(str, 16))
27+
),
2428
})
2529
}
2630

src/internal/database/migrations/types.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,5 @@ export const DBMigration = {
2424
'optimize-search-function': 22,
2525
'operation-function': 23,
2626
'custom-metadata': 24,
27+
'unicode-object-names': 25,
2728
} as const

src/internal/errors/codes.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ export const ERRORS = {
265265
code: ErrorCode.InvalidKey,
266266
resource: key,
267267
httpStatusCode: 400,
268-
message: `Invalid key: ${key}`,
268+
message: `Invalid key: ${encodeURIComponent(key)}`,
269269
originalError: e,
270270
}),
271271

src/storage/database/knex.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -877,7 +877,11 @@ export class DBError extends StorageBackendError implements RenderableError {
877877
code: pgError.code,
878878
})
879879
default:
880-
return ERRORS.DatabaseError(pgError.message, pgError).withMetadata({
880+
const errorMessage =
881+
pgError.code === '23514' && pgError.constraint === 'objects_name_check'
882+
? 'Invalid object name'
883+
: pgError.message
884+
return ERRORS.DatabaseError(errorMessage, pgError).withMetadata({
881885
query,
882886
code: pgError.code,
883887
})

src/storage/limits.ts

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,15 @@ export async function isImageTransformationEnabled(tenantId: string) {
4747
* @param key
4848
*/
4949
export function isValidKey(key: string): boolean {
50-
// only allow s3 safe characters and characters which require special handling for now
51-
// https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
52-
return key.length > 0 && /^(\w|\/|!|-|\.|\*|'|\(|\)| |&|\$|@|=|;|:|\+|,|\?)*$/.test(key)
50+
// Allow any sequence of Unicode characters with UTF-8 encoding,
51+
// except characters not allowed in XML 1.0.
52+
// See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
53+
// See: https://www.w3.org/TR/REC-xml/#charsets
54+
//
55+
const regex =
56+
/[\0-\x08\x0B\f\x0E-\x1F\uFFFE\uFFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]/
57+
58+
return key.length > 0 && !regex.test(key)
5359
}
5460

5561
/**

src/storage/protocols/s3/s3-handler.ts

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,9 @@ export class S3ProtocolHandler {
506506
throw ERRORS.InvalidUploadId()
507507
}
508508

509+
mustBeValidBucketName(Bucket)
510+
mustBeValidKey(Key)
511+
509512
await uploader.canUpload({
510513
bucketId: Bucket as string,
511514
objectName: Key as string,
@@ -601,6 +604,9 @@ export class S3ProtocolHandler {
601604
throw ERRORS.MissingContentLength()
602605
}
603606

607+
mustBeValidBucketName(Bucket)
608+
mustBeValidKey(Key)
609+
604610
const bucket = await this.storage.asSuperUser().findBucket(Bucket, 'file_size_limit')
605611
const maxFileSize = await getFileSizeLimit(this.storage.db.tenantId, bucket?.file_size_limit)
606612

@@ -755,6 +761,9 @@ export class S3ProtocolHandler {
755761
throw ERRORS.MissingParameter('Key')
756762
}
757763

764+
mustBeValidBucketName(Bucket)
765+
mustBeValidKey(Key)
766+
758767
const multipart = await this.storage.db
759768
.asSuperUser()
760769
.findMultipartUpload(UploadId, 'id,version')
@@ -797,6 +806,9 @@ export class S3ProtocolHandler {
797806
throw ERRORS.MissingParameter('Bucket')
798807
}
799808

809+
mustBeValidBucketName(Bucket)
810+
mustBeValidKey(Key)
811+
800812
const object = await this.storage
801813
.from(Bucket)
802814
.findObject(Key, 'metadata,user_metadata,created_at,updated_at')
@@ -836,6 +848,9 @@ export class S3ProtocolHandler {
836848
throw ERRORS.MissingParameter('Key')
837849
}
838850

851+
mustBeValidBucketName(Bucket)
852+
mustBeValidKey(Key)
853+
839854
const object = await this.storage.from(Bucket).findObject(Key, 'id')
840855

841856
if (!object) {
@@ -864,6 +879,9 @@ export class S3ProtocolHandler {
864879
const bucket = command.Bucket as string
865880
const key = command.Key as string
866881

882+
mustBeValidBucketName(bucket)
883+
mustBeValidKey(key)
884+
867885
const object = await this.storage.from(bucket).findObject(key, 'version,user_metadata')
868886
const response = await this.storage.backend.getObject(
869887
storageS3Bucket,
@@ -916,6 +934,9 @@ export class S3ProtocolHandler {
916934
throw ERRORS.MissingParameter('Key')
917935
}
918936

937+
mustBeValidBucketName(Bucket)
938+
mustBeValidKey(Key)
939+
919940
await this.storage.from(Bucket).deleteObject(Key)
920941

921942
return {}
@@ -947,6 +968,9 @@ export class S3ProtocolHandler {
947968
return {}
948969
}
949970

971+
mustBeValidBucketName(Bucket)
972+
Delete.Objects.forEach((o) => mustBeValidKey(o.Key))
973+
950974
const deletedResult = await this.storage
951975
.from(Bucket)
952976
.deleteObjects(Delete.Objects.map((o) => o.Key || ''))
@@ -1017,6 +1041,11 @@ export class S3ProtocolHandler {
10171041
command.MetadataDirective = 'COPY'
10181042
}
10191043

1044+
mustBeValidBucketName(Bucket)
1045+
mustBeValidKey(Key)
1046+
mustBeValidBucketName(sourceBucket)
1047+
mustBeValidKey(sourceKey)
1048+
10201049
const copyResult = await this.storage.from(sourceBucket).copyObject({
10211050
sourceKey,
10221051
destinationBucket: Bucket,
@@ -1147,6 +1176,11 @@ export class S3ProtocolHandler {
11471176
throw ERRORS.NoSuchKey('')
11481177
}
11491178

1179+
mustBeValidBucketName(Bucket)
1180+
mustBeValidKey(Key)
1181+
mustBeValidBucketName(sourceBucketName)
1182+
mustBeValidKey(sourceKey)
1183+
11501184
// Check if copy source exists
11511185
const copySource = await this.storage.db.findObject(
11521186
sourceBucketName,

src/test/common.ts

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,42 @@ export const adminApp = app({})
88

99
const ENV = process.env
1010

11+
/**
12+
* Should support all Unicode characters with UTF-8 encoding according to AWS S3 object naming guide, including:
13+
* - Safe characters: 0-9 a-z A-Z !-_.*'()
14+
* - Characters that might require special handling: &$@=;/:+,? and Space and ASCII characters \t, \n, and \r.
15+
* - Characters: \{}^%`[]"<>~#| and non-printable ASCII characters (128–255 decimal characters).
16+
*
17+
* The following characters are not allowed:
18+
* - ASCII characters 0x00–0x1F, except 0x09, 0x0A, and 0x0D.
19+
* - Unicode \u{FFFE} and \u{FFFF}.
20+
* - Lone surrogates characters.
21+
* See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
22+
* See: https://www.w3.org/TR/REC-xml/#charsets
23+
*/
24+
export function getUnicodeObjectName(): string {
25+
const objectName = 'test'
26+
.concat("!-_*.'()")
27+
// Characters that might require special handling
28+
.concat('&$@=;:+,? \x09\x0A\x0D')
29+
// Characters to avoid
30+
.concat('\\{}^%`[]"<>~#|\xFF')
31+
// MinIO max. length for each '/' separated segment is 255
32+
.concat('/')
33+
.concat([...Array(127).keys()].map((i) => String.fromCodePoint(i + 128)).join(''))
34+
.concat('/')
35+
// Some special Unicode characters
36+
.concat('\u2028\u202F\u{0001FFFF}')
37+
// Some other Unicode characters
38+
.concat('일이삼\u{0001f642}')
39+
40+
return objectName
41+
}
42+
43+
export function getInvalidObjectName(): string {
44+
return 'test\x01\x02\x03.txt'
45+
}
46+
1147
export function useMockQueue() {
1248
const queueSpy: jest.SpyInstance | undefined = undefined
1349
beforeEach(() => {

src/test/object.test.ts

Lines changed: 89 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,11 @@ import app from '../app'
66
import { getConfig, mergeConfig } from '../config'
77
import { signJWT } from '@internal/auth'
88
import { Obj, backends } from '../storage'
9-
import { useMockObject, useMockQueue } from './common'
9+
import { getInvalidObjectName, getUnicodeObjectName, useMockObject, useMockQueue } from './common'
1010
import { getServiceKeyUser, getPostgresConnection } from '@internal/database'
1111
import { Knex } from 'knex'
1212
import { ErrorCode, StorageBackendError } from '@internal/errors'
13+
import { randomUUID } from 'crypto'
1314

1415
const { jwtSecret, serviceKey, tenantId } = getConfig()
1516
const anonKey = process.env.ANON_KEY || ''
@@ -2456,3 +2457,90 @@ describe('testing list objects', () => {
24562457
expect(responseJSON[1].name).toBe('sadcat-upload23.png')
24572458
})
24582459
})
2460+
2461+
describe('Object key with Unicode characters', () => {
2462+
test('can upload, get and list', async () => {
2463+
const prefix = `test-utf8-${randomUUID()}`
2464+
const objectName = getUnicodeObjectName()
2465+
2466+
const form = new FormData()
2467+
form.append('file', fs.createReadStream(`./src/test/assets/sadcat.jpg`))
2468+
const headers = Object.assign({}, form.getHeaders(), {
2469+
authorization: `Bearer ${serviceKey}`,
2470+
'x-upsert': 'true',
2471+
})
2472+
2473+
const uploadResponse = await app().inject({
2474+
method: 'POST',
2475+
url: `/object/bucket2/${prefix}/${encodeURIComponent(objectName)}`,
2476+
headers: {
2477+
...headers,
2478+
...form.getHeaders(),
2479+
},
2480+
payload: form,
2481+
})
2482+
expect(uploadResponse.statusCode).toBe(200)
2483+
2484+
const getResponse = await app().inject({
2485+
method: 'GET',
2486+
url: `/object/bucket2/${prefix}/${encodeURIComponent(objectName)}`,
2487+
headers: {
2488+
authorization: `Bearer ${serviceKey}`,
2489+
},
2490+
})
2491+
expect(getResponse.statusCode).toBe(200)
2492+
expect(getResponse.headers['etag']).toBe('abc')
2493+
expect(getResponse.headers['last-modified']).toBe('Thu, 12 Aug 2021 16:00:00 GMT')
2494+
expect(getResponse.headers['cache-control']).toBe('no-cache')
2495+
2496+
const listResponse = await app().inject({
2497+
method: 'POST',
2498+
url: `/object/list/bucket2`,
2499+
headers: {
2500+
authorization: `Bearer ${serviceKey}`,
2501+
},
2502+
payload: { prefix },
2503+
})
2504+
expect(listResponse.statusCode).toBe(200)
2505+
const listResponseJSON = JSON.parse(listResponse.body)
2506+
expect(listResponseJSON).toHaveLength(1)
2507+
expect(listResponseJSON[0].name).toBe(objectName.split('/')[0])
2508+
2509+
const deleteResponse = await app().inject({
2510+
method: 'DELETE',
2511+
url: `/object/bucket2/${prefix}/${encodeURIComponent(objectName)}`,
2512+
headers: {
2513+
authorization: `Bearer ${serviceKey}`,
2514+
},
2515+
})
2516+
expect(deleteResponse.statusCode).toBe(200)
2517+
})
2518+
2519+
test('should not be allowed to upload objects with invalid names', async () => {
2520+
const invalidObjectName = getInvalidObjectName()
2521+
const form = new FormData()
2522+
form.append('file', fs.createReadStream(`./src/test/assets/sadcat.jpg`))
2523+
const headers = Object.assign({}, form.getHeaders(), {
2524+
authorization: `Bearer ${serviceKey}`,
2525+
'x-upsert': 'true',
2526+
})
2527+
const uploadResponse = await app().inject({
2528+
method: 'POST',
2529+
url: `/object/bucket2/${encodeURIComponent(invalidObjectName)}`,
2530+
headers: {
2531+
...headers,
2532+
...form.getHeaders(),
2533+
},
2534+
payload: form,
2535+
})
2536+
expect(uploadResponse.statusCode).toBe(400)
2537+
expect(S3Backend.prototype.uploadObject).not.toHaveBeenCalled()
2538+
expect(uploadResponse.body).toBe(
2539+
JSON.stringify({
2540+
statusCode: '400',
2541+
error: 'InvalidKey',
2542+
message: `Invalid key: ${encodeURIComponent(invalidObjectName)}`,
2543+
})
2544+
)
2545+
})
2546+
})

0 commit comments

Comments
 (0)