Skip to content

Commit

Permalink
fix: Special chars not handled correctly (#398)
Browse files Browse the repository at this point in the history
* fix: Special chars not handled correctly

Signed-off-by: Xuanwo <[email protected]>

* Fix test for azblob

Signed-off-by: Xuanwo <[email protected]>

* Add more test cases

Signed-off-by: Xuanwo <[email protected]>
  • Loading branch information
Xuanwo authored Jun 27, 2022
1 parent 98d19ce commit 4b9d0e6
Show file tree
Hide file tree
Showing 6 changed files with 213 additions and 12 deletions.
5 changes: 3 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ authors = ["Databend Authors <[email protected]>"]
categories = ["filesystem"]
description = "Open Data Access Layer that connect the whole world together."
edition = "2021"
exclude = ["docs/"]
homepage = "https://opendal.databend.rs/"
keywords = ["storage", "data", "s3", "fs", "azblob"]
license = "Apache-2.0"
name = "opendal"
repository = "https://github.com/datafuselabs/opendal"
version = "0.9.0"
exclude = ["docs/"]

[package.metadata.docs.rs]
all-features = true
Expand Down Expand Up @@ -60,10 +60,11 @@ metrics = "0.19.0"
minitrace = "0.4.0"
once_cell = "1.10.0"
parking_lot = "0.12.0"
percent-encoding = "2.1.0"
pin-project = "1.0.10"
quick-xml = { version = "0.23.0", features = ["serialize"] }
radix_trie = { version = "0.2.1", optional = true }
reqsign = "0.1.0"
reqsign = "0.1.1"
serde = { version = "1.0.136", features = ["derive"] }
thiserror = "1.0.30"
time = "0.3.9"
Expand Down
56 changes: 56 additions & 0 deletions src/io_util/http_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use percent_encoding::{utf8_percent_encode, AsciiSet, NON_ALPHANUMERIC};
use std::io::ErrorKind;
use std::ops::Deref;

Expand Down Expand Up @@ -54,3 +55,58 @@ pub fn parse_error_kind(err: &hyper::Error) -> ErrorKind {
ErrorKind::Other
}
}

/// PATH_ENCODE_SET is the encode set for http url path.
///
/// This set follows [encodeURIComponent](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURIComponent) which will encode all non-ASCII characters except `A-Z a-z 0-9 - _ . ! ~ * ' ( )`
static PATH_ENCODE_SET: AsciiSet = NON_ALPHANUMERIC
.remove(b'-')
.remove(b'_')
.remove(b'.')
.remove(b'!')
.remove(b'~')
.remove(b'*')
.remove(b'\'')
.remove(b'(')
.remove(b')');

/// percent_encode_path will do percent encoding for http encode path.
///
/// Follows [encodeURIComponent](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURIComponent) which will encode all non-ASCII characters except `A-Z a-z 0-9 - _ . ! ~ * ' ( )`
pub fn percent_encode_path(path: &str) -> String {
utf8_percent_encode(path, &PATH_ENCODE_SET).to_string()
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn test_percent_encode_path() {
let cases = vec![
(
"Reserved Characters",
";,/?:@&=+$",
"%3B%2C%2F%3F%3A%40%26%3D%2B%24",
),
("Unescaped Characters", "-_.!~*'()", "-_.!~*'()"),
("Number Sign", "#", "%23"),
(
"Alphanumeric Characters + Space",
"ABC abc 123",
"ABC%20abc%20123",
),
(
"Unicode",
"你好,世界!❤",
"%E4%BD%A0%E5%A5%BD%EF%BC%8C%E4%B8%96%E7%95%8C%EF%BC%81%E2%9D%A4",
),
];

for (name, input, expected) in cases {
let actual = percent_encode_path(input);

assert_eq!(actual, expected, "{name}");
}
}
}
1 change: 1 addition & 0 deletions src/io_util/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ pub(crate) use http_body::HttpBodyWriter;

mod http_client;
pub(crate) use http_client::parse_error_kind;
pub(crate) use http_client::percent_encode_path;
pub(crate) use http_client::HttpClient;

mod http_header;
Expand Down
31 changes: 26 additions & 5 deletions src/services/azblob/backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ use crate::io_util::parse_error_kind as parse_http_error_kind;
use crate::io_util::parse_error_response;
use crate::io_util::parse_etag;
use crate::io_util::parse_last_modified;
use crate::io_util::percent_encode_path;
use crate::io_util::HttpBodyWriter;
use crate::io_util::HttpClient;
use crate::object::ObjectMetadata;
Expand Down Expand Up @@ -464,7 +465,12 @@ impl Backend {
offset: Option<u64>,
size: Option<u64>,
) -> Result<hyper::Response<hyper::Body>> {
let url = format!("{}/{}/{}", self.endpoint, self.container, path);
let url = format!(
"{}/{}/{}",
self.endpoint,
self.container,
percent_encode_path(path)
);

let mut req = hyper::Request::get(&url);

Expand Down Expand Up @@ -509,7 +515,12 @@ impl Backend {
size: u64,
body: Body,
) -> Result<hyper::Request<hyper::Body>> {
let url = format!("{}/{}/{}", self.endpoint, self.container, path);
let url = format!(
"{}/{}/{}",
self.endpoint,
self.container,
percent_encode_path(path)
);

let mut req = hyper::Request::put(&url);

Expand Down Expand Up @@ -544,7 +555,12 @@ impl Backend {
&self,
path: &str,
) -> Result<hyper::Response<hyper::Body>> {
let url = format!("{}/{}/{}", self.endpoint, self.container, path);
let url = format!(
"{}/{}/{}",
self.endpoint,
self.container,
percent_encode_path(path)
);

let req = hyper::Request::head(&url);

Expand Down Expand Up @@ -577,7 +593,12 @@ impl Backend {

#[trace("delete_blob")]
pub(crate) async fn delete_blob(&self, path: &str) -> Result<hyper::Response<hyper::Body>> {
let url = format!("{}/{}/{}", self.endpoint, self.container, path);
let url = format!(
"{}/{}/{}",
self.endpoint,
self.container,
percent_encode_path(path)
);

let req = hyper::Request::delete(&url);

Expand Down Expand Up @@ -619,7 +640,7 @@ impl Backend {
self.endpoint, self.container
);
if !path.is_empty() {
url.push_str(&format!("&prefix={path}"))
url.push_str(&format!("&prefix={}", percent_encode_path(path)))
}
if !next_marker.is_empty() {
url.push_str(&format!("&marker={next_marker}"))
Expand Down
15 changes: 10 additions & 5 deletions src/services/s3/backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ use crate::io_util::parse_error_kind as parse_http_error_kind;
use crate::io_util::parse_error_response;
use crate::io_util::parse_etag;
use crate::io_util::parse_last_modified;
use crate::io_util::percent_encode_path;
use crate::io_util::HttpBodyWriter;
use crate::io_util::HttpClient;
use crate::ops::BytesRange;
Expand Down Expand Up @@ -1026,7 +1027,7 @@ impl Backend {
offset: Option<u64>,
size: Option<u64>,
) -> Result<hyper::Response<hyper::Body>> {
let url = format!("{}/{}", self.endpoint, path);
let url = format!("{}/{}", self.endpoint, percent_encode_path(path));

let mut req = hyper::Request::get(&url);

Expand Down Expand Up @@ -1075,7 +1076,7 @@ impl Backend {
size: u64,
body: hyper::Body,
) -> Result<hyper::Request<hyper::Body>> {
let url = format!("{}/{}", self.endpoint, path);
let url = format!("{}/{}", self.endpoint, percent_encode_path(path));

let mut req = hyper::Request::put(&url);

Expand Down Expand Up @@ -1109,7 +1110,7 @@ impl Backend {

#[trace("head_object")]
pub(crate) async fn head_object(&self, path: &str) -> Result<hyper::Response<hyper::Body>> {
let url = format!("{}/{}", self.endpoint, path);
let url = format!("{}/{}", self.endpoint, percent_encode_path(path));

let mut req = hyper::Request::head(&url);

Expand Down Expand Up @@ -1146,7 +1147,7 @@ impl Backend {

#[trace("delete_object")]
pub(crate) async fn delete_object(&self, path: &str) -> Result<hyper::Response<hyper::Body>> {
let url = format!("{}/{}", self.endpoint, path);
let url = format!("{}/{}", self.endpoint, percent_encode_path(path));

let mut req = hyper::Request::delete(&url)
.body(hyper::Body::empty())
Expand Down Expand Up @@ -1184,7 +1185,11 @@ impl Backend {
path: &str,
continuation_token: &str,
) -> Result<hyper::Response<hyper::Body>> {
let mut url = format!("{}?list-type=2&delimiter=/&prefix={}", self.endpoint, path);
let mut url = format!(
"{}?list-type=2&delimiter=/&prefix={}",
self.endpoint,
percent_encode_path(path)
);
if !continuation_token.is_empty() {
url.push_str(&format!("&continuation-token={continuation_token}"))
}
Expand Down
Loading

1 comment on commit 4b9d0e6

@github-actions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Deploy preview for opendal ready!

✅ Preview
https://opendal-eji0w1upm-databend.vercel.app

Built with commit 4b9d0e6.
This pull request is being automatically deployed with vercel-action

Please sign in to comment.