Skip to content

Commit c089d92

Browse files
author
wuliang
committed
fix: task ID of image blobs
1 parent 118416e commit c089d92

File tree

2 files changed

+100
-1
lines changed

2 files changed

+100
-1
lines changed

dragonfly-client-config/src/dfdaemon.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1105,6 +1105,12 @@ impl Default for Rule {
11051105
}
11061106
}
11071107

1108+
/// default_proxy_registry_mirror_enable_blob_based_task_id returns the value used for
/// `enable_blob_based_task_id` when the configuration does not set it explicitly.
#[inline]
fn default_proxy_registry_mirror_enable_blob_based_task_id() -> bool {
    // Blob-based task IDs are switched on unless the user opts out.
    const ENABLED_BY_DEFAULT: bool = true;
    ENABLED_BY_DEFAULT
}
1113+
11081114
/// RegistryMirror is the registry mirror configuration.
11091115
#[derive(Debug, Clone, Validate, Deserialize)]
11101116
#[serde(default, rename_all = "camelCase")]
@@ -1120,6 +1126,14 @@ pub struct RegistryMirror {
11201126
/// If the registry uses a self-signed cert, the client should set the
11211127
/// cert for the registry mirror.
11221128
pub cert: Option<PathBuf>,
1129+
1130+
/// enable_blob_based_task_id indicates whether to calculate the task ID based on the blob's SHA256 digest
1131+
/// for OCI registry blob download URLs. When enabled, if the download URL is for an image blob
1132+
/// (e.g., /v2/<name>/blobs/sha256:<digest>), the task ID will be calculated based on the blob's digest
1133+
/// instead of the full URL. This allows the same blob from different registries to share the same task ID,
1134+
/// avoiding redundant downloads and storage.
1135+
#[serde(default = "default_proxy_registry_mirror_enable_blob_based_task_id")]
1136+
pub enable_blob_based_task_id: bool,
11231137
}
11241138

11251139
/// RegistryMirror implements Default.
@@ -1128,6 +1142,7 @@ impl Default for RegistryMirror {
11281142
Self {
11291143
addr: default_proxy_registry_mirror_addr(),
11301144
cert: None,
1145+
enable_blob_based_task_id: default_proxy_registry_mirror_enable_blob_based_task_id(),
11311146
}
11321147
}
11331148
}

dragonfly-client/src/proxy/mod.rs

Lines changed: 85 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1097,6 +1097,34 @@ fn make_registry_mirror_request(
10971097
Ok(request)
10981098
}
10991099

1100+
/// extract_blob_digest_from_url extracts the blob digest from a registry blob URL path.
///
/// Returns `Some("sha256:<digest>")` when `path` contains `/blobs/sha256:` followed by a
/// canonical SHA256 digest, i.e. exactly 64 lowercase hexadecimal characters. Anything
/// after a `?` or `#` is ignored. Returns `None` for every other path.
///
/// Uppercase hex digits are rejected on purpose: the OCI digest grammar only permits
/// `[a-f0-9]{64}` for sha256, and accepting other spellings would let the same blob be
/// keyed under more than one digest string.
///
/// Example blob URLs:
/// - /v2/<name>/blobs/sha256:<digest>
/// - /v2/<namespace>/<repo>/blobs/sha256:<digest>
fn extract_blob_digest_from_url(path: &str) -> Option<String> {
    /// Path marker that precedes the hex-encoded digest in a blob URL.
    const BLOB_MARKER: &str = "/blobs/sha256:";
    /// Length of a hex-encoded SHA256 digest.
    const SHA256_HEX_LEN: usize = 64;

    // Everything after the first marker; bail out when the marker is absent.
    let (_, after_marker) = path.split_once(BLOB_MARKER)?;

    // The digest may be followed by a query string or fragment; keep only the digest part.
    let digest = after_marker
        .split(|c: char| matches!(c, '?' | '#'))
        .next()
        .unwrap_or("");

    // Only the canonical lowercase encoding is treated as a digest.
    let is_canonical = digest.len() == SHA256_HEX_LEN
        && digest
            .bytes()
            .all(|b| matches!(b, b'0'..=b'9' | b'a'..=b'f'));

    is_canonical.then(|| format!("sha256:{}", digest))
}
1127+
11001128
/// make_download_task_request makes a download task request by the request.
11011129
fn make_download_task_request(
11021130
config: Arc<Config>,
@@ -1121,6 +1149,21 @@ fn make_download_task_request(
11211149
}
11221150
}
11231151

1152+
// Determine the content for calculating task ID.
1153+
// Priority:
1154+
// 1. Explicit header value (X-Dragonfly-Content-For-Calculating-Task-ID)
1155+
// 2. Blob digest from URL (if enable_blob_based_task_id is true)
1156+
// 3. None (will use URL-based calculation)
1157+
let content_for_calculating_task_id = header::get_content_for_calculating_task_id(&header)
1158+
.or_else(|| {
1159+
if config.proxy.registry_mirror.enable_blob_based_task_id {
1160+
let path = request.uri().path();
1161+
extract_blob_digest_from_url(path)
1162+
} else {
1163+
None
1164+
}
1165+
});
1166+
11241167
Ok(DownloadTaskRequest {
11251168
download: Some(Download {
11261169
url: make_download_url(request.uri(), rule.use_tls, rule.redirect.clone())?,
@@ -1149,7 +1192,7 @@ fn make_download_task_request(
11491192
is_prefetch: false,
11501193
need_piece_content: false,
11511194
force_hard_link: header::get_force_hard_link(&header),
1152-
content_for_calculating_task_id: header::get_content_for_calculating_task_id(&header),
1195+
content_for_calculating_task_id,
11531196
remote_ip: Some(remote_ip.to_string()),
11541197
concurrent_piece_count: Some(config.download.concurrent_piece_count),
11551198
overwrite: false,
@@ -1281,3 +1324,44 @@ fn empty() -> BoxBody<Bytes, ClientError> {
12811324
.map_err(|never| match never {})
12821325
.boxed()
12831326
}
1327+
1328+
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that blob digests are extracted from registry blob paths and that
    /// non-blob paths and malformed digests yield `None`.
    #[test]
    fn test_extract_blob_digest_from_url() {
        // Test cases for valid blob URLs
        let test_cases = vec![
            (
                "/v2/library/nginx/blobs/sha256:1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef",
                Some("sha256:1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef".to_string()),
            ),
            (
                "/v2/myorg/myrepo/blobs/sha256:abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890",
                Some("sha256:abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890".to_string()),
            ),
            (
                "/v2/namespace/repo/blobs/sha256:0000000000000000000000000000000000000000000000000000000000000000?query=param",
                Some("sha256:0000000000000000000000000000000000000000000000000000000000000000".to_string()),
            ),
            (
                // A fragment after the digest is ignored, just like a query string.
                "/v2/namespace/repo/blobs/sha256:0000000000000000000000000000000000000000000000000000000000000000#fragment",
                Some("sha256:0000000000000000000000000000000000000000000000000000000000000000".to_string()),
            ),
        ];

        for (input, expected) in test_cases {
            assert_eq!(extract_blob_digest_from_url(input), expected, "input: {}", input);
        }

        // Generated inputs that isolate each validation rule.
        let blob_prefix = "/v2/library/nginx/blobs/sha256:";
        // Correct length but not hex: exercises the character check rather than the length check.
        let full_length_non_hex = format!("{}{}", blob_prefix, "g".repeat(64));
        // Off-by-one lengths around the required 64 characters.
        let one_char_short = format!("{}{}", blob_prefix, "a".repeat(63));
        let one_char_long = format!("{}{}", blob_prefix, "a".repeat(65));
        // A valid digest followed by another path segment is not a blob URL.
        let trailing_segment = format!("{}{}/extra", blob_prefix, "a".repeat(64));

        // Test cases for invalid URLs (should return None)
        let invalid_cases = vec![
            "/v2/library/nginx/manifests/latest",
            "/v2/library/nginx/blobs/sha256:short",
            "/v2/library/nginx/blobs/sha256:xyz", // Too short (and not hex)
            "/api/v1/some/other/path",
            "",
            full_length_non_hex.as_str(),
            one_char_short.as_str(),
            one_char_long.as_str(),
            trailing_segment.as_str(),
        ];

        for input in invalid_cases {
            assert_eq!(extract_blob_digest_from_url(input), None, "input: {}", input);
        }
    }
}

0 commit comments

Comments
 (0)