Skip to content

Commit b5d85a7

Browse files
committed
#20 - use fixityAlgorithm from Dataverse (5.14+)
1 parent b8bf8da commit b5d85a7

File tree

2 files changed

+79
-19
lines changed

2 files changed

+79
-19
lines changed

src/main/java/org/sead/uploader/dataverse/DVUploader.java

Lines changed: 72 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,8 @@ public class DVUploader extends AbstractUploader {
9999
private static boolean directUpload = true;
100100
private static boolean trustCerts = false;
101101
private static boolean singleFile = false;
102+
103+
private String fixityAlgorithm = "MD5";
102104

103105
private int timeout = 1200;
104106
private int httpConcurrency = 4;
@@ -225,7 +227,54 @@ public boolean parseCustomArg(String arg) {
225227
}
226228
return false;
227229
}
230+
231+
232+
@Override
233+
public void processRequests() {
234+
httpclient = getSharedHttpClient();
235+
236+
try {
237+
// This api call will check for the fixityAlgorithm. Before v5.14, Dataverse servers should respond with a 404 and we'll use the default.
238+
// http://$SERVER/api/files/fixityAlgorithm
239+
240+
String serviceUrl = server + "/api/files/fixityAlgorithm";
241+
HttpGet httpget = new HttpGet(serviceUrl);
228242

243+
CloseableHttpResponse response = httpclient.execute(httpget, getLocalContext());
244+
try {
245+
switch (response.getStatusLine().getStatusCode()) {
246+
case 200:
247+
HttpEntity resEntity = response.getEntity();
248+
if (resEntity != null) {
249+
String res = EntityUtils.toString(resEntity);
250+
String alg = (new JSONObject(res)).getJSONObject("data").getString("message");
251+
try{
252+
MessageDigest.getInstance(alg);
253+
fixityAlgorithm = alg;
254+
println("Using FixityAlgorithm configured for this Dataverse: " + fixityAlgorithm);
255+
} catch (NoSuchAlgorithmException e) {
256+
println("Unknown FixityAlgorithm requested by this Dataverse: " + alg + ", using the default: " + fixityAlgorithm);
257+
}
258+
}
259+
break;
260+
case 404:
261+
println("FixityAlgorithm API call not available, using the default: " + fixityAlgorithm);
262+
break;
263+
default:
264+
// Report unexpected errors and assume dataset doesn't exist
265+
println("Error response when checking for fixityAlgorithm: "
266+
+ response.getStatusLine().getReasonPhrase());
267+
break;
268+
}
269+
} finally {
270+
response.close();
271+
}
272+
} catch (IOException e) {
273+
println("Error processing fixityAlgorithm API request: " + e.getMessage());
274+
}
275+
super.processRequests();
276+
}
277+
229278
private ZipFile zf = null;
230279

231280
@Override
@@ -951,7 +1000,7 @@ private String multipartDirectFileUpload(Resource file, String path, int retries
9511000

9521001
httpput.addHeader("x-amz-tagging", "dv-state=temp");
9531002
try {
954-
MessageDigest messageDigest = MessageDigest.getInstance("MD5");
1003+
MessageDigest messageDigest = MessageDigest.getInstance(fixityAlgorithm);
9551004

9561005
try (InputStream inStream = file.getInputStream(); DigestInputStream digestInputStream = new DigestInputStream(inStream, messageDigest)) {
9571006
// This is the new form for requests - keeping the example but won't update until we can change all
@@ -978,7 +1027,10 @@ private String multipartDirectFileUpload(Resource file, String path, int retries
9781027
jsonData.put("storageIdentifier", storageIdentifier);
9791028
jsonData.put("fileName", file.getName());
9801029
jsonData.put("mimeType", file.getMimeType());
981-
jsonData.put("md5Hash", localchecksum);
1030+
JSONObject inputChecksumObject = new JSONObject();
1031+
inputChecksumObject.put("@type", fixityAlgorithm);
1032+
inputChecksumObject.put("@value", localchecksum);
1033+
jsonData.put("checksum", inputChecksumObject);
9821034
jsonData.put("fileSize", file.length());
9831035
if (recurse) {
9841036
// Dataverse takes paths without an initial / and ending without a /
@@ -991,7 +1043,7 @@ private String multipartDirectFileUpload(Resource file, String path, int retries
9911043
}
9921044
}
9931045
file.setMetadata(jsonData);
994-
dataId = "md5:" + localchecksum;
1046+
dataId = fixityAlgorithm + ":" + localchecksum;
9951047
}
9961048
if (dataId != null) {
9971049
retries = 0;
@@ -1007,7 +1059,7 @@ private String multipartDirectFileUpload(Resource file, String path, int retries
10071059
}
10081060

10091061
} catch (NoSuchAlgorithmException nsae) {
1010-
println("MD5 algorithm not found: " + nsae.getMessage());
1062+
println("Fixity algorithm not found: " + nsae.getMessage());
10111063
}
10121064
} else {
10131065

@@ -1027,11 +1079,11 @@ private String multipartDirectFileUpload(Resource file, String path, int retries
10271079
HttpPartUploadJob.setHttpClientContext(getLocalContext());
10281080
HttpPartUploadJob.setPartSize(maxPartSize);
10291081

1030-
//Create a map to store the eTags from the parts and the md5 calculated for the whole file
1082+
//Create a map to store the eTags from the parts and the fixityAlg calculated for the whole file
10311083
Map<String, String> mpUploadInfoMap = new HashMap<String, String>(uploadUrls.length() + 1);
1032-
//Setup a job to calculate the md5 hash of the file
1084+
//Setup a job to calculate the fixityAlg hash of the file
10331085
//Probably helpful to have it run in parallel, but it could be a pre or post step as well. If the network is fast relative to disk, we may want the executor to use one extra thread for this
1034-
MD5Job mjob = new MD5Job(file, mpUploadInfoMap);
1086+
DigestJob mjob = new DigestJob(file, mpUploadInfoMap, fixityAlgorithm);
10351087
executor.execute(mjob);
10361088

10371089
//Now set up upload jobs for each part
@@ -1075,15 +1127,15 @@ private String multipartDirectFileUpload(Resource file, String path, int retries
10751127
break;
10761128
}
10771129
}
1078-
//Technically, the uploads to S3 could succeed and only the md5 fails, but in this case we still want to abort the MP Upload, not complete it.
1079-
if (!mpUploadInfoMap.containsKey("md5")) {
1130+
//Technically, the uploads to S3 could succeed and only the fixityAlg fails, but in this case we still want to abort the MP Upload, not complete it.
1131+
if (!mpUploadInfoMap.containsKey(fixityAlgorithm)) {
10801132
fileUploadComplete = false;
10811133
}
10821134
if (fileUploadComplete) {
10831135
println("Part uploads Completed for " + storageIdentifier);
10841136
HttpPut completeUpload = new HttpPut(server + completeUrl + "&key=" + apiKey);
10851137
JSONObject eTags = new JSONObject();
1086-
((Set<String>) mpUploadInfoMap.keySet()).stream().filter(partNo -> (!partNo.equals("md5"))).forEachOrdered(partNo -> {
1138+
((Set<String>) mpUploadInfoMap.keySet()).stream().filter(partNo -> (!partNo.equals(fixityAlgorithm))).forEachOrdered(partNo -> {
10871139
eTags.put(partNo, mpUploadInfoMap.get(partNo));
10881140
});
10891141
StringEntity body = new StringEntity(eTags.toString());
@@ -1097,13 +1149,16 @@ private String multipartDirectFileUpload(Resource file, String path, int retries
10971149
if (status == 200) {
10981150
println("Successful upload of " + file.getAbsolutePath());
10991151
if (singleFile) {
1100-
dataId = registerFileWithDataverse(file, path, storageIdentifier, mpUploadInfoMap.get("md5"), retries);
1152+
dataId = registerFileWithDataverse(file, path, storageIdentifier, mpUploadInfoMap.get(fixityAlgorithm), retries);
11011153
} else {
11021154
JSONObject jsonData = new JSONObject();
11031155
jsonData.put("storageIdentifier", storageIdentifier);
11041156
jsonData.put("fileName", file.getName());
11051157
jsonData.put("mimeType", file.getMimeType());
1106-
jsonData.put("md5Hash", mpUploadInfoMap.get("md5"));
1158+
JSONObject inputChecksumObject = new JSONObject();
1159+
inputChecksumObject.put("@type", fixityAlgorithm);
1160+
inputChecksumObject.put("@value", mpUploadInfoMap.get(fixityAlgorithm));
1161+
jsonData.put("checksum", inputChecksumObject);
11071162
jsonData.put("fileSize", file.length());
11081163
if (recurse) {
11091164
// Dataverse takes paths without an initial / and ending without a /
@@ -1116,7 +1171,7 @@ private String multipartDirectFileUpload(Resource file, String path, int retries
11161171
}
11171172
}
11181173
file.setMetadata(jsonData);
1119-
dataId = "md5:" + mpUploadInfoMap.get("md5");
1174+
dataId = fixityAlgorithm + ":" + mpUploadInfoMap.get(fixityAlgorithm);
11201175
}
11211176
} else {
11221177
println("Partial upload of " + file.getAbsolutePath() + ", complete upload failed with status: " + status);
@@ -1171,7 +1226,10 @@ private String registerFileWithDataverse(Resource file, String path, String stor
11711226
jsonData.put("storageIdentifier", storageIdentifier);
11721227
jsonData.put("fileName", file.getName());
11731228
jsonData.put("mimeType", file.getMimeType());
1174-
jsonData.put("md5Hash", checksum);
1229+
JSONObject inputChecksumObject = new JSONObject();
1230+
inputChecksumObject.put("@type", fixityAlgorithm);
1231+
inputChecksumObject.put("@value", checksum);
1232+
jsonData.put("checksum", inputChecksumObject);
11751233
jsonData.put("fileSize", file.length());
11761234
if (recurse) {
11771235
// Dataverse takes paths without an initial / and ending without a /

src/main/java/org/sead/uploader/dataverse/MD5Job.java renamed to src/main/java/org/sead/uploader/dataverse/DigestJob.java

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,16 @@
1919
*
2020
* @author Jim
2121
*/
22-
public class MD5Job implements Runnable {
22+
public class DigestJob implements Runnable {
2323

2424
Resource file;
2525
Map infoMap;
26+
final String alg;
2627

27-
public MD5Job(Resource file, Map infoMap) throws IllegalStateException {
28+
public DigestJob(Resource file, Map infoMap, String alg) throws IllegalStateException {
2829
this.file = file;
2930
this.infoMap = infoMap;
31+
this.alg = alg;
3032
}
3133

3234
/*
@@ -37,21 +39,21 @@ public MD5Job(Resource file, Map infoMap) throws IllegalStateException {
3739
@Override
3840
public void run() {
3941
try {
40-
MessageDigest messageDigest = MessageDigest.getInstance("MD5");
42+
MessageDigest messageDigest = MessageDigest.getInstance(alg);
4143

4244
try (InputStream inStream = file.getInputStream(); DigestInputStream digestInputStream = new DigestInputStream(inStream, messageDigest)) {
4345
byte[] bytes;
4446
bytes = new byte[64*1024];
4547
while(digestInputStream.read(bytes) >= 0) {
4648
}
4749
String checksum = Hex.encodeHexString(digestInputStream.getMessageDigest().digest());
48-
infoMap.put("md5", checksum);
50+
infoMap.put(alg, checksum);
4951
} catch (IOException e) {
5052
e.printStackTrace(System.out);
5153
println("Error calculating digest for: " + file.getAbsolutePath() + " : " + e.getMessage());
5254
}
5355
} catch (NoSuchAlgorithmException nsae) {
54-
println("MD5 algorithm not found: " + nsae.getMessage());
56+
println("Fixity algorithm not found: " + nsae.getMessage());
5557
}
5658
}
5759
}

0 commit comments

Comments
 (0)