Skip to content

Commit

Permalink
Make SimpleDateFormat usages thread-local to fix date parsing corruption
Browse files Browse the repository at this point in the history
SimpleDateFormat is not threadsafe since it mutates fields internally
during calls to parse and format. This changes previously static instances
of SimpleDateFormat into static thread-local instances to avoid the the
possibility of data becoming corrupted as a result of concurrent accesses
to these instances from multiple threads
  • Loading branch information
pettyjamesm committed Oct 22, 2020
1 parent 5c7ca56 commit a9e2493
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 15 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
_Generated automatically by maven_

## Next Release
* *2020-08-27 14:37:38* [#228](https://github.com/rcongiu/Hive-JSON-Serde/pull/228) Make usages of SimpleDateFormat thread-local and therefore thread safe _[@pettyjamesm](https://github.com/pettjamesm)_
* *2017-10-06 10:04:22* Add configuration for explicit null value in the serialized output JSON _(lyair1)_

## 1.3.8
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,14 @@
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableDateObjectInspector;

import java.sql.Date;
import java.text.SimpleDateFormat;

/**
* Created by rcongiu on 11/12/15.
*/
public class JavaStringDateObjectInspector extends AbstractPrimitiveJavaObjectInspector
implements SettableDateObjectInspector {

private static SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
private static final ThreadLocalSimpleDateFormat sdf = new ThreadLocalSimpleDateFormat("yyyy-MM-dd");

public JavaStringDateObjectInspector() {
super(TypeEntryShim.dateType);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,7 @@
package org.openx.data.jsonserde.objectinspector.primitive;

import java.sql.Timestamp;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.TimeZone;
import java.util.regex.Pattern;
Expand All @@ -25,16 +22,9 @@ private ParsePrimitiveUtils() {
}

// timestamps are expected to be in UTC
public final static DateFormat UTC_FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
public final static DateFormat OFFSET_FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssX");
public final static DateFormat NON_UTC_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
static DateFormat[] dateFormats = { UTC_FORMAT, OFFSET_FORMAT,NON_UTC_FORMAT};
static {
TimeZone tz = TimeZone.getTimeZone("UTC");
for( DateFormat df : dateFormats) {
df.setTimeZone(tz);
}
}
public final static ThreadLocalSimpleDateFormat UTC_FORMAT = new ThreadLocalSimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", TimeZone.getTimeZone("UTC"));
public final static ThreadLocalSimpleDateFormat OFFSET_FORMAT = new ThreadLocalSimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssX", TimeZone.getTimeZone("UTC"));
public final static ThreadLocalSimpleDateFormat NON_UTC_FORMAT = new ThreadLocalSimpleDateFormat("yyyy-MM-dd HH:mm:ss", TimeZone.getTimeZone("UTC"));

static Pattern hasTZOffset = Pattern.compile(".+(\\+|-)\\d{2}:?\\d{2}$");

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package org.openx.data.jsonserde.objectinspector.primitive;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.TimeZone;

/**
* Wraps a {@link SimpleDateFormat} instance with a {@link ThreadLocal} since the former is not thread safe and
* mutates more than a few underlying fields directly during parsing. When used from multiple fields, this can
* corrupt data in a way that may or may not cause exceptions (ie: can cause silent corruption).
*/
final class ThreadLocalSimpleDateFormat {
private final ThreadLocal<SimpleDateFormat> threadLocal;

public ThreadLocalSimpleDateFormat(final String pattern) {
this(pattern, null);
}

public ThreadLocalSimpleDateFormat(final String pattern, final TimeZone timeZone) {
try {
SimpleDateFormat format = new SimpleDateFormat(pattern);
if (timeZone != null) {
format.setTimeZone(timeZone);
}
} catch (Exception e) {
throw new IllegalArgumentException("Failed to create ThreadLocalSimpleDateFormat with pattern: " + pattern, e);
}
this.threadLocal = new ThreadLocal<SimpleDateFormat>() {
@Override
public SimpleDateFormat initialValue() {
SimpleDateFormat format = new SimpleDateFormat(pattern);
if (timeZone != null) {
format.setTimeZone(timeZone);
}
return format;
}
};
}

public Date parse(String source) throws ParseException {
return threadLocal.get().parse(source);
}

public String format(Date date) {
return threadLocal.get().format(date);
}

public String format(long value) {
return threadLocal.get().format(value);
}
}

0 comments on commit a9e2493

Please sign in to comment.