Skip to content

Commit

Permalink
Make SimpleDateFormat usages thread-local to fix date parsing corruption
Browse files Browse the repository at this point in the history
SimpleDateFormat is not threadsafe since it mutates fields internally
during calls to parse and format. This changes previously static instances
of SimpleDateFormat into static thread-local instances to avoid the the
possibility of data becoming corrupted as a result of concurrent accesses
to these instances from multiple threads
  • Loading branch information
pettyjamesm committed Aug 27, 2020
1 parent 8d5c1bf commit cecf824
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
public class JavaStringDateObjectInspector extends AbstractPrimitiveJavaObjectInspector
implements SettableDateObjectInspector {

private static SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
private static final ThreadLocalSimpleDateFormat sdf = new ThreadLocalSimpleDateFormat("yyyy-MM-dd");

public JavaStringDateObjectInspector() {
super(TypeEntryShim.dateType);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package org.openx.data.jsonserde.objectinspector.primitive;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.TimeZone;

/**
* Wraps a {@link SimpleDateFormat} instance with a {@link ThreadLocal} since the former is not thread safe and
* mutates more than a few underlying fields directly during parsing. When used from multiple fields, this can
* corrupt data in a way that may or may not cause exceptions (ie: can cause silent corruption).
*/
final class ThreadLocalSimpleDateFormat {
private final ThreadLocal<SimpleDateFormat> threadLocal;

ThreadLocalSimpleDateFormat(final String pattern) {
this(pattern, null);
}

ThreadLocalSimpleDateFormat(final String pattern, final TimeZone timeZone) {
try {
SimpleDateFormat format = new SimpleDateFormat(pattern);
if (timeZone != null) {
format.setTimeZone(timeZone);
}
} catch (Exception e) {
throw new IllegalArgumentException("Failed to create ThreadLocalSimpleDateFormat with pattern: " + pattern, e);
}
this.threadLocal = new ThreadLocal<SimpleDateFormat>() {
@Override
public SimpleDateFormat initialValue() {
SimpleDateFormat format = new SimpleDateFormat(pattern);
if (timeZone != null) {
format.setTimeZone(timeZone);
}
return format;
}
};
}

public Date parse(String source) throws ParseException {
return threadLocal.get().parse(source);
}

public String format(Date date) {
return threadLocal.get().format(date);
}

public String format(long value) {
return threadLocal.get().format(value);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,7 @@
package org.openx.data.jsonserde.objectinspector.primitive;

import java.sql.Timestamp;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.TimeZone;
import java.util.regex.Pattern;
Expand All @@ -25,16 +22,9 @@ private ParsePrimitiveUtils() {
}

// timestamps are expected to be in UTC
public final static DateFormat UTC_FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
public final static DateFormat OFFSET_FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssX");
public final static DateFormat NON_UTC_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
static DateFormat[] dateFormats = { UTC_FORMAT, OFFSET_FORMAT,NON_UTC_FORMAT};
static {
TimeZone tz = TimeZone.getTimeZone("UTC");
for( DateFormat df : dateFormats) {
df.setTimeZone(tz);
}
}
public final static ThreadLocalSimpleDateFormat UTC_FORMAT = new ThreadLocalSimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", TimeZone.getTimeZone("UTC"));
public final static ThreadLocalSimpleDateFormat OFFSET_FORMAT = new ThreadLocalSimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssX", TimeZone.getTimeZone("UTC"));
public final static ThreadLocalSimpleDateFormat NON_UTC_FORMAT = new ThreadLocalSimpleDateFormat("yyyy-MM-dd HH:mm:ss", TimeZone.getTimeZone("UTC"));

static Pattern hasTZOffset = Pattern.compile(".+(\\+|-)\\d{2}:?\\d{2}$");

Expand Down

0 comments on commit cecf824

Please sign in to comment.