Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make SimpleDateFormat usages thread-local to fix date parsing corruption #228

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
_Generated automatically by maven_

## Next Release
* *2020-08-27 14:37:38* [#228](https://github.com/rcongiu/Hive-JSON-Serde/pull/228) Make usages of SimpleDateFormat thread-local and therefore thread safe _[@pettyjamesm](https://github.com/pettyjamesm)_
* *2017-10-06 10:04:22* Add configuration for explicit null value in the serialized output JSON _(lyair1)_

## 1.3.8
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,14 @@
import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableDateObjectInspector;

import java.sql.Date;
import java.text.SimpleDateFormat;

/**
* Created by rcongiu on 11/12/15.
*/
public class JavaStringDateObjectInspector extends AbstractPrimitiveJavaObjectInspector
implements SettableDateObjectInspector {

private static SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
private static final ThreadLocalSimpleDateFormat sdf = new ThreadLocalSimpleDateFormat("yyyy-MM-dd");

public JavaStringDateObjectInspector() {
super(TypeEntryShim.dateType);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,7 @@
package org.openx.data.jsonserde.objectinspector.primitive;

import java.sql.Timestamp;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.TimeZone;
import java.util.regex.Pattern;
Expand All @@ -25,16 +22,9 @@ private ParsePrimitiveUtils() {
}

// timestamps are expected to be in UTC
public final static DateFormat UTC_FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
public final static DateFormat OFFSET_FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssX");
public final static DateFormat NON_UTC_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
static DateFormat[] dateFormats = { UTC_FORMAT, OFFSET_FORMAT,NON_UTC_FORMAT};
static {
TimeZone tz = TimeZone.getTimeZone("UTC");
for( DateFormat df : dateFormats) {
df.setTimeZone(tz);
}
}
public final static ThreadLocalSimpleDateFormat UTC_FORMAT = new ThreadLocalSimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", TimeZone.getTimeZone("UTC"));
public final static ThreadLocalSimpleDateFormat OFFSET_FORMAT = new ThreadLocalSimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssX", TimeZone.getTimeZone("UTC"));
public final static ThreadLocalSimpleDateFormat NON_UTC_FORMAT = new ThreadLocalSimpleDateFormat("yyyy-MM-dd HH:mm:ss", TimeZone.getTimeZone("UTC"));

static Pattern hasTZOffset = Pattern.compile(".+(\\+|-)\\d{2}:?\\d{2}$");

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package org.openx.data.jsonserde.objectinspector.primitive;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.TimeZone;

/**
 * Wraps a {@link SimpleDateFormat} in a {@link ThreadLocal}, because {@code SimpleDateFormat} is not thread safe:
 * it mutates several of its own fields while parsing and formatting. When a single instance is used from multiple
 * threads, this can corrupt results in a way that may or may not raise an exception (i.e. silent corruption is
 * possible).
 *
 * <p>Each thread lazily receives its own formatter, built from the pattern and optional time zone supplied at
 * construction time.
 */
final class ThreadLocalSimpleDateFormat {
    private final ThreadLocal<SimpleDateFormat> threadLocal;

    /**
     * Creates a wrapper whose per-thread formatters use the JVM default time zone.
     *
     * @param pattern the {@link SimpleDateFormat} pattern
     * @throws IllegalArgumentException if a formatter cannot be built from {@code pattern}
     */
    public ThreadLocalSimpleDateFormat(final String pattern) {
        this(pattern, null);
    }

    /**
     * Creates a wrapper whose per-thread formatters use the given pattern and time zone.
     *
     * @param pattern  the {@link SimpleDateFormat} pattern
     * @param timeZone the time zone to apply to every formatter, or {@code null} to keep the default
     * @throws IllegalArgumentException if a formatter cannot be built from {@code pattern}
     */
    public ThreadLocalSimpleDateFormat(final String pattern, final TimeZone timeZone) {
        // TimeZone is mutable and formatters are created lazily, possibly long after this constructor
        // returns. Snapshot the zone so later changes made by the caller cannot leak into formatters
        // that other threads create afterwards.
        final TimeZone zone = timeZone == null ? null : (TimeZone) timeZone.clone();
        try {
            // Build one throw-away formatter now so a bad pattern fails at construction time rather
            // than on the first parse/format call of some arbitrary thread.
            newFormat(pattern, zone);
        } catch (RuntimeException e) {
            throw new IllegalArgumentException("Failed to create ThreadLocalSimpleDateFormat with pattern: " + pattern, e);
        }
        this.threadLocal = new ThreadLocal<SimpleDateFormat>() {
            @Override
            protected SimpleDateFormat initialValue() {
                return newFormat(pattern, zone);
            }
        };
    }

    /** Builds a fresh formatter for {@code pattern}, applying {@code zone} when it is non-null. */
    private static SimpleDateFormat newFormat(final String pattern, final TimeZone zone) {
        final SimpleDateFormat format = new SimpleDateFormat(pattern);
        if (zone != null) {
            format.setTimeZone(zone);
        }
        return format;
    }

    /**
     * Parses {@code source} with the calling thread's formatter.
     *
     * @param source the text to parse
     * @return the parsed date
     * @throws ParseException if the underlying formatter cannot parse {@code source}
     */
    public Date parse(String source) throws ParseException {
        return threadLocal.get().parse(source);
    }

    /**
     * Formats {@code date} with the calling thread's formatter.
     *
     * @param date the date to format
     * @return the formatted text
     */
    public String format(Date date) {
        return threadLocal.get().format(date);
    }

    /**
     * Formats {@code value} with the calling thread's formatter. The value is boxed and handed to
     * {@link java.text.Format#format(Object)}.
     *
     * @param value the numeric time value to format
     * @return the formatted text
     */
    public String format(long value) {
        return threadLocal.get().format(value);
    }
}