HIVE-27775: DirectSQL and JDO results are different when fetching partitions by timestamp in DST shift
dengzhhu653 committed Dec 19, 2023
1 parent 4a057a7 commit 0001638
Showing 9 changed files with 228 additions and 136 deletions.
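Context for the fix (not part of the commit message): java.sql.Timestamp resolves local times against the JVM default timezone, and a local time that falls into a daylight-saving gap is silently normalized forward. A string-to-Timestamp-to-string round trip can therefore change the value, which is what lets the DirectSQL and JDO paths disagree. A minimal sketch of that JDK behavior:

import java.sql.Timestamp;
import java.util.TimeZone;

public class DstRoundTrip {
  public static void main(String[] args) {
    // On 2023-03-26, Europe/Paris jumps from 02:00 straight to 03:00,
    // so the local time 02:30 does not exist that night.
    TimeZone.setDefault(TimeZone.getTimeZone("Europe/Paris"));
    Timestamp ts = Timestamp.valueOf("2023-03-26 02:30:00");
    // The gap time is normalized forward: prints 2023-03-26 03:30:00.0.
    System.out.println(ts);
  }
}

If the metastore performs that round trip while building one filter but compares the stored partition string as-is on the other path, the two paths can return different partitions.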
@@ -1705,10 +1705,9 @@ private String generateJDOFilter(org.apache.hadoop.hive.metastore.api.Table tabl
assert table != null;
ExpressionTree.FilterBuilder filterBuilder = new ExpressionTree.FilterBuilder(true);
Map<String, Object> params = new HashMap<>();
exprTree.generateJDOFilterFragment(conf, params, filterBuilder, table.getPartitionKeys());
exprTree.accept(new ExpressionTree.JDOFilterGenerator(conf,
table.getPartitionKeys(), filterBuilder, params));
StringBuilder stringBuilder = new StringBuilder(filterBuilder.getFilter());
// replace leading &&
stringBuilder.replace(0, 4, "");
params.entrySet().stream().forEach(e -> {
int index = stringBuilder.indexOf(e.getKey());
stringBuilder.replace(index, index + e.getKey().length(), "\"" + e.getValue().toString() + "\"");
6 changes: 6 additions & 0 deletions ql/src/test/queries/clientpositive/partition_timestamp3.q
@@ -0,0 +1,6 @@
--! qt:timezone:Europe/Paris
DROP TABLE IF EXISTS payments;
CREATE EXTERNAL TABLE payments (card string) PARTITIONED BY(txn_datetime TIMESTAMP) STORED AS ORC;
INSERT into payments VALUES('3333-4444-2222-9999', '2023-03-26 02:30:00'), ('3333-4444-2222-9999', '2023-03-26 03:30:00');
SELECT * FROM payments WHERE txn_datetime = '2023-03-26 02:30:00';
SELECT * FROM payments WHERE txn_datetime = '2023-03-26 03:30:00';
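Why these two literals: 02:30 sits inside the Paris spring-forward gap on 2023-03-26, while 03:30 is an ordinary local time. A quick java.time check (illustrative, not part of the test):

import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.zone.ZoneRules;

public class GapCheck {
  public static void main(String[] args) {
    ZoneRules paris = ZoneId.of("Europe/Paris").getRules();
    // An empty offset list means the local time falls inside a DST gap.
    System.out.println(paris.getValidOffsets(LocalDateTime.parse("2023-03-26T02:30")).isEmpty()); // true
    System.out.println(paris.getValidOffsets(LocalDateTime.parse("2023-03-26T03:30")).isEmpty()); // false
  }
}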
48 changes: 48 additions & 0 deletions ql/src/test/results/clientpositive/llap/partition_timestamp3.q.out
@@ -0,0 +1,48 @@
PREHOOK: query: DROP TABLE IF EXISTS payments
PREHOOK: type: DROPTABLE
PREHOOK: Output: database:default
POSTHOOK: query: DROP TABLE IF EXISTS payments
POSTHOOK: type: DROPTABLE
POSTHOOK: Output: database:default
PREHOOK: query: CREATE EXTERNAL TABLE payments (card string) PARTITIONED BY(txn_datetime TIMESTAMP) STORED AS ORC
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@payments
POSTHOOK: query: CREATE EXTERNAL TABLE payments (card string) PARTITIONED BY(txn_datetime TIMESTAMP) STORED AS ORC
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@payments
PREHOOK: query: INSERT into payments VALUES('3333-4444-2222-9999', '2023-03-26 02:30:00'), ('3333-4444-2222-9999', '2023-03-26 03:30:00')
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@payments
POSTHOOK: query: INSERT into payments VALUES('3333-4444-2222-9999', '2023-03-26 02:30:00'), ('3333-4444-2222-9999', '2023-03-26 03:30:00')
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@payments
POSTHOOK: Output: default@payments@txn_datetime=2023-03-26 02%3A30%3A00
POSTHOOK: Output: default@payments@txn_datetime=2023-03-26 03%3A30%3A00
POSTHOOK: Lineage: payments PARTITION(txn_datetime=2023-03-26 02:30:00).card SCRIPT []
POSTHOOK: Lineage: payments PARTITION(txn_datetime=2023-03-26 03:30:00).card SCRIPT []
PREHOOK: query: SELECT * FROM payments WHERE txn_datetime = '2023-03-26 02:30:00'
PREHOOK: type: QUERY
PREHOOK: Input: default@payments
PREHOOK: Input: default@payments@txn_datetime=2023-03-26 02%3A30%3A00
#### A masked pattern was here ####
POSTHOOK: query: SELECT * FROM payments WHERE txn_datetime = '2023-03-26 02:30:00'
POSTHOOK: type: QUERY
POSTHOOK: Input: default@payments
POSTHOOK: Input: default@payments@txn_datetime=2023-03-26 02%3A30%3A00
#### A masked pattern was here ####
3333-4444-2222-9999 2023-03-26 02:30:00
PREHOOK: query: SELECT * FROM payments WHERE txn_datetime = '2023-03-26 03:30:00'
PREHOOK: type: QUERY
PREHOOK: Input: default@payments
PREHOOK: Input: default@payments@txn_datetime=2023-03-26 03%3A30%3A00
#### A masked pattern was here ####
POSTHOOK: query: SELECT * FROM payments WHERE txn_datetime = '2023-03-26 03:30:00'
POSTHOOK: type: QUERY
POSTHOOK: Input: default@payments
POSTHOOK: Input: default@payments@txn_datetime=2023-03-26 03%3A30%3A00
#### A masked pattern was here ####
3333-4444-2222-9999 2023-03-26 03:30:00
@@ -18,6 +18,7 @@

package org.apache.hadoop.hive.metastore;

import java.sql.Date;
import java.sql.SQLException;
import java.sql.SQLIntegrityConstraintViolationException;
import java.sql.SQLTransactionRollbackException;
@@ -36,6 +37,7 @@
import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars;
import org.apache.hadoop.hive.metastore.txn.TxnUtils;
import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
import org.apache.hadoop.util.ReflectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -263,7 +265,9 @@ protected String toDate(String tableValue) {

protected String toTimestamp(String tableValue) {
if (isORACLE()) {
return "TO_TIMESTAMP(" + tableValue + ", 'YYYY-MM-DD HH:mm:ss')";
return "TO_TIMESTAMP(" + tableValue + ", 'YYYY-MM-DD HH:mi:ss')";
} else if (isSQLSERVER()) {
return "CONVERT(DATETIME, " + tableValue + ")";
} else {
return "cast(" + tableValue + " as TIMESTAMP)";
}
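For a typical column expression, the three branches above render roughly as below (my reading of the string concatenation, not output captured from Hive). The Oracle change is a genuine fix: Oracle format elements are case-insensitive, so the old 'mm' meant month, not minutes; 'mi' is the minutes element.

// Illustration only: what each branch produces for a typical column expression.
String col = "\"FILTER0\".\"PART_KEY_VAL\"";
String oracle    = "TO_TIMESTAMP(" + col + ", 'YYYY-MM-DD HH:mi:ss')";
String sqlServer = "CONVERT(DATETIME, " + col + ")";
String generic   = "cast(" + col + " as TIMESTAMP)";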
@@ -748,6 +752,26 @@ public Object getBoolean(boolean val) {
return val;
}

public Object convertDateValue(Object dateValue) {
assert dateValue instanceof String;
Date date = MetaStoreUtils.convertStringToDate((String)dateValue);
Object result = MetaStoreUtils.convertDateToString(date);
return result;
}

public Object convertTimestampValue(Object timestampValue) {
assert timestampValue instanceof String;
Timestamp timestamp = MetaStoreUtils.convertStringToTimestamp((String)timestampValue);
if (isPOSTGRES() || isSQLSERVER()) {
// The timestampValue looks valid now, for Postgres or SQLServer, return timestampValue as it is,
// otherwise we may run into different results on SQL and JDO, check the partition_timestamp3.q
// for such case.
return timestampValue;
}
Object result = MetaStoreUtils.convertTimestampToString(timestamp);
return result;
}

// This class implements the Configurable interface for the benefit
// of "plugin" instances created via reflection (see invocation of
// ReflectionUtils.newInstance in method determineDatabaseProduct)
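The asymmetry in convertTimestampValue above is the heart of the fix: the value is still parsed, so garbage such as '__HIVE_DEFAULT_PARTITION__' throws and is handled by the caller, but Postgres and SQL Server get the original string back rather than the round-tripped one. A sketch of the difference, assuming the MetaStoreUtils helpers are thin wrappers over java.sql.Timestamp (an assumption; their bodies are not in this diff):

// JVM default timezone Europe/Paris; the literal sits in the DST gap.
String raw = "2023-03-26 02:30:00";

// Postgres / SQL Server after this commit: returned untouched, later
// compared natively by the database (see the typed literal below).
String forPostgres = raw;

// Other dialects: round trip through java.sql.Timestamp (assumed helper
// behavior), which normalizes the gap time forward.
String roundTripped = java.sql.Timestamp.valueOf(raw).toString(); // "2023-03-26 03:30:00.0"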
@@ -32,10 +32,8 @@
import static org.apache.hadoop.hive.metastore.ColumnType.VARCHAR_TYPE_NAME;

import java.sql.Connection;
import java.sql.Date;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@@ -1323,7 +1321,7 @@ public static FilterType fromClass(Object value){
@Override
public void visit(LeafNode node) throws MetaException {
int partColCount = partitionKeys.size();
int partColIndex = node.getPartColIndexForFilter(partitionKeys, filterBuffer);
int partColIndex = LeafNode.getPartColIndexForFilter(node.keyName, partitionKeys, filterBuffer);
if (filterBuffer.hasError()) {
return;
}
@@ -1341,29 +1339,34 @@ public void visit(LeafNode node) throws MetaException {
return;
}

String nodeValue0 = "?";
// if Filter.g does date parsing for quoted strings, we'd need to verify there's no
// type mismatch when string col is filtered by a string that looks like date.
if (colType == FilterType.Date && valType == FilterType.String) {
// Filter.g cannot parse a quoted date; try to parse date here too.
if (colType == FilterType.Date) {
try {
nodeValue = MetaStoreUtils.convertStringToDate((String)nodeValue);
nodeValue = dbType.convertDateValue(nodeValue);
valType = FilterType.Date;
} catch (Exception pe) { // do nothing, handled below - types will mismatch
if (dbType.isPOSTGRES()) {
nodeValue0 = "date '" + nodeValue + "'";
nodeValue = null;
}
} catch (Exception e) { // do nothing, handled below - types will mismatch
}
} else if (colType == FilterType.Timestamp) {
try {
if (dbType.isDERBY() || dbType.isMYSQL()) {
filterBuffer.setError("Filter pushdown not supported for timestamp on " + dbType.dbType.name());
return;
}
nodeValue = dbType.convertTimestampValue(nodeValue);
valType = FilterType.Timestamp;
if (dbType.isPOSTGRES()) {
nodeValue0 = "timestamp '" + nodeValue + "'";
nodeValue = null;
}
} catch (Exception e) {
// The nodeValue could be '__HIVE_DEFAULT_PARTITION__'
}
}

if (colType == FilterType.Timestamp && valType == FilterType.String) {
nodeValue = MetaStoreUtils.convertStringToTimestamp((String)nodeValue);
valType = FilterType.Timestamp;
}

// We format it so we are sure we are getting the right value
if (valType == FilterType.Date) {
// Format
nodeValue = MetaStoreUtils.convertDateToString((Date)nodeValue);
} else if (valType == FilterType.Timestamp) {
//format
nodeValue = MetaStoreUtils.convertTimestampToString((Timestamp) nodeValue);
}

boolean isDefaultPartition = (valType == FilterType.String) && defaultPartName.equals(nodeValue);
Expand Down Expand Up @@ -1393,8 +1396,7 @@ public void visit(LeafNode node) throws MetaException {
// Build the filter and add parameters linearly; we are traversing leaf nodes LTR.
String tableValue = "\"FILTER" + partColIndex + "\".\"PART_KEY_VAL\"";

String nodeValue0 = "?";
if (node.isReverseOrder) {
if (node.isReverseOrder && nodeValue != null) {
params.add(nodeValue);
}
String tableColumn = tableValue;
@@ -1424,14 +1426,9 @@ public void visit(LeafNode node) throws MetaException {
params.add(catName.toLowerCase());
}
tableValue += " then " + tableValue0 + " else null end)";

if (valType == FilterType.Date) {
tableValue = dbType.toDate(tableValue);
} else if (valType == FilterType.Timestamp) {
tableValue = dbType.toTimestamp(tableValue);
}
}
if (!node.isReverseOrder) {

if (!node.isReverseOrder && nodeValue != null) {
params.add(nodeValue);
}

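Net effect of the nodeValue0/nodeValue handling above, as I read it (illustration, not captured Hive output): on Postgres the converted value is inlined as a typed literal (date '...' or timestamp '...') and nodeValue is nulled so neither params.add fires; other supported dialects keep the "?" placeholder and bind the value; Derby and MySQL now refuse timestamp pushdown entirely and fall back to JDO.

// Hypothetical fragment shapes for WHERE txn_datetime = '2023-03-26 02:30:00',
// with the CASE expression over "FILTER0"."PART_KEY_VAL" shortened to COL:
//   Postgres:        COL = timestamp '2023-03-26 02:30:00'   -- no JDBC parameter bound
//   other dialects:  COL = ?                                  -- params: ["2023-03-26 02:30:00"]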
@@ -4905,7 +4905,8 @@ private String makeQueryFilterString(String catName, String dbName, Table table,
params.put("catName", catName);
}

tree.generateJDOFilterFragment(getConf(), params, queryBuilder, table != null ? table.getPartitionKeys() : null);
tree.accept(new ExpressionTree.JDOFilterGenerator(getConf(),
table != null ? table.getPartitionKeys() : null, queryBuilder, params));
if (queryBuilder.hasError()) {
assert !isValidatedFilter;
LOG.debug("JDO filter pushdown cannot be used: {}", queryBuilder.getErrorMessage());
@@ -4925,7 +4926,7 @@ private String makeQueryFilterString(String catName, String dbName, String tblNa
params.put("t1", tblName);
params.put("t2", dbName);
params.put("t3", catName);
tree.generateJDOFilterFragment(getConf(), params, queryBuilder, partitionKeys);
tree.accept(new ExpressionTree.JDOFilterGenerator(getConf(), partitionKeys, queryBuilder, params));
if (queryBuilder.hasError()) {
assert !isValidatedFilter;
LOG.debug("JDO filter pushdown cannot be used: {}", queryBuilder.getErrorMessage());