-
Notifications
You must be signed in to change notification settings - Fork 4.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
HIVE-28197: Add deserializer to convert JSON plans to RelNodes #5131
Conversation
97dfdee
to
7d7f9c2
Compare
This reverts commit cef3527.
…ion.deserialization.enabled=true
} | ||
return map; | ||
} | ||
} | ||
|
||
private Object toJson(RexNode node) { | ||
final Map<String, Object> map; | ||
switch (node.getKind()) { | ||
case FIELD_ACCESS: | ||
map = jsonBuilder.map(); | ||
final RexFieldAccess fieldAccess = (RexFieldAccess) node; | ||
map.put("field", fieldAccess.getField().getName()); | ||
map.put("expr", toJson(fieldAccess.getReferenceExpr())); | ||
return map; | ||
case LITERAL: | ||
return toJsonLiteral((RexLiteral) node); | ||
case INPUT_REF: | ||
case LOCAL_REF: | ||
map = jsonBuilder.map(); | ||
map.put("input", ((RexSlot) node).getIndex()); | ||
map.put("name", ((RexSlot) node).getName()); | ||
map.put("type", toJson(node.getType())); | ||
return map; | ||
case CORREL_VARIABLE: | ||
map = jsonBuilder.map(); | ||
map.put("correl", ((RexCorrelVariable) node).getName()); | ||
map.put("type", toJson(node.getType())); | ||
return map; | ||
case DYNAMIC_PARAM: | ||
map = jsonBuilder.map(); | ||
map.put("dynamic_param", true); | ||
map.put("index", ((RexDynamicParam) node).getIndex()); | ||
return map; | ||
default: | ||
Map<String, Object> mapRexCall = toJsonRexCall(node); | ||
if (mapRexCall != null) { | ||
return mapRexCall; | ||
} | ||
throw new UnsupportedOperationException("unknown rex " + node); | ||
} | ||
} | ||
|
||
@Nullable | ||
private Map<String, Object> toJsonRexCall(RexNode node) { | ||
Map<String, Object> map = null; | ||
if (node instanceof RexCall) { | ||
final RexCall call = (RexCall) node; | ||
map = jsonBuilder.map(); | ||
map.put("op", toJson(call.getOperator())); | ||
map.put("type", toJson(call.getType())); | ||
final List<Object> list = jsonBuilder.list(); | ||
for (RexNode operand : call.getOperands()) { | ||
list.add(toJson(operand)); | ||
} | ||
map.put(OPERANDS, list); | ||
switch (node.getKind()) { | ||
case CAST: | ||
case MAP_QUERY_CONSTRUCTOR: | ||
case MAP_VALUE_CONSTRUCTOR: | ||
case ARRAY_QUERY_CONSTRUCTOR: | ||
case ARRAY_VALUE_CONSTRUCTOR: | ||
map.put("type", toJson(node.getType())); | ||
break; | ||
default: | ||
break; | ||
} | ||
if (call.getOperator() instanceof SqlFunction && | ||
(((SqlFunction) call.getOperator()).getFunctionType().isUserDefined())) { | ||
SqlOperator op = call.getOperator(); | ||
map.put("class", op.getClass().getName()); | ||
map.put("type", toJson(node.getType())); | ||
map.put("deterministic", op.isDeterministic()); | ||
map.put("dynamic", op.isDynamicFunction()); | ||
} | ||
if (call instanceof RexOver) { | ||
RexOver over = (RexOver) call; | ||
map.put(DISTINCT, over.isDistinct()); | ||
map.put("type", toJson(node.getType())); | ||
map.put("window", toJson(over.getWindow())); | ||
map.put("ignoreNulls", toJson(over.ignoreNulls())); | ||
} | ||
} | ||
return map; | ||
} | ||
|
||
@NotNull | ||
private Map<String, Object> toJsonLiteral(RexLiteral literal) { | ||
final Map<String, Object> map; | ||
map = jsonBuilder.map(); | ||
final Object value; | ||
if (SqlTypeFamily.TIMESTAMP == literal.getTypeName().getFamily()) { | ||
// Had to do this to prevent millis or nanos from getting trimmed | ||
value = literal.computeDigest(RexDigestIncludeType.NO_TYPE); | ||
} else { | ||
value = literal.getValue3(); | ||
} | ||
map.put(LITERAL, RelEnumTypes.fromEnum(value)); | ||
map.put("type", toJson(literal.getType())); | ||
return map; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is the reason for not using the Calcite version of these methods?
Or is it possible to call super and add extensions if needed?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have tried to use the Calcite code as much as possible, and only had to copy these when I needed to add something extra to the method or a part of the method and the super method is private (and thus not override-able). For example in this case, for TIMESTAMPs I needed to handle it differently than what was implemented in Calcite.
Also you can see that toJsonRexCall
has extra things like adding type to RexCalls, support for MAPs and ARRAYs which is missing in the super method, etc. These are needed in the serialized plan to get all the information we need to deserialize it.
To see an example of where I was able to reuse super method, please checkout this patch for AggregateCalls.
final Boolean nullable = (Boolean) o.get(NULLABLE); | ||
RelDataType keyType = toType(typeFactory, key); | ||
RelDataType valueType = toType(typeFactory, value); | ||
type = typeFactory.createMapType(keyType, valueType); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit.: move the declaration to this line
final Boolean nullable = (Boolean) o.get(NULLABLE); | ||
final Object component = o.get("component"); | ||
type = toType(typeFactory, component); | ||
type = typeFactory.createArrayType(type, -1); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit.: move the declaration to this line
.collect(Collectors.toList()); | ||
final boolean isDeterministic = (boolean) map.get("deterministic"); | ||
final boolean isDynamicFunction = (boolean) map.get("dynamic"); | ||
return new org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelJson.CalciteSqlFn( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit. Is full qualification of CalciteSqlFn
is required here?
//~ Constructors ------------------------------------------------------------- | ||
|
||
public HiveRelJsonImpl() { | ||
public HiveRelJsonImpl(boolean includeTableAndColumnStats) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since HiveRelJsonImpl
is an extension of RelJsonWriter
how about following the same pattern: extend HiveRelJsonImpl
and move stats related properties to the new class?
assert parser.currentToken() == JsonToken.START_OBJECT; | ||
parser.nextToken(); | ||
assert parser.currentToken() == JsonToken.FIELD_NAME; | ||
parser.nextToken(); | ||
assert parser.currentToken() == JsonToken.VALUE_STRING; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
what happens in production code if one if these asserts fail? Should we throw exception instead?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Adding a catch block for AssertionError
and re-throwing it as an IOException.
o = mapper.configure(DeserializationFeature.USE_BIG_DECIMAL_FOR_FLOATS, true) | ||
.readValue(parser.getValueAsString(), TYPE_REF); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What are the benefits of using Map<String, Object> as a json element representation (DTO) instead of annotated POJOs?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we need the elements in Map<String, Object>
to use Calcite's RelJson
which expects elements to be in Map<String, Object>
.
… a different class
Quality Gate passedIssues Measures |
This pull request has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. |
What changes were proposed in this pull request?
Adds a deserializer that converts JSON plans to RelNodes
Why are the changes needed?
This is a new feature which can be used to get Calcite plans from JSON representation of the plan.
Does this PR introduce any user-facing change?
No
Is the change a dependency upgrade?
No
How was this patch tested?
I have added the property
set hive.test.cbo.plan.serialization.deserialization.enabled=true;
to all the plans that had benign changes after deserialization. All other tests didn't have any changes.Although I am hoping to get some feedback regarding what kind of tests to add. In one of the commits, I have tested by running all qtests through the deserializer - all of them pass and some of them have benign changes.