Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GLUTEN-8529][CH]Fix get_json_object when path has asterisk #8540

Merged
merged 4 commits into from
Jan 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,17 @@ class GlutenFunctionValidateSuite extends GlutenClickHouseWholeStageTransformerS
" get_json_object(string_field1, '$.a') is not null") { _ => }
}

// Exercises get_json_object with JSON paths that contain a wildcard ([*]) array step,
// reading string_field1 from the json_test row(s) where int_field1 = 7.
// The result callback is a no-op, so the only checks are whatever runQueryAndCompare performs itself.
test("Test get_json_object 12") {
  val wildcardPathQueries = Seq(
    "SELECT get_json_object(string_field1, '$.a[*].y') from json_test where int_field1 = 7",
    "select get_json_object(string_field1, '$.a[*].z.n.p') from json_test where int_field1 = 7"
  )
  // Queries run in declaration order; a failure in the first stops the second, as with sequential calls.
  wildcardPathQueries.foreach(sql => runQueryAndCompare(sql) { _ => })
}

// Runs covar_samp over (double_field1, int_field1) of json_test.
// The result callback does nothing, so only runQueryAndCompare's own checks apply.
test("Test covar_samp") {
  val covarSampSql = "SELECT covar_samp(double_field1, int_field1) from json_test"
  runQueryAndCompare(covarSampSql) { _ => }
}
Expand Down
18 changes: 14 additions & 4 deletions cpp-ch/local-engine/Functions/SparkFunctionGetJsonObject.h
Original file line number Diff line number Diff line change
Expand Up @@ -462,8 +462,8 @@ class GetJsonObjectImpl

/// Returns the number of supplied arguments minus one.
/// NOTE(review): presumably the excluded argument is the JSON input column - confirm against callers.
/// NOTE(review): if size() is unsigned (as for standard containers), an empty argument list wraps around
/// instead of yielding zero; callers are assumed to always pass at least one argument.
static size_t getNumberOfIndexArguments(const DB::ColumnsWithTypeAndName & arguments)
{
    const auto total_arguments = arguments.size();
    return total_arguments - 1;
}

bool insertResultToColumn(DB::IColumn & dest, const Element & root, DB::GeneratorJSONPath<JSONParser> & generator_json_path, bool)
{
bool insertResultToColumn(DB::IColumn & dest, const Element & root, DB::GeneratorJSONPath<JSONParser> & generator_json_path, bool path_has_asterisk)
{
Element current_element = root;
DB::VisitorStatus status;
std::stringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
Expand Down Expand Up @@ -501,10 +501,14 @@ class GetJsonObjectImpl
if (elements[0].isString())
{
auto str = elements[0].getString();
if (path_has_asterisk)
{
str = "\"" + std::string(str) + "\"";
}
serializer.addRawString(str);
}
else
{
{
serializer.addElement(elements[0]);
}
}
Expand Down Expand Up @@ -684,6 +688,7 @@ class FlattenJSONStringOnRequiredFunction : public DB::IFunction
std::vector<DB::ASTPtr> json_path_asts;

std::vector<String> required_fields;
std::vector<bool> path_has_asterisk;
const auto & first_column = arguments[0];
if (const auto * required_fields_col = typeid_cast<const DB::ColumnConst *>(arguments[1].column.get()))
{
Expand All @@ -694,6 +699,11 @@ class FlattenJSONStringOnRequiredFunction : public DB::IFunction
{
auto normalized_field = JSONPathNormalizer::normalize(field);
// LOG_ERROR(getLogger("JSONPatch"), "xxx field {} -> {}", field, normalized_field);
if(normalized_field.find("[*]") != std::string::npos)
path_has_asterisk.emplace_back(true);
else
path_has_asterisk.emplace_back(false);

required_fields.push_back(normalized_field);
tuple_columns.emplace_back(str_type->createColumn());

Expand Down Expand Up @@ -776,7 +786,7 @@ class FlattenJSONStringOnRequiredFunction : public DB::IFunction
for (size_t j = 0; j < tuple_size; ++j)
{
generator_json_paths[j]->reinitialize();
if (!impl.insertResultToColumn(*tuple_columns[j], document, *generator_json_paths[j], true))
if (!impl.insertResultToColumn(*tuple_columns[j], document, *generator_json_paths[j], path_has_asterisk[j]))
{
tuple_columns[j]->insertDefault();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -757,16 +757,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-35728: Check multiply/divide of day-time intervals of any fields by numeric")
.exclude("SPARK-35778: Check multiply/divide of year-month intervals of any fields by numeric")
enableSuite[GlutenJsonExpressionsSuite]
.exclude("$.store.book[*]")
.exclude("$.store.book[*].category")
.exclude("$.store.book[*].isbn")
.exclude("$.store.basket[*]")
.exclude("$.store.basket[*][0]")
.exclude("$.store.basket[0][*]")
.exclude("$.store.basket[*][*]")
.exclude("$.store.basket[0][*].b")
.exclude("$.zip code")
.exclude("$.fb:testid")
.exclude("preserve newlines")
.exclude("escape")
.exclude("$..no_recursive")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -385,16 +385,9 @@ class ClickHouseTestSettings extends BackendTestSettings {
enableSuite[GlutenJoinSuite].exclude(
"SPARK-36794: Ignore duplicated key when building relation for semi/anti hash join")
enableSuite[GlutenJsonExpressionsSuite]
.exclude("$.store.book[*]")
.exclude("$.store.book[*].category")
.exclude("$.store.book[*].isbn")
.exclude("$.store.basket[*]")
.exclude("$.store.basket[*][0]")
.exclude("$.store.basket[0][*]")
.exclude("$.store.basket[*][*]")
.exclude("$.store.basket[0][*].b")
.exclude("$.zip code")
.exclude("$.fb:testid")
.exclude(
"$.store.basket[0][*].b"
) // issue: https://github.com/apache/incubator-gluten/issues/8529
.exclude("from_json - invalid data")
.exclude("from_json - input=object, schema=array, output=array of single row")
.exclude("from_json - input=empty object, schema=array, output=array of single row with null")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -385,16 +385,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
enableSuite[GlutenJoinSuite].exclude(
"SPARK-36794: Ignore duplicated key when building relation for semi/anti hash join")
enableSuite[GlutenJsonExpressionsSuite]
.exclude("$.store.book[*]")
.exclude("$.store.book[*].category")
.exclude("$.store.book[*].isbn")
.exclude("$.store.basket[*]")
.exclude("$.store.basket[*][0]")
.exclude("$.store.basket[0][*]")
.exclude("$.store.basket[*][*]")
.exclude("$.store.basket[0][*].b")
.exclude("$.zip code")
.exclude("$.fb:testid")
.exclude("from_json - invalid data")
.exclude("from_json - input=object, schema=array, output=array of single row")
.exclude("from_json - input=empty object, schema=array, output=array of single row with null")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -385,16 +385,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
enableSuite[GlutenJoinSuite].exclude(
"SPARK-36794: Ignore duplicated key when building relation for semi/anti hash join")
enableSuite[GlutenJsonExpressionsSuite]
.exclude("$.store.book[*]")
.exclude("$.store.book[*].category")
.exclude("$.store.book[*].isbn")
.exclude("$.store.basket[*]")
.exclude("$.store.basket[*][0]")
.exclude("$.store.basket[0][*]")
.exclude("$.store.basket[*][*]")
.exclude("$.store.basket[0][*].b")
.exclude("$.zip code")
.exclude("$.fb:testid")
.exclude("from_json - invalid data")
.exclude("from_json - input=object, schema=array, output=array of single row")
.exclude("from_json - input=empty object, schema=array, output=array of single row with null")
Expand Down
Loading