Skip to content

Commit

Permalink
[GLUTEN-8529][CH]Fix get_json_object when path has asterisk (#8540)
Browse files Browse the repository at this point in the history
* fix get_json_object diff

* fix get_json_object

* fix ut

* fix ci
  • Loading branch information
KevinyhZou authored Jan 17, 2025
1 parent 13d9a17 commit 303362f
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 41 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,17 @@ class GlutenFunctionValidateSuite extends GlutenClickHouseWholeStageTransformerS
" get_json_object(string_field1, '$.a') is not null") { _ => }
}

// Exercises get_json_object with wildcard ([*]) array paths on json_test rows where
// int_field1 = 7. Each query is passed to runQueryAndCompare with a no-op callback.
test("Test get_json_object 12") {
  val wildcardPathQueries = Seq(
    "SELECT get_json_object(string_field1, '$.a[*].y') from json_test where int_field1 = 7",
    "select get_json_object(string_field1, '$.a[*].z.n.p') from json_test where int_field1 = 7"
  )
  // Queries run in declaration order, one runQueryAndCompare call per statement.
  wildcardPathQueries.foreach(sql => runQueryAndCompare(sql) { _ => })
}

// Runs a covar_samp aggregate over double_field1 and int_field1 of json_test through
// runQueryAndCompare with a no-op callback.
test("Test covar_samp") {
  val covarSampQuery = "SELECT covar_samp(double_field1, int_field1) from json_test"
  runQueryAndCompare(covarSampQuery) { _ => }
}
Expand Down
18 changes: 14 additions & 4 deletions cpp-ch/local-engine/Functions/SparkFunctionGetJsonObject.h
Original file line number Diff line number Diff line change
Expand Up @@ -462,8 +462,8 @@ class GetJsonObjectImpl

/// Returns the number of arguments that follow the first one, i.e. `arguments.size() - 1`.
/// NOTE(review): with an empty `arguments` the subtraction would presumably wrap around
/// (unsigned size) — confirm that callers always pass at least one argument.
static size_t getNumberOfIndexArguments(const DB::ColumnsWithTypeAndName & arguments)
{
    const auto argument_count = arguments.size();
    return argument_count - 1;
}

bool insertResultToColumn(DB::IColumn & dest, const Element & root, DB::GeneratorJSONPath<JSONParser> & generator_json_path, bool)
{
bool insertResultToColumn(DB::IColumn & dest, const Element & root, DB::GeneratorJSONPath<JSONParser> & generator_json_path, bool path_has_asterisk)
{
Element current_element = root;
DB::VisitorStatus status;
std::stringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
Expand Down Expand Up @@ -501,10 +501,14 @@ class GetJsonObjectImpl
if (elements[0].isString())
{
auto str = elements[0].getString();
if (path_has_asterisk)
{
str = "\"" + std::string(str) + "\"";
}
serializer.addRawString(str);
}
else
{
{
serializer.addElement(elements[0]);
}
}
Expand Down Expand Up @@ -684,6 +688,7 @@ class FlattenJSONStringOnRequiredFunction : public DB::IFunction
std::vector<DB::ASTPtr> json_path_asts;

std::vector<String> required_fields;
std::vector<bool> path_has_asterisk;
const auto & first_column = arguments[0];
if (const auto * required_fields_col = typeid_cast<const DB::ColumnConst *>(arguments[1].column.get()))
{
Expand All @@ -694,6 +699,11 @@ class FlattenJSONStringOnRequiredFunction : public DB::IFunction
{
auto normalized_field = JSONPathNormalizer::normalize(field);
// LOG_ERROR(getLogger("JSONPatch"), "xxx field {} -> {}", field, normalized_field);
if(normalized_field.find("[*]") != std::string::npos)
path_has_asterisk.emplace_back(true);
else
path_has_asterisk.emplace_back(false);

required_fields.push_back(normalized_field);
tuple_columns.emplace_back(str_type->createColumn());

Expand Down Expand Up @@ -776,7 +786,7 @@ class FlattenJSONStringOnRequiredFunction : public DB::IFunction
for (size_t j = 0; j < tuple_size; ++j)
{
generator_json_paths[j]->reinitialize();
if (!impl.insertResultToColumn(*tuple_columns[j], document, *generator_json_paths[j], true))
if (!impl.insertResultToColumn(*tuple_columns[j], document, *generator_json_paths[j], path_has_asterisk[j]))
{
tuple_columns[j]->insertDefault();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -757,16 +757,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-35728: Check multiply/divide of day-time intervals of any fields by numeric")
.exclude("SPARK-35778: Check multiply/divide of year-month intervals of any fields by numeric")
enableSuite[GlutenJsonExpressionsSuite]
.exclude("$.store.book[*]")
.exclude("$.store.book[*].category")
.exclude("$.store.book[*].isbn")
.exclude("$.store.basket[*]")
.exclude("$.store.basket[*][0]")
.exclude("$.store.basket[0][*]")
.exclude("$.store.basket[*][*]")
.exclude("$.store.basket[0][*].b")
.exclude("$.zip code")
.exclude("$.fb:testid")
.exclude("preserve newlines")
.exclude("escape")
.exclude("$..no_recursive")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -385,16 +385,9 @@ class ClickHouseTestSettings extends BackendTestSettings {
enableSuite[GlutenJoinSuite].exclude(
"SPARK-36794: Ignore duplicated key when building relation for semi/anti hash join")
enableSuite[GlutenJsonExpressionsSuite]
.exclude("$.store.book[*]")
.exclude("$.store.book[*].category")
.exclude("$.store.book[*].isbn")
.exclude("$.store.basket[*]")
.exclude("$.store.basket[*][0]")
.exclude("$.store.basket[0][*]")
.exclude("$.store.basket[*][*]")
.exclude("$.store.basket[0][*].b")
.exclude("$.zip code")
.exclude("$.fb:testid")
.exclude(
"$.store.basket[0][*].b"
) // issue: https://github.com/apache/incubator-gluten/issues/8529
.exclude("from_json - invalid data")
.exclude("from_json - input=object, schema=array, output=array of single row")
.exclude("from_json - input=empty object, schema=array, output=array of single row with null")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -385,16 +385,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
enableSuite[GlutenJoinSuite].exclude(
"SPARK-36794: Ignore duplicated key when building relation for semi/anti hash join")
enableSuite[GlutenJsonExpressionsSuite]
.exclude("$.store.book[*]")
.exclude("$.store.book[*].category")
.exclude("$.store.book[*].isbn")
.exclude("$.store.basket[*]")
.exclude("$.store.basket[*][0]")
.exclude("$.store.basket[0][*]")
.exclude("$.store.basket[*][*]")
.exclude("$.store.basket[0][*].b")
.exclude("$.zip code")
.exclude("$.fb:testid")
.exclude("from_json - invalid data")
.exclude("from_json - input=object, schema=array, output=array of single row")
.exclude("from_json - input=empty object, schema=array, output=array of single row with null")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -385,16 +385,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
enableSuite[GlutenJoinSuite].exclude(
"SPARK-36794: Ignore duplicated key when building relation for semi/anti hash join")
enableSuite[GlutenJsonExpressionsSuite]
.exclude("$.store.book[*]")
.exclude("$.store.book[*].category")
.exclude("$.store.book[*].isbn")
.exclude("$.store.basket[*]")
.exclude("$.store.basket[*][0]")
.exclude("$.store.basket[0][*]")
.exclude("$.store.basket[*][*]")
.exclude("$.store.basket[0][*].b")
.exclude("$.zip code")
.exclude("$.fb:testid")
.exclude("from_json - invalid data")
.exclude("from_json - input=object, schema=array, output=array of single row")
.exclude("from_json - input=empty object, schema=array, output=array of single row with null")
Expand Down

0 comments on commit 303362f

Please sign in to comment.