From 98f34322657b0d3882badb9747d3e3d65f7a950b Mon Sep 17 00:00:00 2001
From: Jason Lowe <jlowe@nvidia.com>
Date: Wed, 10 Apr 2024 11:35:14 -0500
Subject: [PATCH 1/6] Avoid auto-merging thirdparty/cudf-pins (#1928)

* Avoid auto-merging thirdparty/cudf-pins

Signed-off-by: Jason Lowe <jlowe@nvidia.com>

* Update auto-merge.yml

* Add comments to show what we want to do here

We want to merge all spark-rapids-jni HEAD(24.04) commits into BASE(24.06),

but we do not want to merge the submodules (24.04 cudf and cudf-pins) into BASE(24.06),

as we need to pin the cudf/cudf-pins submodules to 24.06 instead of 24.04.

---------

Signed-off-by: Jason Lowe <jlowe@nvidia.com>
Co-authored-by: Tim Liu <timl@nvidia.com>
---
 .github/workflows/auto-merge.yml | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/auto-merge.yml b/.github/workflows/auto-merge.yml
index fc1b22e3c2..417d24d67e 100755
--- a/.github/workflows/auto-merge.yml
+++ b/.github/workflows/auto-merge.yml
@@ -42,13 +42,15 @@ jobs:
           git config user.email "70000568+nvauto@users.noreply.github.com "
           git fetch origin ${HEAD} ${BASE}
           git checkout -b ${INTERMEDIATE_HEAD} origin/${HEAD}
-          OUT=$(git --no-pager diff --name-only origin/${BASE} | grep "${FILE_USE_BASE}" || true)
-          [[ ! -z "${OUT}" ]] && git checkout origin/${BASE} -- ${FILE_USE_BASE} && \
-            git commit -s -am "Auto-merge use submodule in BASE ref"
+          # Use commits from HEAD, but keep submodule files(FILE_USE_BASE) from BASE
+          git checkout origin/${BASE} -- ${FILE_USE_BASE}
+          # If any submodule file is updated from HEAD, always change to BASE ones
+          [ ! -z "$(git status --porcelain=v1 --untracked=no)" ] && \
+            git commit -s -am "Auto-merge use ${BASE} versions"
           git push origin ${INTERMEDIATE_HEAD} -f
         env:
           INTERMEDIATE_HEAD: bot-auto-merge-${{ env.HEAD }}
-          FILE_USE_BASE: thirdparty/cudf
+          FILE_USE_BASE: thirdparty/cudf thirdparty/cudf-pins
 
       - name: auto-merge job
         uses: ./.github/workflows/action-helper

From 95db19a63e5d607476995b44e3332b52079e4a4e Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Thu, 11 Apr 2024 01:42:12 +0800
Subject: [PATCH 2/6] Update submodule cudf to
 94726ad056e2473c836f47d310e2584bdf44d1f9 (#1949)

Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
 thirdparty/cudf                       | 2 +-
 thirdparty/cudf-pins/rapids-cmake.sha | 2 +-
 thirdparty/cudf-pins/versions.json    | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/thirdparty/cudf b/thirdparty/cudf
index e6cfd4503a..94726ad056 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit e6cfd4503af063d3bba28954ab7ec67dbbb44e71
+Subproject commit 94726ad056e2473c836f47d310e2584bdf44d1f9
diff --git a/thirdparty/cudf-pins/rapids-cmake.sha b/thirdparty/cudf-pins/rapids-cmake.sha
index fd9b77a7c7..aee5915dc4 100644
--- a/thirdparty/cudf-pins/rapids-cmake.sha
+++ b/thirdparty/cudf-pins/rapids-cmake.sha
@@ -1 +1 @@
-096ae3c0a6b2c593f8fdb38468be527027bf79d7
+69f5222465ec3c8c54f107fcf8750f040034e156
diff --git a/thirdparty/cudf-pins/versions.json b/thirdparty/cudf-pins/versions.json
index cf97df89ae..4261acab1a 100644
--- a/thirdparty/cudf-pins/versions.json
+++ b/thirdparty/cudf-pins/versions.json
@@ -62,7 +62,7 @@
     {
       "always_download" : true,
       "git_shallow" : false,
-      "git_tag" : "745b1847f56c8f4b0c4e094f93837c2a91e18318",
+      "git_tag" : "5e69e97c42504c17a333a36e1796dce4b83150a4",
       "git_url" : "https://github.com/rapidsai/kvikio.git",
       "version" : "24.04"
     },
@@ -131,7 +131,7 @@
     {
       "always_download" : true,
       "git_shallow" : false,
-      "git_tag" : "0651edf0fce5ebf53528382b475fc29a2f3afa67",
+      "git_tag" : "e14a2291301ce9c8ef76b2b2404eb02336584724",
       "git_url" : "https://github.com/rapidsai/rmm.git",
       "version" : "24.04"
     },

From e5cae7670302ea1f1878c98591f90747b1761fc4 Mon Sep 17 00:00:00 2001
From: Chong Gao <chongg@nvidia.com>
Date: Thu, 11 Apr 2024 09:57:18 +0800
Subject: [PATCH 3/6] getJsonObject: improve performance for new version of
 getJsonObject (#1930)

* Move device code in get_json_object to cu or cuh (#1915)

Signed-off-by: Haoyang Li <haoyangl@nvidia.com>

* Refactor/simplify json generator

Signed-off-by: Chong Gao <res_life@163.com>

* Remove purge non-empty nulls step

* Refactor json parser: remove useless variables

* Use 64 bits to store JSON nest depth context to save memory

* Refine push/pop/peek logic

* Refactor: use less functions; change max path length from 32 to 8

* Fix nvbug: get-json-obj get incorrect result on some GPUs(H100, V100)

* Revert "Fix nvbug: get-json-obj get incorrect result on some GPUs(H100, V100)"

This reverts commit ff2db7e0b7f623e381fe937f019fefd68501251e.

* Fix a redefined variable

* Update MAX_PATH_DEPTH = 8 in Java to keep consistent

* Fix test cases; Change max path depth from 8 to 16

* Minor change: add pragma once in header

---------

Signed-off-by: Haoyang Li <haoyangl@nvidia.com>
Signed-off-by: Chong Gao <res_life@163.com>
Co-authored-by: Haoyang Li <haoyangl@nvidia.com>
Co-authored-by: Chong Gao <res_life@163.com>
---
 src/main/cpp/src/get_json_object.cu           | 1152 ++++++++++++++++-
 src/main/cpp/src/get_json_object.hpp          | 1144 +---------------
 .../src/{json_parser.hpp => json_parser.cuh}  |  196 ++-
 .../nvidia/spark/rapids/jni/JSONUtils.java    |    3 +
 .../spark/rapids/jni/GetJsonObjectTest.java   |    8 +-
 5 files changed, 1184 insertions(+), 1319 deletions(-)
 rename src/main/cpp/src/{json_parser.hpp => json_parser.cuh} (91%)

diff --git a/src/main/cpp/src/get_json_object.cu b/src/main/cpp/src/get_json_object.cu
index e944337861..c7c6c242b8 100644
--- a/src/main/cpp/src/get_json_object.cu
+++ b/src/main/cpp/src/get_json_object.cu
@@ -50,6 +50,1060 @@ namespace spark_rapids_jni {
 
 namespace detail {
 
+/**
+ * Output style used when writing a JSON value: raw, quoted or flattened.
+ */
+enum class write_style { raw_style, quoted_style, flatten_style };
+
+/**
+ * One step of a JSON path: the instruction type plus its optional payload.
+ */
+struct path_instruction {
+  __device__ inline path_instruction(path_instruction_type _type) : type(_type) {}
+
+  path_instruction_type type;
+
+  // field name; only meaningful when type is the named type
+  cudf::string_view name;
+
+  // array index; only meaningful when type is the index type (defaults to -1)
+  int index{-1};
+};
+
+/**
+ * JSON generator is used to write out JSON content. When `output` is null it
+ * only counts the bytes that would be written. Because get_json_object only
+ * outputs a JSON object as a whole item, no internal state is kept for JSON
+ * objects when outputting; only the state of the current JSON array is kept.
+ */
+class json_generator {
+ public:
+  __device__ json_generator(char* _output) : output(_output), output_len(0) {}
+  __device__ json_generator() : output(nullptr), output_len(0) {}
+
+  // create a nested child generator based on this parent generator,
+  // child generator is a view, parent and child share the same byte array
+  __device__ json_generator new_child_generator()
+  {
+    if (nullptr == output) {
+      return json_generator();
+    } else {
+      return json_generator(output + output_len);
+    }
+  }
+
+  // write [
+  // add an extra comma if needed,
+  // e.g.: when JSON content is: [[1,2,3]
+  // writing a new [ should result in: [[1,2,3],[
+  __device__ void write_start_array()
+  {
+    try_write_comma();
+
+    if (output) { *(output + output_len) = '['; }
+
+    output_len++;
+    array_depth++;
+    // new array is empty
+    is_curr_array_empty = true;
+  }
+
+  // write ]
+  __device__ void write_end_array()
+  {
+    if (output) { *(output + output_len) = ']'; }
+    output_len++;
+
+    // point to parent array
+    array_depth--;
+
+    // mark the parent array as non-empty because it now has a closed child item.
+    is_curr_array_empty = false;
+  }
+
+  // write first start array without output, only update internal state
+  __device__ void write_first_start_array_without_output()
+  {
+    // hide the outer start array token
+    // Note: do not inc output_len
+    array_depth++;
+    // new array is empty
+    is_curr_array_empty = true;
+  }
+
+  // return true if it's in an array context and it's not writing the first item.
+  __device__ inline bool need_comma() { return (array_depth > 0 && !is_curr_array_empty); }
+
+  /**
+   * write a comma according to the current generator state
+   */
+  __device__ void try_write_comma()
+  {
+    if (need_comma()) {
+      // in array context and not writing the first item
+      if (output) { *(output + output_len) = ','; }
+      output_len++;
+    }
+  }
+
+  /**
+   * Copy the current structure while parsing. If the current token is a start
+   * object/array, copy up to the corresponding matched end object/array.
+   * Returns false if the JSON format is invalid, true if it is valid.
+   */
+  __device__ bool copy_current_structure(json_parser& parser)
+  {
+    // first try add comma
+    try_write_comma();
+
+    if (array_depth > 0) { is_curr_array_empty = false; }
+
+    if (nullptr != output) {
+      auto copy_to       = output + output_len;
+      auto [b, copy_len] = parser.copy_current_structure(copy_to);
+      output_len += copy_len;
+      return b;
+    } else {
+      char* copy_to      = nullptr;
+      auto [b, copy_len] = parser.copy_current_structure(copy_to);
+      output_len += copy_len;
+      return b;
+    }
+  }
+
+  /**
+   * Get current text from JSON parser and then write the text unescaped.
+   * Note: Because JSON strings contain '\' for escaping, the JSON parser
+   * has to unescape them and so can not return a pointer and length pair
+   * (char *, len); for a number token it could return such a pair.
+   * When `output` is null only the unescaped length is accumulated.
+   */
+  __device__ void write_raw(json_parser& parser)
+  {
+    if (array_depth > 0) { is_curr_array_empty = false; }
+
+    if (nullptr != output) {
+      auto copied = parser.write_unescaped_text(output + output_len);
+      output_len += copied;
+    } else {
+      auto len = parser.compute_unescaped_len();
+      output_len += len;
+    }
+  }
+
+  /**
+   * write child raw value
+   * e.g.:
+   *
+   * write_outer_array_tokens = false
+   * need_comma = true
+   * [1,2,3]1,2,3
+   *        ^
+   *        |
+   *    child pointer
+   * ==>>
+   * [1,2,3],1,2,3
+   *
+   *
+   * write_outer_array_tokens = true
+   * need_comma = true
+   *   [12,3,4
+   *     ^
+   *     |
+   * child pointer
+   * ==>>
+   *   [1,[2,3,4]
+   *
+   * For more information about param write_outer_array_tokens, refer to
+   * `write_first_start_array_without_output`
+   * @param child_block_begin start of the child block in the shared buffer
+   * @param child_block_len length in bytes of the child block
+   * @param write_outer_array_tokens whether to write outer array tokens for the
+   * child block
+   */
+  __device__ void write_child_raw_value(char* child_block_begin,
+                                        size_t child_block_len,
+                                        bool write_outer_array_tokens)
+  {
+    bool insert_comma = need_comma();
+
+    if (array_depth > 0) { is_curr_array_empty = false; }
+
+    if (nullptr != output) {
+      if (write_outer_array_tokens) {
+        if (insert_comma) {
+          *(child_block_begin + child_block_len + 2) = ']';
+          move_forward(child_block_begin, child_block_len, 2);
+          *(child_block_begin + 1) = '[';
+          *(child_block_begin)     = ',';
+        } else {
+          *(child_block_begin + child_block_len + 1) = ']';
+          move_forward(child_block_begin, child_block_len, 1);
+          *(child_block_begin) = '[';
+        }
+      } else {
+        if (insert_comma) {
+          move_forward(child_block_begin, child_block_len, 1);
+          *(child_block_begin) = ',';
+        } else {
+          // do not need comma && do not need write outer array tokens
+          // do nothing, because child generator buff is directly after the
+          // parent generator
+        }
+      }
+    }
+
+    // update length
+    if (insert_comma) { output_len++; }
+    if (write_outer_array_tokens) { output_len += 2; }
+    output_len += child_block_len;
+  }
+
+  // move memory block forward by specified bytes
+  // e.g.:  memory is: 1 2 0 0, begin points at 1, len is 2, forward is 2,
+  // after moving, memory is: 1 2 1 2.
+  // e.g.:  memory is: 1 2 0 0, begin points at 1, len is 2, forward is 1,
+  // after moving, memory is: 1 1 2 0.
+  // Note: must copy from end to begin to avoid overwriting bytes not yet moved
+  __device__ void move_forward(char* begin, size_t len, int forward)
+  {
+    // TODO copy by 8 bytes
+    char* pos = begin + len + forward - 1;
+    char* e   = begin + forward - 1;
+    while (pos > e) {
+      *pos = *(pos - forward);
+      pos--;
+    }
+  }
+
+  __device__ void reset() { output_len = 0; }
+
+  __device__ inline size_t get_output_len() const { return output_len; }
+  __device__ inline char* get_output_start_position() const { return output; }
+  __device__ inline char* get_current_output_position() const { return output + output_len; }
+
+  /**
+   * generator may contain trash output, e.g.: generator writes some output,
+   * then JSON format is invalid, the previous output becomes trash.
+   */
+  __device__ inline void set_output_len_zero() { output_len = 0; }
+
+  __device__ inline void set_output_len(size_t len) { output_len = len; }
+
+ private:
+  char* output;
+  size_t output_len;
+
+  // true while no item has been written into the current array;
+  // used to decide whether to add a comma before writing out a new item.
+  bool is_curr_array_empty = true;
+  int array_depth = 0;
+};
+
+/**
+ * Path evaluation helper (device code): true when no path instructions remain.
+ */
+__device__ inline bool path_is_empty(size_t path_size) { return 0 == path_size; }
+
+__device__ inline bool path_match_element(path_instruction const* path_ptr,
+                                          size_t path_size,
+                                          path_instruction_type path_type0)
+{
+  // a match needs at least one remaining instruction of the requested type
+  return path_size >= 1 && path_ptr[0].type == path_type0;
+}
+
+__device__ inline bool path_match_elements(path_instruction const* path_ptr,
+                                           size_t path_size,
+                                           path_instruction_type path_type0,
+                                           path_instruction_type path_type1)
+{
+  // the first two remaining instructions must have the requested types, in order
+  return path_size >= 2 && path_ptr[0].type == path_type0 && path_ptr[1].type == path_type1;
+}
+
+__device__ inline bool path_match_elements(path_instruction const* path_ptr,
+                                           size_t path_size,
+                                           path_instruction_type path_type0,
+                                           path_instruction_type path_type1,
+                                           path_instruction_type path_type2,
+                                           path_instruction_type path_type3)
+{
+  // the first four remaining instructions must have the requested types, in order
+  return path_size >= 4 && path_ptr[0].type == path_type0 && path_ptr[1].type == path_type1 &&
+         path_ptr[2].type == path_type2 && path_ptr[3].type == path_type3;
+}
+
+__device__ inline thrust::tuple<bool, int> path_match_subscript_index(
+  path_instruction const* path_ptr, size_t path_size)
+{
+  // matches a leading [SUBSCRIPT, INDEX] pair; on success also yields the index value
+  bool const is_subscript_index = path_match_elements(
+    path_ptr, path_size, path_instruction_type::SUBSCRIPT, path_instruction_type::INDEX);
+
+  // no match: report false with a placeholder index of 0
+  if (!is_subscript_index) { return thrust::make_tuple(false, 0); }
+  return thrust::make_tuple(true, path_ptr[1].index);
+}
+
+__device__ inline thrust::tuple<bool, cudf::string_view> path_match_named(
+  path_instruction const* path_ptr, size_t path_size)
+{
+  // matches a leading NAMED instruction; on success also yields its name
+  bool const is_named = path_match_element(path_ptr, path_size, path_instruction_type::NAMED);
+
+  // no match: report false with a default-constructed string_view
+  if (!is_named) { return thrust::make_tuple(false, cudf::string_view()); }
+  return thrust::make_tuple(true, path_ptr[0].name);
+}
+
+__device__ inline thrust::tuple<bool, int> path_match_subscript_index_subscript_wildcard(
+  path_instruction const* path_ptr, size_t path_size)
+{
+  // matches a leading [SUBSCRIPT, INDEX, SUBSCRIPT, WILDCARD] sequence;
+  // on success also yields the index value of the INDEX instruction
+  bool const is_index_then_wildcard = path_match_elements(path_ptr,
+                                                          path_size,
+                                                          path_instruction_type::SUBSCRIPT,
+                                                          path_instruction_type::INDEX,
+                                                          path_instruction_type::SUBSCRIPT,
+                                                          path_instruction_type::WILDCARD);
+  // no match: report false with a placeholder index of 0
+  if (!is_index_then_wildcard) { return thrust::make_tuple(false, 0); }
+  return thrust::make_tuple(true, path_ptr[1].index);
+}
+
+/**
+ *
+ * The following commented-out function is the recursive version.
+ * The function after it is the rewritten (iterative) version.
+ * The recursive version is kept here for review purposes, because the
+ * iterative rewrite is not human friendly.
+ *
+ */
+// __device__ bool evaluate_path(json_parser& p,
+//                                            json_generator& g,
+//                                            write_style style,
+//                                            path_instruction const* path_ptr,
+//                                            int path_size)
+// {
+//   auto token = p.get_current_token();
+
+//   // case (VALUE_STRING, Nil) if style == RawStyle
+//   // case path 1
+//   if (json_token::VALUE_STRING == token && path_is_empty(path_size) &&
+//       style == write_style::raw_style) {
+//     // there is no array wildcard or slice parent, emit this string without
+//     // quotes write current string in parser to generator
+//     g.write_raw(p);
+//     return true;
+//   }
+//   // case (START_ARRAY, Nil) if style == FlattenStyle
+//   // case path 2
+//   else if (json_token::START_ARRAY == token && path_is_empty(path_size) &&
+//            style == write_style::flatten_style) {
+//     // flatten this array into the parent
+//     bool dirty = false;
+//     while (json_token::END_ARRAY != p.next_token()) {
+//       // JSON validation check
+//       if (json_token::ERROR == p.get_current_token()) { return false; }
+
+//       dirty |= path_evaluator::evaluate_path(p, g, style, nullptr, 0);
+//     }
+//     return dirty;
+//   }
+//   // case (_, Nil)
+//   // case path 3
+//   else if (path_is_empty(path_size)) {
+//     // general case: just copy the child tree verbatim
+//     return g.copy_current_structure(p);
+//   }
+//   // case (START_OBJECT, Key :: xs)
+//   // case path 4
+//   else if (json_token::START_OBJECT == token &&
+//            path_match_element(path_ptr, path_size, path_instruction_type::KEY)) {
+//     bool dirty = false;
+//     while (json_token::END_OBJECT != p.next_token()) {
+//       // JSON validation check
+//       if (json_token::ERROR == p.get_current_token()) { return false; }
+
+//       if (dirty) {
+//         // once a match has been found we can skip other fields
+//         if (!p.try_skip_children()) {
+//           // JSON validation check
+//           return false;
+//         }
+//       } else {
+//         dirty = path_evaluator::evaluate_path(p, g, style, path_ptr + 1, path_size - 1);
+//       }
+//     }
+//     return dirty;
+//   }
+//   // case (START_ARRAY, Subscript :: Wildcard :: Subscript :: Wildcard :: xs)
+//   // case path 5
+//   else if (json_token::START_ARRAY == token &&
+//            path_match_elements(path_ptr,
+//                                path_size,
+//                                path_instruction_type::SUBSCRIPT,
+//                                path_instruction_type::WILDCARD,
+//                                path_instruction_type::SUBSCRIPT,
+//                                path_instruction_type::WILDCARD)) {
+//     // special handling for the non-structure preserving double wildcard
+//     // behavior in Hive
+//     bool dirty = false;
+//     g.write_start_array();
+//     while (p.next_token() != json_token::END_ARRAY) {
+//       // JSON validation check
+//       if (json_token::ERROR == p.get_current_token()) { return false; }
+
+//       dirty |= path_evaluator::evaluate_path(
+//         p, g, write_style::flatten_style, path_ptr + 4, path_size - 4);
+//     }
+//     g.write_end_array();
+//     return dirty;
+//   }
+//   // case (START_ARRAY, Subscript :: Wildcard :: xs) if style != QuotedStyle
+//   // case path 6
+//   else if (json_token::START_ARRAY == token &&
+//            path_match_elements(path_ptr,
+//                                path_size,
+//                                path_instruction_type::SUBSCRIPT,
+//                                path_instruction_type::WILDCARD) &&
+//            style != write_style::quoted_style) {
+//     // retain Flatten, otherwise use Quoted... cannot use Raw within an array
+//     write_style next_style = write_style::raw_style;
+//     switch (style) {
+//       case write_style::raw_style: next_style = write_style::quoted_style; break;
+//       case write_style::flatten_style: next_style = write_style::flatten_style; break;
+//       case write_style::quoted_style: next_style = write_style::quoted_style;  // never happen
+//     }
+
+//     // temporarily buffer child matches, the emitted json will need to be
+//     // modified slightly if there is only a single element written
+
+//     int dirty = 0;
+//     // create a child generator with hide outer array tokens mode.
+//     auto child_g = g.new_child_generator(/*hide_outer_array_tokens*/ true);
+
+//     // Note: child generator does not actually write the outer start array
+//     // token into buffer it only updates internal nested state
+//     child_g.write_start_array();
+
+//     while (p.next_token() != json_token::END_ARRAY) {
+//       // JSON validation check
+//       if (json_token::ERROR == p.get_current_token()) { return false; }
+
+//       // track the number of array elements and only emit an outer array if
+//       // we've written more than one element, this matches Hive's behavior
+//       dirty +=
+//         (path_evaluator::evaluate_path(p, child_g, next_style, path_ptr + 2, path_size - 2) ? 1
+//                                                                                             :
+//                                                                                             0);
+//     }
+
+//     // Note: child generator does not actually write the outer end array token
+//     // into buffer it only updates internal nested state
+//     child_g.write_end_array();
+
+//     char* child_g_start = child_g.get_output_start_position();
+//     size_t child_g_len  = child_g.get_output_len();  // len already excluded outer [ ]
+
+//     if (dirty > 1) {
+//       // add outer array tokens
+//       g.write_child_raw_value(child_g_start, child_g_len, true);
+//     } else if (dirty == 1) {
+//       // remove outer array tokens
+//       g.write_child_raw_value(child_g_start, child_g_len, false);
+//     }  // else do not write anything
+
+//     return dirty > 0;
+//   }
+//   // case (START_ARRAY, Subscript :: Wildcard :: xs)
+//   // case path 7
+//   else if (json_token::START_ARRAY == token &&
+//            path_match_elements(path_ptr,
+//                                path_size,
+//                                path_instruction_type::SUBSCRIPT,
+//                                path_instruction_type::WILDCARD)) {
+//     bool dirty = false;
+//     g.write_start_array();
+//     while (p.next_token() != json_token::END_ARRAY) {
+//       // JSON validation check
+//       if (json_token::ERROR == p.get_current_token()) { return false; }
+
+//       // wildcards can have multiple matches, continually update the dirty
+//       // count
+//       dirty |= path_evaluator::evaluate_path(
+//         p, g, write_style::quoted_style, path_ptr + 2, path_size - 2);
+//     }
+//     g.write_end_array();
+
+//     return dirty;
+//   }
+//   /* case (START_ARRAY, Subscript :: Index(idx) :: (xs@Subscript :: Wildcard :: _)) */
+//   // case path 8
+//   else if (json_token::START_ARRAY == token &&
+//            thrust::get<0>(path_match_subscript_index_subscript_wildcard(path_ptr, path_size)))
+//            {
+//     int idx = thrust::get<1>(path_match_subscript_index_subscript_wildcard(path_ptr,
+//     path_size)); p.next_token();
+//     // JSON validation check
+//     if (json_token::ERROR == p.get_current_token()) { return false; }
+
+//     int i = idx;
+//     while (i >= 0) {
+//       if (p.get_current_token() == json_token::END_ARRAY) {
+//         // terminate, nothing has been written
+//         return false;
+//       }
+//       if (0 == i) {
+//         bool dirty = path_evaluator::evaluate_path(
+//           p, g, write_style::quoted_style, path_ptr + 2, path_size - 2);
+//         while (p.next_token() != json_token::END_ARRAY) {
+//           // JSON validation check
+//           if (json_token::ERROR == p.get_current_token()) { return false; }
+
+//           // advance the token stream to the end of the array
+//           if (!p.try_skip_children()) { return false; }
+//         }
+//         return dirty;
+//       } else {
+//         // i > 0
+//         if (!p.try_skip_children()) { return false; }
+
+//         p.next_token();
+//         // JSON validation check
+//         if (json_token::ERROR == p.get_current_token()) { return false; }
+//       }
+//       --i;
+//     }
+//     // path parser guarantees idx >= 0
+//     // will never reach to here
+//     return false;
+//   }
+//   // case (START_ARRAY, Subscript :: Index(idx) :: xs)
+//   // case path 9
+//   else if (json_token::START_ARRAY == token &&
+//            thrust::get<0>(path_match_subscript_index(path_ptr, path_size))) {
+//     int idx = thrust::get<1>(path_match_subscript_index(path_ptr, path_size));
+//     p.next_token();
+//     // JSON validation check
+//     if (json_token::ERROR == p.get_current_token()) { return false; }
+
+//     int i = idx;
+//     while (i >= 0) {
+//       if (p.get_current_token() == json_token::END_ARRAY) {
+//         // terminate, nothing has been written
+//         return false;
+//       }
+//       if (0 == i) {
+//         bool dirty = path_evaluator::evaluate_path(p, g, style, path_ptr + 2, path_size - 2);
+//         while (p.next_token() != json_token::END_ARRAY) {
+//           // JSON validation check
+//           if (json_token::ERROR == p.get_current_token()) { return false; }
+
+//           // advance the token stream to the end of the array
+//           if (!p.try_skip_children()) { return false; }
+//         }
+//         return dirty;
+//       } else {
+//         // i > 0
+//         if (!p.try_skip_children()) { return false; }
+
+//         p.next_token();
+//         // JSON validation check
+//         if (json_token::ERROR == p.get_current_token()) { return false; }
+//       }
+//       --i;
+//     }
+//     // path parser guarantees idx >= 0
+//     // will never reach to here
+//     return false;
+//   }
+//   // case (FIELD_NAME, Named(name) :: xs) if p.getCurrentName == name
+//   // case path 10
+//   else if (json_token::FIELD_NAME == token &&
+//            thrust::get<0>(path_match_named(path_ptr, path_size)) &&
+//            p.match_current_field_name(thrust::get<1>(path_match_named(path_ptr, path_size)))) {
+//     if (p.next_token() != json_token::VALUE_NULL) {
+//       // JSON validation check
+//       if (json_token::ERROR == p.get_current_token()) { return false; }
+
+//       return path_evaluator::evaluate_path(p, g, style, path_ptr + 1, path_size - 1);
+//     } else {
+//       return false;
+//     }
+//   }
+//   // case (FIELD_NAME, Wildcard :: xs)
+//   // case path 11
+//   else if (json_token::FIELD_NAME == token &&
+//            path_match_element(path_ptr, path_size, path_instruction_type::WILDCARD)) {
+//     p.next_token();
+//     // JSON validation check
+//     if (json_token::ERROR == p.get_current_token()) { return false; }
+
+//     return path_evaluator::evaluate_path(p, g, style, path_ptr + 1, path_size - 1);
+//   }
+//   // case _ =>
+//   // case path 12
+//   else {
+//     if (!p.try_skip_children()) { return false; }
+//     return false;
+//   }
+// }
+
+/**
+ *
+ * This function is rewritten from above commented recursive function.
+ * this function is equivalent to the above commented recursive function.
+ */
+__device__ bool evaluate_path(json_parser& p,
+                              json_generator& root_g,
+                              write_style root_style,
+                              path_instruction const* root_path_ptr,
+                              int root_path_size)
+{
+  // manually maintained context stack in lieu of calling evaluate_path recursively.
+  struct context {
+    // current token
+    json_token token;
+
+    // which case path that this task is from
+    int case_path;
+
+    // used to save current generator
+    json_generator g;
+
+    write_style style;
+    path_instruction const* path_ptr;
+    int path_size;
+
+    // is this context task is done
+    bool task_is_done;
+
+    // whether written output
+    // if dirty > 0, indicates success
+    int dirty;
+
+    // for some case paths
+    bool is_first_enter;
+
+    // used to save child JSON generator for case path 8
+    json_generator child_g;
+  };
+
+  // path max depth limitation
+  // There is a same constant in JSONUtil.java, keep them consistent when changing
+  // Note: Spark-Rapids should guarantee the path depth is less or equal to this limit,
+  // or GPU reports cudaErrorIllegalAddress
+  constexpr int max_path_depth = 16;
+
+  // define stack; plus 1 indicates root context task needs an extra memory
+  context stack[max_path_depth + 1];
+  int stack_pos = 0;
+
+  // push context function
+  auto push_context = [&stack, &stack_pos](json_token _token,
+                                           int _case_path,
+                                           json_generator _g,
+                                           write_style _style,
+                                           path_instruction const* _path_ptr,
+                                           int _path_size) {
+    // no need to check stack is full
+    // because Spark-Rapids already checked maximum length of `path_instruction`
+    auto& ctx          = stack[stack_pos];
+    ctx.token          = _token;
+    ctx.case_path      = _case_path;
+    ctx.g              = _g;
+    ctx.style          = _style;
+    ctx.path_ptr       = _path_ptr;
+    ctx.path_size      = _path_size;
+    ctx.task_is_done   = false;
+    ctx.dirty          = 0;
+    ctx.is_first_enter = true;
+
+    stack_pos++;
+  };
+
+  // put the first context task
+  push_context(p.get_current_token(), -1, root_g, root_style, root_path_ptr, root_path_size);
+
+  while (stack_pos > 0) {
+    auto& ctx = stack[stack_pos - 1];
+    if (!ctx.task_is_done) {
+      // task is not done.
+
+      // case (VALUE_STRING, Nil) if style == RawStyle
+      // case path 1
+      if (json_token::VALUE_STRING == ctx.token && path_is_empty(ctx.path_size) &&
+          ctx.style == write_style::raw_style) {
+        // there is no array wildcard or slice parent, emit this string without
+        // quotes write current string in parser to generator
+        ctx.g.write_raw(p);
+        ctx.dirty        = 1;
+        ctx.task_is_done = true;
+      }
+      // case (START_ARRAY, Nil) if style == FlattenStyle
+      // case path 2
+      else if (json_token::START_ARRAY == ctx.token && path_is_empty(ctx.path_size) &&
+               ctx.style == write_style::flatten_style) {
+        // flatten this array into the parent
+        if (json_token::END_ARRAY != p.next_token()) {
+          // JSON validation check
+          if (json_token::ERROR == p.get_current_token()) { return false; }
+          // push back task
+          // add child task
+          push_context(p.get_current_token(), 2, ctx.g, ctx.style, nullptr, 0);
+        } else {
+          // END_ARRAY
+          ctx.task_is_done = true;
+        }
+      }
+      // case (_, Nil)
+      // case path 3
+      else if (path_is_empty(ctx.path_size)) {
+        // general case: just copy the child tree verbatim
+        if (!(ctx.g.copy_current_structure(p))) {
+          // JSON validation check
+          return false;
+        }
+        ctx.dirty        = 1;
+        ctx.task_is_done = true;
+      }
+      // case (START_OBJECT, Key :: xs)
+      // case path 4
+      else if (json_token::START_OBJECT == ctx.token &&
+               path_match_element(ctx.path_ptr, ctx.path_size, path_instruction_type::KEY)) {
+        if (json_token::END_OBJECT != p.next_token()) {
+          // JSON validation check
+          if (json_token::ERROR == p.get_current_token()) { return false; }
+
+          if (ctx.dirty > 0) {
+            // once a match has been found we can skip other fields
+            if (!p.try_skip_children()) {
+              // JSON validation check
+              return false;
+            }
+          } else {
+            // need to try more children
+            push_context(
+              p.get_current_token(), 4, ctx.g, ctx.style, ctx.path_ptr + 1, ctx.path_size - 1);
+          }
+        } else {
+          ctx.task_is_done = true;
+        }
+      }
+      // case (START_ARRAY, Subscript :: Wildcard :: Subscript :: Wildcard :: xs)
+      // case path 5
+      else if (json_token::START_ARRAY == ctx.token &&
+               path_match_elements(ctx.path_ptr,
+                                   ctx.path_size,
+                                   path_instruction_type::SUBSCRIPT,
+                                   path_instruction_type::WILDCARD,
+                                   path_instruction_type::SUBSCRIPT,
+                                   path_instruction_type::WILDCARD)) {
+        // special handling for the non-structure preserving double wildcard
+        // behavior in Hive
+        if (ctx.is_first_enter) {
+          ctx.is_first_enter = false;
+          ctx.g.write_start_array();
+        }
+
+        if (p.next_token() != json_token::END_ARRAY) {
+          // JSON validation check
+          if (json_token::ERROR == p.get_current_token()) { return false; }
+          push_context(p.get_current_token(),
+                       5,
+                       ctx.g,
+                       write_style::flatten_style,
+                       ctx.path_ptr + 4,
+                       ctx.path_size - 4);
+        } else {
+          ctx.g.write_end_array();
+          ctx.task_is_done = true;
+        }
+      }
+      // case (START_ARRAY, Subscript :: Wildcard :: xs) if style != QuotedStyle
+      // case path 6
+      else if (json_token::START_ARRAY == ctx.token &&
+               path_match_elements(ctx.path_ptr,
+                                   ctx.path_size,
+                                   path_instruction_type::SUBSCRIPT,
+                                   path_instruction_type::WILDCARD) &&
+               ctx.style != write_style::quoted_style) {
+        // retain Flatten, otherwise use Quoted... cannot use Raw within an array
+        write_style next_style = write_style::raw_style;
+        switch (ctx.style) {
+          case write_style::raw_style: next_style = write_style::quoted_style; break;
+          case write_style::flatten_style: next_style = write_style::flatten_style; break;
+          case write_style::quoted_style: next_style = write_style::quoted_style;  // never happen
+        }
+
+        // temporarily buffer child matches, the emitted json will need to be
+        // modified slightly if there is only a single element written
+
+        json_generator child_g;
+        if (ctx.is_first_enter) {
+          ctx.is_first_enter = false;
+          // create a child generator with hide outer array tokens mode.
+          child_g = ctx.g.new_child_generator();
+          // write the first [ without emitting output or updating len; only update internal state
+          child_g.write_first_start_array_without_output();
+        } else {
+          child_g = ctx.child_g;
+        }
+
+        if (p.next_token() != json_token::END_ARRAY) {
+          // JSON validation check
+          if (json_token::ERROR == p.get_current_token()) { return false; }
+          // track the number of array elements and only emit an outer array if
+          // we've written more than one element, this matches Hive's behavior
+          push_context(
+            p.get_current_token(), 6, child_g, next_style, ctx.path_ptr + 2, ctx.path_size - 2);
+        } else {
+          char* child_g_start = child_g.get_output_start_position();
+          size_t child_g_len  = child_g.get_output_len();
+
+          if (ctx.dirty > 1) {
+            // add outer array tokens
+            ctx.g.write_child_raw_value(
+              child_g_start, child_g_len, /* write_outer_array_tokens */ true);
+            ctx.task_is_done = true;
+          } else if (ctx.dirty == 1) {
+            // remove outer array tokens
+            ctx.g.write_child_raw_value(
+              child_g_start, child_g_len, /* write_outer_array_tokens */ false);
+            ctx.task_is_done = true;
+          }  // else do not write anything
+        }
+      }
+      // case (START_ARRAY, Subscript :: Wildcard :: xs)
+      // case path 7
+      else if (json_token::START_ARRAY == ctx.token &&
+               path_match_elements(ctx.path_ptr,
+                                   ctx.path_size,
+                                   path_instruction_type::SUBSCRIPT,
+                                   path_instruction_type::WILDCARD)) {
+        if (ctx.is_first_enter) {
+          ctx.is_first_enter = false;
+          ctx.g.write_start_array();
+        }
+
+        if (p.next_token() != json_token::END_ARRAY) {
+          // JSON validation check
+          if (json_token::ERROR == p.get_current_token()) { return false; }
+
+          // wildcards can have multiple matches, continually update the dirty
+          // count
+          push_context(p.get_current_token(),
+                       7,
+                       ctx.g,
+                       write_style::quoted_style,
+                       ctx.path_ptr + 2,
+                       ctx.path_size - 2);
+        } else {
+          ctx.g.write_end_array();
+          ctx.task_is_done = true;
+        }
+      }
+      /* case (START_ARRAY, Subscript :: Index(idx) :: (xs@Subscript :: Wildcard :: _)) */
+      // case path 8
+      else if (json_token::START_ARRAY == ctx.token &&
+               thrust::get<0>(
+                 path_match_subscript_index_subscript_wildcard(ctx.path_ptr, ctx.path_size))) {
+        int idx = thrust::get<1>(
+          path_match_subscript_index_subscript_wildcard(ctx.path_ptr, ctx.path_size));
+
+        p.next_token();
+        // JSON validation check
+        if (json_token::ERROR == p.get_current_token()) { return false; }
+        ctx.is_first_enter = false;
+
+        int i = idx;
+        while (i > 0) {
+          if (p.get_current_token() == json_token::END_ARRAY) {
+            // terminate, nothing has been written
+            return false;
+          }
+
+          if (!p.try_skip_children()) { return false; }
+
+          p.next_token();
+          // JSON validation check
+          if (json_token::ERROR == p.get_current_token()) { return false; }
+
+          --i;
+        }
+
+        // i == 0
+        push_context(p.get_current_token(),
+                     8,
+                     ctx.g,
+                     write_style::quoted_style,
+                     ctx.path_ptr + 2,
+                     ctx.path_size - 2);
+      }
+      // case (START_ARRAY, Subscript :: Index(idx) :: xs)
+      // case path 9
+      else if (json_token::START_ARRAY == ctx.token &&
+               thrust::get<0>(path_match_subscript_index(ctx.path_ptr, ctx.path_size))) {
+        int idx = thrust::get<1>(path_match_subscript_index(ctx.path_ptr, ctx.path_size));
+
+        p.next_token();
+        // JSON validation check
+        if (json_token::ERROR == p.get_current_token()) { return false; }
+
+        int i = idx;
+        while (i > 0) {
+          if (p.get_current_token() == json_token::END_ARRAY) {
+            // terminate, nothing has been written
+            return false;
+          }
+
+          if (!p.try_skip_children()) { return false; }
+
+          p.next_token();
+          // JSON validation check
+          if (json_token::ERROR == p.get_current_token()) { return false; }
+
+          --i;
+        }
+
+        // i == 0
+        push_context(
+          p.get_current_token(), 9, ctx.g, ctx.style, ctx.path_ptr + 2, ctx.path_size - 2);
+      }
+      // case (FIELD_NAME, Named(name) :: xs) if p.getCurrentName == name
+      // case path 10
+      else if (json_token::FIELD_NAME == ctx.token &&
+               thrust::get<0>(path_match_named(ctx.path_ptr, ctx.path_size)) &&
+               p.match_current_field_name(
+                 thrust::get<1>(path_match_named(ctx.path_ptr, ctx.path_size)))) {
+        if (p.next_token() != json_token::VALUE_NULL) {
+          // JSON validation check
+          if (json_token::ERROR == p.get_current_token()) { return false; }
+          push_context(
+            p.get_current_token(), 10, ctx.g, ctx.style, ctx.path_ptr + 1, ctx.path_size - 1);
+        } else {
+          return false;
+        }
+      }
+      // case (FIELD_NAME, Wildcard :: xs)
+      // case path 11
+      else if (json_token::FIELD_NAME == ctx.token &&
+               path_match_element(ctx.path_ptr, ctx.path_size, path_instruction_type::WILDCARD)) {
+        p.next_token();
+        // JSON validation check
+        if (json_token::ERROR == p.get_current_token()) { return false; }
+        push_context(
+          p.get_current_token(), 11, ctx.g, ctx.style, ctx.path_ptr + 1, ctx.path_size - 1);
+      }
+      // case _ =>
+      // case path 12
+      else {
+        if (!p.try_skip_children()) { return false; }
+        // default case path, return false for this task
+        ctx.dirty        = 0;
+        ctx.task_is_done = true;
+      }
+    } else {
+      // current context is done.
+
+      // pop current top context
+      stack_pos--;
+
+      // look at the parent task (if any) and
+      // update its info according to the current task's result
+      if (stack_pos > 0) {
+        // peek parent context task
+        auto& p_ctx = stack[stack_pos - 1];
+
+        // case (VALUE_STRING, Nil) if style == RawStyle
+        // case path 1
+        if (1 == ctx.case_path) {
+          // never happen
+        }
+        // path 2: case (START_ARRAY, Nil) if style == FlattenStyle
+        // path 5: case (START_ARRAY, Subscript :: Wildcard :: Subscript :: Wildcard :: xs)
+        // path 7: case (START_ARRAY, Subscript :: Wildcard :: xs)
+        else if (2 == ctx.case_path || 5 == ctx.case_path || 7 == ctx.case_path) {
+          // collect result from child task
+          p_ctx.dirty += ctx.dirty;
+          // copy generator states to parent task;
+          p_ctx.g = ctx.g;
+        }
+        // case (START_OBJECT, Key :: xs)
+        // case path 4
+        else if (4 == ctx.case_path) {
+          if (p_ctx.dirty < 1 && ctx.dirty > 0) { p_ctx.dirty = ctx.dirty; }
+          // copy generator states to parent task;
+          p_ctx.g = ctx.g;
+        }
+        // case (START_ARRAY, Subscript :: Wildcard :: xs) if style != QuotedStyle
+        // case path 6
+        else if (6 == ctx.case_path) {
+          // collect result from child task
+          p_ctx.dirty += ctx.dirty;
+          // update child generator for parent task
+          p_ctx.child_g = ctx.g;
+        }
+        /* case (START_ARRAY, Subscript :: Index(idx) :: (xs@Subscript :: Wildcard :: _)) */
+        // case path 8
+        // case (START_ARRAY, Subscript :: Index(idx) :: xs)
+        // case path 9
+        else if (8 == ctx.case_path || 9 == ctx.case_path) {
+          // collect result from child task
+          p_ctx.dirty += ctx.dirty;
+
+          // post logic:
+          while (p.next_token() != json_token::END_ARRAY) {
+            // JSON validation check
+            if (json_token::ERROR == p.get_current_token()) { return false; }
+            // advance the token stream to the end of the array
+            if (!p.try_skip_children()) { return false; }
+          }
+          // task is done
+          p_ctx.task_is_done = true;
+          // copy generator states to parent task;
+          p_ctx.g = ctx.g;
+        }
+        // case (FIELD_NAME, Named(name) :: xs) if p.getCurrentName == name
+        // case path 10
+        else if (10 == ctx.case_path) {
+          // collect result from child task
+          p_ctx.dirty += ctx.dirty;
+          // task is done
+          p_ctx.task_is_done = true;
+          // copy generator states to parent task;
+          p_ctx.g = ctx.g;
+        }
+        // case (FIELD_NAME, Wildcard :: xs)
+        // case path 11
+        else if (11 == ctx.case_path) {
+          // collect result from child task
+          p_ctx.dirty += ctx.dirty;
+          // task is done
+          p_ctx.task_is_done = true;
+          // copy generator states to parent task;
+          p_ctx.g = ctx.g;
+        }
+        // case path 3: case (_, Nil)
+        // case path 12: case _ =>
+        // others
+        else {
+          // never happen
+        }
+      } else {
+        // has no parent task, stack is empty, will exit
+      }
+    }
+  }
+
+  // copy output len
+  root_g.set_output_len(stack[0].g.get_output_len());
+  return stack[0].dirty > 0;
+}
+
 rmm::device_uvector<path_instruction> construct_path_commands(
   std::vector<std::tuple<path_instruction_type, std::string, int64_t>> const& instructions,
   cudf::string_scalar const& all_names_scalar,
@@ -92,28 +1146,6 @@ rmm::device_uvector<path_instruction> construct_path_commands(
   return cudf::detail::make_device_uvector_sync(path_commands, stream, mr);
 }
 
-/**
- * @brief Parse a single json string using the provided command buffer
- *
- * @param j_parser The incoming json string and associated parser
- * @param path_ptr The command buffer to be applied to the string.
- * @param path_size Command buffer size
- * @param output Buffer used to store the results of the query
- * @returns A result code indicating success/fail/empty.
- */
-__device__ inline bool parse_json_path(json_parser<>& j_parser,
-                                       path_instruction const* path_ptr,
-                                       size_t path_size,
-                                       json_generator<>& output)
-{
-  j_parser.next_token();
-  // JSON validation check
-  if (json_token::ERROR == j_parser.get_current_token()) { return false; }
-
-  return path_evaluator::evaluate_path(
-    j_parser, output, write_style::raw_style, path_ptr, path_size);
-}
-
 /**
  * @brief Parse a single json string using the provided command buffer
  *
@@ -127,7 +1159,7 @@ __device__ inline bool parse_json_path(json_parser<>& j_parser,
  * @param out_buf_size Size of the output buffer
  * @returns A pair containing the result code and the output buffer.
  */
-__device__ thrust::pair<bool, json_generator<>> get_json_object_single(
+__device__ thrust::pair<bool, size_t> get_json_object_single(
   char const* input,
   cudf::size_type input_len,
   path_instruction const* path_commands_ptr,
@@ -135,35 +1167,28 @@ __device__ thrust::pair<bool, json_generator<>> get_json_object_single(
   char* out_buf,
   size_t out_buf_size)
 {
-  char* actual_output;
-  if (nullptr == out_buf) {
-    // First step: preprocess sizes
-    actual_output = out_buf;
-  } else {
-    // Second step: writes output
-    // if output buf size is zero, pass in nullptr to avoid generator writing trash output
-    actual_output = (0 == out_buf_size) ? nullptr : out_buf;
-  }
-
   json_parser j_parser(input, input_len);
-  json_generator generator(actual_output);
+  j_parser.next_token();
+  // JSON validation check
+  if (json_token::ERROR == j_parser.get_current_token()) { return {false, 0}; }
 
-  if (!out_buf) {
-    // First step: preprocess sizes
-    bool success = parse_json_path(j_parser, path_commands_ptr, path_commands_size, generator);
+  // First pass: preprocess sizes.
+  // Second pass: writes output.
+  // The generator automatically determines which pass based on `out_buf`.
+  // If `out_buf_size` is zero, pass in `nullptr` to avoid generator writing trash output.
+  json_generator generator((out_buf == nullptr || out_buf_size == 0) ? nullptr : out_buf);
 
-    if (!success) {
-      // generator may contain trash output, e.g.: generator writes some output,
-      // then JSON format is invalid, the previous output becomes trash.
-      // set output as zero to tell second step
-      generator.set_output_len_zero();
-    }
-    return {success, std::move(generator)};
-  } else {
-    // Second step: writes output
-    bool success = parse_json_path(j_parser, path_commands_ptr, path_commands_size, generator);
-    return {success, std::move(generator)};
+  bool const success = evaluate_path(
+    j_parser, generator, write_style::raw_style, path_commands_ptr, path_commands_size);
+
+  if (nullptr == out_buf && !success) {
+    // generator may contain trash output, e.g.: generator writes some output,
+    // then JSON format is invalid, the previous output becomes trash.
+    // set output as zero to tell second step
+    generator.set_output_len_zero();
   }
+
+  return {success, generator.get_output_len()};
 }
 
 /**
@@ -202,22 +1227,24 @@ __launch_bounds__(block_size) CUDF_KERNEL
   while (tid < col.size()) {
     bool is_valid               = false;
     cudf::string_view const str = col.element<cudf::string_view>(tid);
-    cudf::size_type output_size = 0;
     if (str.size_bytes() > 0) {
       char* dst = out_buf != nullptr ? out_buf + output_offsets[tid] : nullptr;
       size_t const dst_size =
         out_buf != nullptr ? output_offsets[tid + 1] - output_offsets[tid] : 0;
 
       // process one single row
-      auto [result, out] = get_json_object_single(
+      auto [result, output_size] = get_json_object_single(
         str.data(), str.size_bytes(), path_commands_ptr, path_commands_size, dst, dst_size);
-      output_size = out.get_output_len();
       if (result) { is_valid = true; }
-    }
 
-    // filled in only during the precompute step. during the compute step, the
-    // offsets are fed back in so we do -not- want to write them out
-    if (out_buf == nullptr) { d_sizes[tid] = output_size; }
+      // filled in only during the precompute step. during the compute step, the
+      // offsets are fed back in so we do -not- want to write them out
+      if (out_buf == nullptr) { d_sizes[tid] = static_cast<cudf::size_type>(output_size); }
+    } else {
+      // a valid JSON string is never empty, so when `str` has zero length
+      // the output length is 0 and `is_valid` stays false
+      if (out_buf == nullptr) { d_sizes[tid] = 0; }
+    }
 
     // validity filled in only during the output step
     if (out_validity != nullptr) {
@@ -305,16 +1332,11 @@ std::unique_ptr<cudf::column> get_json_object(
       static_cast<cudf::bitmask_type*>(validity.data()),
       d_valid_count.data());
 
-  auto result = make_strings_column(input.size(),
-                                    std::move(offsets),
-                                    chars.release(),
-                                    input.size() - d_valid_count.value(stream),
-                                    std::move(validity));
-  // unmatched array query may result in unsanitized '[' value in the result
-  if (auto const result_cv = result->view(); cudf::detail::has_nonempty_nulls(result_cv, stream)) {
-    result = cudf::detail::purge_nonempty_nulls(result_cv, stream, mr);
-  }
-  return result;
+  return make_strings_column(input.size(),
+                             std::move(offsets),
+                             chars.release(),
+                             input.size() - d_valid_count.value(stream),
+                             std::move(validity));
 }
 
 }  // namespace detail
diff --git a/src/main/cpp/src/get_json_object.hpp b/src/main/cpp/src/get_json_object.hpp
index 0522cabee5..cf1f0c3470 100644
--- a/src/main/cpp/src/get_json_object.hpp
+++ b/src/main/cpp/src/get_json_object.hpp
@@ -14,7 +14,9 @@
  * limitations under the License.
  */
 
-#include "json_parser.hpp"
+#pragma once
+
+#include "json_parser.cuh"
 
 #include <cudf/strings/string_view.hpp>
 #include <cudf/strings/strings_column_view.hpp>
@@ -35,1146 +37,6 @@ namespace spark_rapids_jni {
  */
 enum class path_instruction_type { SUBSCRIPT, WILDCARD, KEY, INDEX, NAMED };
 
-namespace detail {
-
-/**
- * write JSON style
- */
-enum class write_style { raw_style, quoted_style, flatten_style };
-
-/**
- * path instruction
- */
-struct path_instruction {
-  __device__ inline path_instruction(path_instruction_type _type) : type(_type) {}
-
-  path_instruction_type type;
-
-  // used when type is named type
-  cudf::string_view name;
-
-  // used when type is index
-  int index{-1};
-};
-
-/**
- * JSON generator is used to write out JSON content.
- * Because of get_json_object only outputs JSON object as a whole item,
- * it's no need to store internal state for JSON object when outputing,
- * only need to store internal state for JSON array.
- */
-template <int max_json_nesting_depth = curr_max_json_nesting_depth>
-class json_generator {
- public:
-  __device__ json_generator(char* _output) : output(_output), output_len(0) {}
-  __device__ json_generator() : output(nullptr), output_len(0) {}
-
-  __device__ json_generator<>& operator=(json_generator<> const& other)
-  {
-    this->output      = other.output;
-    this->output_len  = other.output_len;
-    this->array_depth = other.array_depth;
-    for (size_t i = 0; i < max_json_nesting_depth; i++) {
-      this->is_first_item[i] = other.is_first_item[i];
-    }
-
-    return *this;
-  }
-
-  // create a nested child generator based on this parent generator,
-  // child generator is a view, parent and child share the same byte array
-  __device__ json_generator new_child_generator()
-  {
-    if (nullptr == output) {
-      return json_generator();
-    } else {
-      return json_generator(output + output_len);
-    }
-  }
-
-  // write [
-  // add an extra comma if needed,
-  // e.g.: when JSON content is: [[1,2,3]
-  // writing a new [ should result: [[1,2,3],[
-  __device__ void write_start_array()
-  {
-    try_write_comma();
-
-    // update internal state
-    if (array_depth > 0) { is_first_item[array_depth - 1] = false; }
-
-    if (output) { *(output + output_len) = '['; }
-
-    output_len++;
-    is_first_item[array_depth] = true;
-    array_depth++;
-  }
-
-  // write ]
-  __device__ void write_end_array()
-  {
-    if (output) { *(output + output_len) = ']'; }
-    output_len++;
-    array_depth--;
-  }
-
-  // write first start array without output, only update internal state
-  __device__ void write_first_start_array_without_output()
-  {
-    // hide the outer start array token
-    // Note: do not inc output_len
-    is_first_item[array_depth] = true;
-    array_depth++;
-  }
-
-  // return true if it's in a array context and it's not writing the first item.
-  __device__ bool need_comma() { return (array_depth > 0 && !is_first_item[array_depth - 1]); }
-
-  /**
-   * write comma accroding to current generator state
-   */
-  __device__ void try_write_comma()
-  {
-    if (need_comma()) {
-      // in array context and writes first item
-      if (output) { *(output + output_len) = ','; }
-      output_len++;
-    }
-  }
-
-  /**
-   * copy current structure when parsing. If current token is start
-   * object/array, then copy to corresponding matched end object/array. return
-   * false if JSON format is invalid return true if JSON format is valid
-   */
-  __device__ bool copy_current_structure(json_parser<>& parser)
-  {
-    // first try add comma
-    try_write_comma();
-
-    if (array_depth > 0) { is_first_item[array_depth - 1] = false; }
-
-    if (nullptr != output) {
-      auto copy_to       = output + output_len;
-      auto [b, copy_len] = parser.copy_current_structure(copy_to);
-      output_len += copy_len;
-      return b;
-    } else {
-      char* copy_to      = nullptr;
-      auto [b, copy_len] = parser.copy_current_structure(copy_to);
-      output_len += copy_len;
-      return b;
-    }
-  }
-
-  /**
-   * Get current text from JSON parser and then write the text
-   * Note: Because JSON strings contains '\' to do escape,
-   * JSON parser should do unescape to remove '\' and JSON parser
-   * then can not return a pointer and length pair (char *, len),
-   * For number token, JSON parser can return a pair (char *, len)
-   */
-  __device__ void write_raw(json_parser<>& parser)
-  {
-    if (array_depth > 0) { is_first_item[array_depth - 1] = false; }
-
-    if (nullptr != output) {
-      auto copied = parser.write_unescaped_text(output + output_len);
-      output_len += copied;
-    } else {
-      auto len = parser.compute_unescaped_len();
-      output_len += len;
-    }
-  }
-
-  /**
-   * write child raw value
-   * e.g.:
-   *
-   * write_outer_array_tokens = false
-   * need_comma = true
-   * [1,2,3]1,2,3
-   *        ^
-   *        |
-   *    child pointer
-   * ==>>
-   * [1,2,3],1,2,3
-   *
-   *
-   * write_outer_array_tokens = true
-   * need_comma = true
-   *   [12,3,4
-   *     ^
-   *     |
-   * child pointer
-   * ==>>
-   *   [1,[2,3,4]
-   *
-   * For more information about param write_outer_array_tokens, refer to
-   * `write_first_start_array_without_output`
-   * @param child_block_begin
-   * @param child_block_len
-   * @param write_outer_array_tokens whether write outer array tokens for child block
-   */
-  __device__ void write_child_raw_value(char* child_block_begin,
-                                        size_t child_block_len,
-                                        bool write_outer_array_tokens)
-  {
-    bool insert_comma = need_comma();
-
-    is_first_item[array_depth - 1] = false;
-
-    if (nullptr != output) {
-      if (write_outer_array_tokens) {
-        if (insert_comma) {
-          *(child_block_begin + child_block_len + 2) = ']';
-          move_forward(child_block_begin, child_block_len, 2);
-          *(child_block_begin + 1) = '[';
-          *(child_block_begin)     = ',';
-        } else {
-          *(child_block_begin + child_block_len + 1) = ']';
-          move_forward(child_block_begin, child_block_len, 1);
-          *(child_block_begin) = '[';
-        }
-      } else {
-        if (insert_comma) {
-          move_forward(child_block_begin, child_block_len, 1);
-          *(child_block_begin) = ',';
-        } else {
-          // do not need comma && do not need write outer array tokens
-          // do nothing, because child generator buff is directly after the
-          // parent generator
-        }
-      }
-    }
-
-    // update length
-    if (insert_comma) { output_len++; }
-    if (write_outer_array_tokens) { output_len += 2; }
-    output_len += child_block_len;
-  }
-
-  // move memory block forward by specified bytes
-  // e.g.:  memory is: 1 2 0 0, begin is 1, len is 2, after moving,
-  // memory is: 1 2 1 2.
-  // e.g.:  memory is: 1 2 0 0, begin is 1, len is 1, after moving,
-  // memory is: 1 1 2 0.
-  // Note: should move from end to begin to avoid overwrite buffer
-  __device__ void move_forward(char* begin, size_t len, int forward)
-  {
-    char* pos = begin + len + forward - 1;
-    char* e   = begin + forward - 1;
-    while (pos > e) {
-      *pos = *(pos - forward);
-      pos--;
-    }
-  }
-
-  __device__ void reset() { output_len = 0; }
-
-  __device__ inline size_t get_output_len() const { return output_len; }
-  __device__ inline char* get_output_start_position() const { return output; }
-  __device__ inline char* get_current_output_position() const { return output + output_len; }
-
-  /**
-   * generator may contain trash output, e.g.: generator writes some output,
-   * then JSON format is invalid, the previous output becomes trash.
-   */
-  __device__ inline void set_output_len_zero() { output_len = 0; }
-
-  __device__ inline void set_output_len(size_t len) { output_len = len; }
-
- private:
-  char* output;
-  size_t output_len;
-
-  bool is_first_item[max_json_nesting_depth];
-  int array_depth = 0;
-};
-
-/**
- * path evaluator which can run on both CPU and GPU
- */
-struct path_evaluator {
-  static __device__ inline bool path_is_empty(size_t path_size) { return path_size == 0; }
-
-  static __device__ inline bool path_match_element(path_instruction const* path_ptr,
-                                                   size_t path_size,
-                                                   path_instruction_type path_type0)
-  {
-    if (path_size < 1) { return false; }
-    return path_ptr[0].type == path_type0;
-  }
-
-  static __device__ inline bool path_match_elements(path_instruction const* path_ptr,
-                                                    size_t path_size,
-                                                    path_instruction_type path_type0,
-                                                    path_instruction_type path_type1)
-  {
-    if (path_size < 2) { return false; }
-    return path_ptr[0].type == path_type0 && path_ptr[1].type == path_type1;
-  }
-
-  static __device__ inline bool path_match_elements(path_instruction const* path_ptr,
-                                                    size_t path_size,
-                                                    path_instruction_type path_type0,
-                                                    path_instruction_type path_type1,
-                                                    path_instruction_type path_type2,
-                                                    path_instruction_type path_type3)
-  {
-    if (path_size < 4) { return false; }
-    return path_ptr[0].type == path_type0 && path_ptr[1].type == path_type1 &&
-           path_ptr[2].type == path_type2 && path_ptr[3].type == path_type3;
-  }
-
-  static __device__ inline thrust::tuple<bool, int> path_match_subscript_index(
-    path_instruction const* path_ptr, size_t path_size)
-  {
-    auto match = path_match_elements(
-      path_ptr, path_size, path_instruction_type::SUBSCRIPT, path_instruction_type::INDEX);
-    if (match) {
-      return thrust::make_tuple(true, path_ptr[1].index);
-    } else {
-      return thrust::make_tuple(false, 0);
-    }
-  }
-
-  static __device__ inline thrust::tuple<bool, cudf::string_view> path_match_named(
-    path_instruction const* path_ptr, size_t path_size)
-  {
-    auto match = path_match_element(path_ptr, path_size, path_instruction_type::NAMED);
-    if (match) {
-      return thrust::make_tuple(true, path_ptr[0].name);
-    } else {
-      return thrust::make_tuple(false, cudf::string_view());
-    }
-  }
-
-  static __device__ inline thrust::tuple<bool, int> path_match_subscript_index_subscript_wildcard(
-    path_instruction const* path_ptr, size_t path_size)
-  {
-    auto match = path_match_elements(path_ptr,
-                                     path_size,
-                                     path_instruction_type::SUBSCRIPT,
-                                     path_instruction_type::INDEX,
-                                     path_instruction_type::SUBSCRIPT,
-                                     path_instruction_type::WILDCARD);
-    if (match) {
-      return thrust::make_tuple(true, path_ptr[1].index);
-    } else {
-      return thrust::make_tuple(false, 0);
-    }
-  }
-
-  /**
-   *
-   * The following commented function is recursive version,
-   * The next function below is the rewritten version,
-   * Keep version here is for review purpuse, because rewritten version(iterative)
-   * is not human friendly.
-   *
-   */
-  // static __device__ bool evaluate_path(json_parser<>& p,
-  //                                            json_generator<>& g,
-  //                                            write_style style,
-  //                                            path_instruction const* path_ptr,
-  //                                            int path_size)
-  // {
-  //   auto token = p.get_current_token();
-
-  //   // case (VALUE_STRING, Nil) if style == RawStyle
-  //   // case path 1
-  //   if (json_token::VALUE_STRING == token && path_is_empty(path_size) &&
-  //       style == write_style::raw_style) {
-  //     // there is no array wildcard or slice parent, emit this string without
-  //     // quotes write current string in parser to generator
-  //     g.write_raw(p);
-  //     return true;
-  //   }
-  //   // case (START_ARRAY, Nil) if style == FlattenStyle
-  //   // case path 2
-  //   else if (json_token::START_ARRAY == token && path_is_empty(path_size) &&
-  //            style == write_style::flatten_style) {
-  //     // flatten this array into the parent
-  //     bool dirty = false;
-  //     while (json_token::END_ARRAY != p.next_token()) {
-  //       // JSON validation check
-  //       if (json_token::ERROR == p.get_current_token()) { return false; }
-
-  //       dirty |= path_evaluator::evaluate_path(p, g, style, nullptr, 0);
-  //     }
-  //     return dirty;
-  //   }
-  //   // case (_, Nil)
-  //   // case path 3
-  //   else if (path_is_empty(path_size)) {
-  //     // general case: just copy the child tree verbatim
-  //     return g.copy_current_structure(p);
-  //   }
-  //   // case (START_OBJECT, Key :: xs)
-  //   // case path 4
-  //   else if (json_token::START_OBJECT == token &&
-  //            path_match_element(path_ptr, path_size, path_instruction_type::KEY)) {
-  //     bool dirty = false;
-  //     while (json_token::END_OBJECT != p.next_token()) {
-  //       // JSON validation check
-  //       if (json_token::ERROR == p.get_current_token()) { return false; }
-
-  //       if (dirty) {
-  //         // once a match has been found we can skip other fields
-  //         if (!p.try_skip_children()) {
-  //           // JSON validation check
-  //           return false;
-  //         }
-  //       } else {
-  //         dirty = path_evaluator::evaluate_path(p, g, style, path_ptr + 1, path_size - 1);
-  //       }
-  //     }
-  //     return dirty;
-  //   }
-  //   // case (START_ARRAY, Subscript :: Wildcard :: Subscript :: Wildcard :: xs)
-  //   // case path 5
-  //   else if (json_token::START_ARRAY == token &&
-  //            path_match_elements(path_ptr,
-  //                                path_size,
-  //                                path_instruction_type::SUBSCRIPT,
-  //                                path_instruction_type::WILDCARD,
-  //                                path_instruction_type::SUBSCRIPT,
-  //                                path_instruction_type::WILDCARD)) {
-  //     // special handling for the non-structure preserving double wildcard
-  //     // behavior in Hive
-  //     bool dirty = false;
-  //     g.write_start_array();
-  //     while (p.next_token() != json_token::END_ARRAY) {
-  //       // JSON validation check
-  //       if (json_token::ERROR == p.get_current_token()) { return false; }
-
-  //       dirty |= path_evaluator::evaluate_path(
-  //         p, g, write_style::flatten_style, path_ptr + 4, path_size - 4);
-  //     }
-  //     g.write_end_array();
-  //     return dirty;
-  //   }
-  //   // case (START_ARRAY, Subscript :: Wildcard :: xs) if style != QuotedStyle
-  //   // case path 6
-  //   else if (json_token::START_ARRAY == token &&
-  //            path_match_elements(path_ptr,
-  //                                path_size,
-  //                                path_instruction_type::SUBSCRIPT,
-  //                                path_instruction_type::WILDCARD) &&
-  //            style != write_style::quoted_style) {
-  //     // retain Flatten, otherwise use Quoted... cannot use Raw within an array
-  //     write_style next_style = write_style::raw_style;
-  //     switch (style) {
-  //       case write_style::raw_style: next_style = write_style::quoted_style; break;
-  //       case write_style::flatten_style: next_style = write_style::flatten_style; break;
-  //       case write_style::quoted_style: next_style = write_style::quoted_style;  // never happen
-  //     }
-
-  //     // temporarily buffer child matches, the emitted json will need to be
-  //     // modified slightly if there is only a single element written
-
-  //     int dirty = 0;
-  //     // create a child generator with hide outer array tokens mode.
-  //     auto child_g = g.new_child_generator(/*hide_outer_array_tokens*/ true);
-
-  //     // Note: child generator does not actually write the outer start array
-  //     // token into buffer it only updates internal nested state
-  //     child_g.write_start_array();
-
-  //     while (p.next_token() != json_token::END_ARRAY) {
-  //       // JSON validation check
-  //       if (json_token::ERROR == p.get_current_token()) { return false; }
-
-  //       // track the number of array elements and only emit an outer array if
-  //       // we've written more than one element, this matches Hive's behavior
-  //       dirty +=
-  //         (path_evaluator::evaluate_path(p, child_g, next_style, path_ptr + 2, path_size - 2) ? 1
-  //                                                                                             :
-  //                                                                                             0);
-  //     }
-
-  //     // Note: child generator does not actually write the outer end array token
-  //     // into buffer it only updates internal nested state
-  //     child_g.write_end_array();
-
-  //     char* child_g_start = child_g.get_output_start_position();
-  //     size_t child_g_len  = child_g.get_output_len();  // len already excluded outer [ ]
-
-  //     if (dirty > 1) {
-  //       // add outer array tokens
-  //       g.write_child_raw_value(child_g_start, child_g_len, true);
-  //     } else if (dirty == 1) {
-  //       // remove outer array tokens
-  //       g.write_child_raw_value(child_g_start, child_g_len, false);
-  //     }  // else do not write anything
-
-  //     return dirty > 0;
-  //   }
-  //   // case (START_ARRAY, Subscript :: Wildcard :: xs)
-  //   // case path 7
-  //   else if (json_token::START_ARRAY == token &&
-  //            path_match_elements(path_ptr,
-  //                                path_size,
-  //                                path_instruction_type::SUBSCRIPT,
-  //                                path_instruction_type::WILDCARD)) {
-  //     bool dirty = false;
-  //     g.write_start_array();
-  //     while (p.next_token() != json_token::END_ARRAY) {
-  //       // JSON validation check
-  //       if (json_token::ERROR == p.get_current_token()) { return false; }
-
-  //       // wildcards can have multiple matches, continually update the dirty
-  //       // count
-  //       dirty |= path_evaluator::evaluate_path(
-  //         p, g, write_style::quoted_style, path_ptr + 2, path_size - 2);
-  //     }
-  //     g.write_end_array();
-
-  //     return dirty;
-  //   }
-  //   /* case (START_ARRAY, Subscript :: Index(idx) :: (xs@Subscript :: Wildcard :: _)) */
-  //   // case path 8
-  //   else if (json_token::START_ARRAY == token &&
-  //            thrust::get<0>(path_match_subscript_index_subscript_wildcard(path_ptr, path_size)))
-  //            {
-  //     int idx = thrust::get<1>(path_match_subscript_index_subscript_wildcard(path_ptr,
-  //     path_size)); p.next_token();
-  //     // JSON validation check
-  //     if (json_token::ERROR == p.get_current_token()) { return false; }
-
-  //     int i = idx;
-  //     while (i >= 0) {
-  //       if (p.get_current_token() == json_token::END_ARRAY) {
-  //         // terminate, nothing has been written
-  //         return false;
-  //       }
-  //       if (0 == i) {
-  //         bool dirty = path_evaluator::evaluate_path(
-  //           p, g, write_style::quoted_style, path_ptr + 2, path_size - 2);
-  //         while (p.next_token() != json_token::END_ARRAY) {
-  //           // JSON validation check
-  //           if (json_token::ERROR == p.get_current_token()) { return false; }
-
-  //           // advance the token stream to the end of the array
-  //           if (!p.try_skip_children()) { return false; }
-  //         }
-  //         return dirty;
-  //       } else {
-  //         // i > 0
-  //         if (!p.try_skip_children()) { return false; }
-
-  //         p.next_token();
-  //         // JSON validation check
-  //         if (json_token::ERROR == p.get_current_token()) { return false; }
-  //       }
-  //       --i;
-  //     }
-  //     // path parser guarantees idx >= 0
-  //     // will never reach to here
-  //     return false;
-  //   }
-  //   // case (START_ARRAY, Subscript :: Index(idx) :: xs)
-  //   // case path 9
-  //   else if (json_token::START_ARRAY == token &&
-  //            thrust::get<0>(path_match_subscript_index(path_ptr, path_size))) {
-  //     int idx = thrust::get<1>(path_match_subscript_index(path_ptr, path_size));
-  //     p.next_token();
-  //     // JSON validation check
-  //     if (json_token::ERROR == p.get_current_token()) { return false; }
-
-  //     int i = idx;
-  //     while (i >= 0) {
-  //       if (p.get_current_token() == json_token::END_ARRAY) {
-  //         // terminate, nothing has been written
-  //         return false;
-  //       }
-  //       if (0 == i) {
-  //         bool dirty = path_evaluator::evaluate_path(p, g, style, path_ptr + 2, path_size - 2);
-  //         while (p.next_token() != json_token::END_ARRAY) {
-  //           // JSON validation check
-  //           if (json_token::ERROR == p.get_current_token()) { return false; }
-
-  //           // advance the token stream to the end of the array
-  //           if (!p.try_skip_children()) { return false; }
-  //         }
-  //         return dirty;
-  //       } else {
-  //         // i > 0
-  //         if (!p.try_skip_children()) { return false; }
-
-  //         p.next_token();
-  //         // JSON validation check
-  //         if (json_token::ERROR == p.get_current_token()) { return false; }
-  //       }
-  //       --i;
-  //     }
-  //     // path parser guarantees idx >= 0
-  //     // will never reach to here
-  //     return false;
-  //   }
-  //   // case (FIELD_NAME, Named(name) :: xs) if p.getCurrentName == name
-  //   // case path 10
-  //   else if (json_token::FIELD_NAME == token &&
-  //            thrust::get<0>(path_match_named(path_ptr, path_size)) &&
-  //            p.match_current_field_name(thrust::get<1>(path_match_named(path_ptr, path_size)))) {
-  //     if (p.next_token() != json_token::VALUE_NULL) {
-  //       // JSON validation check
-  //       if (json_token::ERROR == p.get_current_token()) { return false; }
-
-  //       return path_evaluator::evaluate_path(p, g, style, path_ptr + 1, path_size - 1);
-  //     } else {
-  //       return false;
-  //     }
-  //   }
-  //   // case (FIELD_NAME, Wildcard :: xs)
-  //   // case path 11
-  //   else if (json_token::FIELD_NAME == token &&
-  //            path_match_element(path_ptr, path_size, path_instruction_type::WILDCARD)) {
-  //     p.next_token();
-  //     // JSON validation check
-  //     if (json_token::ERROR == p.get_current_token()) { return false; }
-
-  //     return path_evaluator::evaluate_path(p, g, style, path_ptr + 1, path_size - 1);
-  //   }
-  //   // case _ =>
-  //   // case path 12
-  //   else {
-  //     if (!p.try_skip_children()) { return false; }
-  //     return false;
-  //   }
-  // }
-
-  /**
-   *
-   * This function is rewritten from above commented recursive function.
-   * this function is equivalent to the above commented recursive function.
-   */
-  static __device__ bool evaluate_path(json_parser<>& p,
-                                       json_generator<>& root_g,
-                                       write_style root_style,
-                                       path_instruction const* root_path_ptr,
-                                       int root_path_size)
-  {
-    // manually maintained context stack in lieu of calling evaluate_path recursively.
-    struct context {
-      // current token
-      json_token token;
-
-      // which case path that this task is from
-      int case_path;
-
-      // used to save current generator
-      json_generator<> g;
-
-      write_style style;
-      path_instruction const* path_ptr;
-      int path_size;
-
-      // is this context task is done
-      bool task_is_done = false;
-
-      // whether written output
-      // if dirty > 0, indicates success
-      int dirty = 0;
-
-      // for some case paths
-      bool is_first_enter = true;
-
-      // used to save child JSON generator for case path 8
-      json_generator<> child_g;
-
-      __device__ context()
-        : token(json_token::INIT),
-          case_path(-1),
-          g(json_generator<>()),
-          style(write_style::raw_style),
-          path_ptr(nullptr),
-          path_size(0)
-      {
-      }
-
-      __device__ context(json_token _token,
-                         int _case_path,
-                         json_generator<> _g,
-                         write_style _style,
-                         path_instruction const* _path_ptr,
-                         int _path_size)
-        : token(_token),
-          case_path(_case_path),
-          g(_g),
-          style(_style),
-          path_ptr(_path_ptr),
-          path_size(_path_size)
-      {
-      }
-
-      __device__ context& operator=(context const&) = default;
-    };
-
-    // path max depth limitation
-    constexpr int max_path_depth = 32;
-
-    // stack
-    context stack[max_path_depth];
-    int stack_pos = 0;
-
-    // push context function
-    auto push_context = [&stack, &stack_pos](json_token _token,
-                                             int _case_path,
-                                             json_generator<> _g,
-                                             write_style _style,
-                                             path_instruction const* _path_ptr,
-                                             int _path_size) {
-      if (stack_pos == max_path_depth - 1) { return false; }
-      stack[stack_pos++] = context(_token, _case_path, _g, _style, _path_ptr, _path_size);
-      return true;
-    };
-
-    // push context function
-    auto push_ctx = [&stack, &stack_pos](context ctx) {
-      if (stack_pos == max_path_depth - 1) { return false; }
-      stack[stack_pos++] = ctx;
-      return true;
-    };
-
-    // pop context function
-    auto pop_context = [&stack, &stack_pos](context& c) {
-      if (stack_pos > 0) {
-        c = stack[--stack_pos];
-        return true;
-      }
-      return false;
-    };
-
-    // put the first context task
-    push_context(p.get_current_token(), -1, root_g, root_style, root_path_ptr, root_path_size);
-
-    // current context task
-    context ctx;
-
-    // parent context task
-    context p_ctx;
-
-    while (pop_context(ctx)) {
-      if (!ctx.task_is_done) {
-        // task is not done.
-
-        // case (VALUE_STRING, Nil) if style == RawStyle
-        // case path 1
-        if (json_token::VALUE_STRING == ctx.token && path_is_empty(ctx.path_size) &&
-            ctx.style == write_style::raw_style) {
-          // there is no array wildcard or slice parent, emit this string without
-          // quotes write current string in parser to generator
-          ctx.g.write_raw(p);
-          ctx.dirty        = 1;
-          ctx.task_is_done = true;
-          push_ctx(ctx);
-        }
-        // case (START_ARRAY, Nil) if style == FlattenStyle
-        // case path 2
-        else if (json_token::START_ARRAY == ctx.token && path_is_empty(ctx.path_size) &&
-                 ctx.style == write_style::flatten_style) {
-          // flatten this array into the parent
-          if (json_token::END_ARRAY != p.next_token()) {
-            // JSON validation check
-            if (json_token::ERROR == p.get_current_token()) { return false; }
-            // push back task
-            push_ctx(ctx);
-            // add child task
-            push_context(p.get_current_token(), 2, ctx.g, ctx.style, nullptr, 0);
-          } else {
-            // END_ARRAY
-            ctx.task_is_done = true;
-            push_ctx(ctx);
-          }
-        }
-        // case (_, Nil)
-        // case path 3
-        else if (path_is_empty(ctx.path_size)) {
-          // general case: just copy the child tree verbatim
-          if (!(ctx.g.copy_current_structure(p))) {
-            // JSON validation check
-            return false;
-          }
-          ctx.dirty        = 1;
-          ctx.task_is_done = true;
-          push_ctx(ctx);
-        }
-        // case (START_OBJECT, Key :: xs)
-        // case path 4
-        else if (json_token::START_OBJECT == ctx.token &&
-                 path_match_element(ctx.path_ptr, ctx.path_size, path_instruction_type::KEY)) {
-          if (json_token::END_OBJECT != p.next_token()) {
-            // JSON validation check
-            if (json_token::ERROR == p.get_current_token()) { return false; }
-
-            if (ctx.dirty > 0) {
-              // once a match has been found we can skip other fields
-              if (!p.try_skip_children()) {
-                // JSON validation check
-                return false;
-              }
-              push_ctx(ctx);
-            } else {
-              // need to try more children
-              push_ctx(ctx);
-              push_context(
-                p.get_current_token(), 4, ctx.g, ctx.style, ctx.path_ptr + 1, ctx.path_size - 1);
-            }
-          } else {
-            ctx.task_is_done = true;
-            push_ctx(ctx);
-          }
-        }
-        // case (START_ARRAY, Subscript :: Wildcard :: Subscript :: Wildcard :: xs)
-        // case path 5
-        else if (json_token::START_ARRAY == ctx.token &&
-                 path_match_elements(ctx.path_ptr,
-                                     ctx.path_size,
-                                     path_instruction_type::SUBSCRIPT,
-                                     path_instruction_type::WILDCARD,
-                                     path_instruction_type::SUBSCRIPT,
-                                     path_instruction_type::WILDCARD)) {
-          // special handling for the non-structure preserving double wildcard
-          // behavior in Hive
-          if (ctx.is_first_enter) {
-            ctx.is_first_enter = false;
-            ctx.g.write_start_array();
-          }
-
-          if (p.next_token() != json_token::END_ARRAY) {
-            // JSON validation check
-            if (json_token::ERROR == p.get_current_token()) { return false; }
-            push_ctx(ctx);
-            push_context(p.get_current_token(),
-                         5,
-                         ctx.g,
-                         write_style::flatten_style,
-                         ctx.path_ptr + 4,
-                         ctx.path_size - 4);
-          } else {
-            ctx.g.write_end_array();
-            ctx.task_is_done = true;
-            push_ctx(ctx);
-          }
-        }
-        // case (START_ARRAY, Subscript :: Wildcard :: xs) if style != QuotedStyle
-        // case path 6
-        else if (json_token::START_ARRAY == ctx.token &&
-                 path_match_elements(ctx.path_ptr,
-                                     ctx.path_size,
-                                     path_instruction_type::SUBSCRIPT,
-                                     path_instruction_type::WILDCARD) &&
-                 ctx.style != write_style::quoted_style) {
-          // retain Flatten, otherwise use Quoted... cannot use Raw within an array
-          write_style next_style = write_style::raw_style;
-          switch (ctx.style) {
-            case write_style::raw_style: next_style = write_style::quoted_style; break;
-            case write_style::flatten_style: next_style = write_style::flatten_style; break;
-            case write_style::quoted_style: next_style = write_style::quoted_style;  // never happen
-          }
-
-          // temporarily buffer child matches, the emitted json will need to be
-          // modified slightly if there is only a single element written
-
-          json_generator<> child_g;
-          if (ctx.is_first_enter) {
-            ctx.is_first_enter = false;
-            // create a child generator with hide outer array tokens mode.
-            child_g = ctx.g.new_child_generator();
-            // write first [ without output, without update len, only update internal state
-            child_g.write_first_start_array_without_output();
-          } else {
-            child_g = ctx.child_g;
-          }
-
-          if (p.next_token() != json_token::END_ARRAY) {
-            // JSON validation check
-            if (json_token::ERROR == p.get_current_token()) { return false; }
-
-            push_ctx(ctx);
-            // track the number of array elements and only emit an outer array if
-            // we've written more than one element, this matches Hive's behavior
-            push_context(
-              p.get_current_token(), 6, child_g, next_style, ctx.path_ptr + 2, ctx.path_size - 2);
-          } else {
-            char* child_g_start = child_g.get_output_start_position();
-            size_t child_g_len  = child_g.get_output_len();
-
-            if (ctx.dirty > 1) {
-              // add outer array tokens
-              ctx.g.write_child_raw_value(
-                child_g_start, child_g_len, /* write_outer_array_tokens */ true);
-              ctx.task_is_done = true;
-              push_ctx(ctx);
-            } else if (ctx.dirty == 1) {
-              // remove outer array tokens
-              ctx.g.write_child_raw_value(
-                child_g_start, child_g_len, /* write_outer_array_tokens */ false);
-              ctx.task_is_done = true;
-              push_ctx(ctx);
-            }  // else do not write anything
-          }
-        }
-        // case (START_ARRAY, Subscript :: Wildcard :: xs)
-        // case path 7
-        else if (json_token::START_ARRAY == ctx.token &&
-                 path_match_elements(ctx.path_ptr,
-                                     ctx.path_size,
-                                     path_instruction_type::SUBSCRIPT,
-                                     path_instruction_type::WILDCARD)) {
-          if (ctx.is_first_enter) {
-            ctx.is_first_enter = false;
-            ctx.g.write_start_array();
-          }
-
-          if (p.next_token() != json_token::END_ARRAY) {
-            // JSON validation check
-            if (json_token::ERROR == p.get_current_token()) { return false; }
-
-            // wildcards can have multiple matches, continually update the dirty
-            // count
-            push_ctx(ctx);
-            push_context(p.get_current_token(),
-                         7,
-                         ctx.g,
-                         write_style::quoted_style,
-                         ctx.path_ptr + 2,
-                         ctx.path_size - 2);
-          } else {
-            ctx.g.write_end_array();
-            ctx.task_is_done = true;
-            push_ctx(ctx);
-          }
-        }
-        /* case (START_ARRAY, Subscript :: Index(idx) :: (xs@Subscript :: Wildcard :: _)) */
-        // case path 8
-        else if (json_token::START_ARRAY == ctx.token &&
-                 thrust::get<0>(
-                   path_match_subscript_index_subscript_wildcard(ctx.path_ptr, ctx.path_size))) {
-          int idx = thrust::get<1>(
-            path_match_subscript_index_subscript_wildcard(ctx.path_ptr, ctx.path_size));
-
-          p.next_token();
-          // JSON validation check
-          if (json_token::ERROR == p.get_current_token()) { return false; }
-          ctx.is_first_enter = false;
-
-          int i = idx;
-          while (i > 0) {
-            if (p.get_current_token() == json_token::END_ARRAY) {
-              // terminate, nothing has been written
-              return false;
-            }
-
-            if (!p.try_skip_children()) { return false; }
-
-            p.next_token();
-            // JSON validation check
-            if (json_token::ERROR == p.get_current_token()) { return false; }
-
-            --i;
-          }
-
-          // i == 0
-          push_ctx(ctx);
-          push_context(p.get_current_token(),
-                       8,
-                       ctx.g,
-                       write_style::quoted_style,
-                       ctx.path_ptr + 2,
-                       ctx.path_size - 2);
-        }
-        // case (START_ARRAY, Subscript :: Index(idx) :: xs)
-        // case path 9
-        else if (json_token::START_ARRAY == ctx.token &&
-                 thrust::get<0>(path_match_subscript_index(ctx.path_ptr, ctx.path_size))) {
-          int idx = thrust::get<1>(path_match_subscript_index(ctx.path_ptr, ctx.path_size));
-
-          p.next_token();
-          // JSON validation check
-          if (json_token::ERROR == p.get_current_token()) { return false; }
-
-          int i = idx;
-          while (i > 0) {
-            if (p.get_current_token() == json_token::END_ARRAY) {
-              // terminate, nothing has been written
-              return false;
-            }
-
-            if (!p.try_skip_children()) { return false; }
-
-            p.next_token();
-            // JSON validation check
-            if (json_token::ERROR == p.get_current_token()) { return false; }
-
-            --i;
-          }
-
-          // i == 0
-          push_ctx(ctx);
-          push_context(
-            p.get_current_token(), 9, ctx.g, ctx.style, ctx.path_ptr + 2, ctx.path_size - 2);
-        }
-        // case (FIELD_NAME, Named(name) :: xs) if p.getCurrentName == name
-        // case path 10
-        else if (json_token::FIELD_NAME == ctx.token &&
-                 thrust::get<0>(path_match_named(ctx.path_ptr, ctx.path_size)) &&
-                 p.match_current_field_name(
-                   thrust::get<1>(path_match_named(ctx.path_ptr, ctx.path_size)))) {
-          if (p.next_token() != json_token::VALUE_NULL) {
-            // JSON validation check
-            if (json_token::ERROR == p.get_current_token()) { return false; }
-            push_ctx(ctx);
-            push_context(
-              p.get_current_token(), 10, ctx.g, ctx.style, ctx.path_ptr + 1, ctx.path_size - 1);
-          } else {
-            return false;
-          }
-        }
-        // case (FIELD_NAME, Wildcard :: xs)
-        // case path 11
-        else if (json_token::FIELD_NAME == ctx.token &&
-                 path_match_element(ctx.path_ptr, ctx.path_size, path_instruction_type::WILDCARD)) {
-          p.next_token();
-          // JSON validation check
-          if (json_token::ERROR == p.get_current_token()) { return false; }
-          push_ctx(ctx);
-          push_context(
-            p.get_current_token(), 11, ctx.g, ctx.style, ctx.path_ptr + 1, ctx.path_size - 1);
-        }
-        // case _ =>
-        // case path 12
-        else {
-          if (!p.try_skip_children()) { return false; }
-          // default case path, return false for this task
-          ctx.dirty        = 0;
-          ctx.task_is_done = true;
-          push_ctx(ctx);
-        }
-      } else {
-        // current context is done.
-
-        // pop parent task
-        // update parent task info according to current task result
-        if (pop_context(p_ctx)) {
-          // case (VALUE_STRING, Nil) if style == RawStyle
-          // case path 1
-          if (1 == ctx.case_path) {
-            // never happen
-          }
-          // case (START_ARRAY, Nil) if style == FlattenStyle
-          // case path 2
-          else if (2 == ctx.case_path) {
-            // collect result from child task
-            p_ctx.dirty += ctx.dirty;
-            // copy generator states to parent task;
-            p_ctx.g = ctx.g;
-            push_ctx(p_ctx);
-          }
-          // case (_, Nil)
-          // case path 3
-          else if (3 == ctx.case_path) {
-            // never happen
-          }
-          // case (START_OBJECT, Key :: xs)
-          // case path 4
-          else if (4 == ctx.case_path) {
-            if (p_ctx.dirty < 1 && ctx.dirty > 0) { p_ctx.dirty = ctx.dirty; }
-            // copy generator states to parent task;
-            p_ctx.g = ctx.g;
-            push_ctx(p_ctx);
-          }
-          // case (START_ARRAY, Subscript :: Wildcard :: Subscript :: Wildcard :: xs)
-          // case path 5
-          else if (5 == ctx.case_path) {
-            // collect result from child task
-            p_ctx.dirty += ctx.dirty;
-            // copy generator states to parent task;
-            p_ctx.g = ctx.g;
-            push_ctx(p_ctx);
-          }
-          // case (START_ARRAY, Subscript :: Wildcard :: xs) if style != QuotedStyle
-          // case path 6
-          else if (6 == ctx.case_path) {
-            // collect result from child task
-            p_ctx.dirty += ctx.dirty;
-            // update child generator for parent task
-            p_ctx.child_g = ctx.g;
-            push_ctx(p_ctx);
-          }
-          // case (START_ARRAY, Subscript :: Wildcard :: xs)
-          // case path 7
-          else if (7 == ctx.case_path) {
-            // collect result from child task
-            p_ctx.dirty += ctx.dirty;
-            // copy generator states to parent task;
-            p_ctx.g = ctx.g;
-            push_ctx(p_ctx);
-          }
-          /* case (START_ARRAY, Subscript :: Index(idx) :: (xs@Subscript :: Wildcard :: _)) */
-          // case path 8
-          // case (START_ARRAY, Subscript :: Index(idx) :: xs)
-          // case path 9
-          else if (8 == ctx.case_path || 9 == ctx.case_path) {
-            // collect result from child task
-            p_ctx.dirty += ctx.dirty;
-
-            // post logic:
-            while (p.next_token() != json_token::END_ARRAY) {
-              // JSON validation check
-              if (json_token::ERROR == p.get_current_token()) { return false; }
-              // advance the token stream to the end of the array
-              if (!p.try_skip_children()) { return false; }
-            }
-            // task is done
-            p_ctx.task_is_done = true;
-            // copy generator states to parent task;
-            p_ctx.g = ctx.g;
-            push_ctx(p_ctx);
-          }
-          // case (FIELD_NAME, Named(name) :: xs) if p.getCurrentName == name
-          // case path 10
-          else if (10 == ctx.case_path) {
-            // collect result from child task
-            p_ctx.dirty += ctx.dirty;
-            // task is done
-            p_ctx.task_is_done = true;
-            // copy generator states to parent task;
-            p_ctx.g = ctx.g;
-            push_ctx(p_ctx);
-          }
-          // case (FIELD_NAME, Wildcard :: xs)
-          // case path 11
-          else if (11 == ctx.case_path) {
-            // collect result from child task
-            p_ctx.dirty += ctx.dirty;
-            // task is done
-            p_ctx.task_is_done = true;
-            // copy generator states to parent task;
-            p_ctx.g = ctx.g;
-            push_ctx(p_ctx);
-          }
-          // case _ =>
-          // case path 12
-          else {
-            // never happen
-          }
-        } else {
-          // has no parent task, stack is empty, will exit
-        }
-      }
-    }
-
-    // copy output len
-    root_g.set_output_len(ctx.g.get_output_len());
-    return ctx.dirty > 0;
-  }
-};
-
-}  // namespace detail
-
 /**
  * Extracts json object from a json string based on json path specified, and
  * returns json string of the extracted json object. It will return null if the
diff --git a/src/main/cpp/src/json_parser.hpp b/src/main/cpp/src/json_parser.cuh
similarity index 91%
rename from src/main/cpp/src/json_parser.hpp
rename to src/main/cpp/src/json_parser.cuh
index 65c735d1ff..d6fb0df687 100644
--- a/src/main/cpp/src/json_parser.hpp
+++ b/src/main/cpp/src/json_parser.cuh
@@ -40,7 +40,7 @@ enum class write_style {
 
 // allow single quotes to represent strings in JSON
 // e.g.: {'k': 'v'} is valid when it's true
-constexpr bool curr_allow_single_quotes = true;
+constexpr bool allow_single_quotes = true;
 
 // Whether allow unescaped control characters in JSON Strings.
 // Unescaped control characters are ASCII characters with value less than 32,
@@ -50,17 +50,20 @@ constexpr bool curr_allow_single_quotes = true;
 // e.g., how to represent carriage return and newline characters:
 //   if true, allow "\n\r" two control characters without escape directly
 //   if false, "\n\r" are not allowed, should use escape characters: "\\n\\r"
-constexpr bool curr_allow_unescaped_control_chars = true;
+constexpr bool allow_unescaped_control_chars = true;
 
-// deep JSON nesting depth will consume more memory, we can tuning this in
-// future. we ever run into a limit of 254, here use a small value 64.
-constexpr int curr_max_json_nesting_depth = 64;
+/**
+ * @brief Maximum JSON nesting depth.
+ * JSON nested deeper than this is treated as invalid.
+ * If this is set to a greater value, `context_stack` must be updated as well.
+ */
+constexpr int max_json_nesting_depth = 64;
 
 // Define the maximum JSON String length, counts utf8 bytes.
 // By default, maximum JSON String length is negative one, means no
 // limitation. e.g.: The length of String "\\n" is 1, JSON parser does not
 // count escape characters.
-constexpr int curr_max_string_utf8_bytes = 20000000;
+constexpr int max_string_utf8_bytes = 20000000;
 
 //
 /**
@@ -73,7 +76,7 @@ constexpr int curr_max_string_utf8_bytes = 20000000;
  * e.g.: The length of number -123.45e-67 is 7. if maximum JSON number length
  * is 6, then this number is a invalid number.
  */
-constexpr int curr_max_num_len = 1000;
+constexpr int max_num_len = 1000;
 
 /**
  * whether allow tailing useless sub-string in JSON.
@@ -82,7 +85,7 @@ constexpr int curr_max_num_len = 1000;
  * 'v'} is valid.
  *   {'k' : 'v'}_extra_tail_sub_string
  */
-constexpr bool curr_allow_tailing_sub_string = true;
+constexpr bool allow_tailing_sub_string = true;
 
 /**
  * JSON token enum
@@ -175,12 +178,6 @@ enum class json_token {
  * range: [0, 32)
  *
  */
-template <bool allow_single_quotes           = curr_allow_single_quotes,
-          bool allow_unescaped_control_chars = curr_allow_unescaped_control_chars,
-          int max_json_nesting_depth         = curr_max_json_nesting_depth,
-          int max_string_utf8_bytes          = curr_max_string_utf8_bytes,
-          int max_num_len                    = curr_max_num_len,
-          bool allow_tailing_sub_string      = curr_allow_tailing_sub_string>
 class json_parser {
  public:
   __device__ inline json_parser(char const* const _json_start_pos, cudf::size_type const _json_len)
@@ -191,6 +188,37 @@ class json_parser {
   }
 
  private:
+  /**
+   * @brief Returns the bit at position `bitIndex` (0 is the lowest bit) of an int64 number
+   */
+  __device__ inline bool get_bit_value(int64_t number, int bitIndex)
+  {
+    // Shift right by `bitIndex` so that the requested bit ends up in the lowest position
+    long shifted = number >> bitIndex;
+
+    // Mask with 1 to keep only the lowest bit; non-zero converts to true
+    bool bit_value = shifted & 1;
+
+    return bit_value;
+  }
+
+  /**
+   * @brief Sets or clears the bit at position `bit_index` of `number` in place
+   */
+  __device__ inline void set_bit_value(int64_t& number, int bit_index, bool bit_value)
+  {
+    // Create a mask with a 1 at the desired bit index
+    long mask = 1L << bit_index;  // NOTE(review): assumes `long` is 64-bit; confirm for all targets
+
+    if (bit_value) {
+      // Set the bit to 1 by performing a bitwise OR with the mask
+      number |= mask;
+    } else {
+      // Set the bit to 0 by performing a bitwise AND with the complement of the mask
+      number &= ~mask;
+    }
+  }
+
   /**
    * is current position EOF
    */
@@ -258,8 +286,9 @@ class json_parser {
    */
   __device__ inline void push_context(json_token token)
   {
-    bool v                      = json_token::START_OBJECT == token ? true : false;
-    context_stack[stack_size++] = v;
+    bool v = json_token::START_OBJECT == token ? true : false;
+    set_bit_value(context_stack, stack_size, v);
+    stack_size++;
   }
 
   /**
@@ -267,7 +296,10 @@ class json_parser {
    * true is object, false is array
    * only has two contexts: object or array
    */
-  __device__ inline bool is_object_context() { return context_stack[stack_size - 1]; }
+  __device__ inline bool is_object_context()
+  {
+    return get_bit_value(context_stack, stack_size - 1);
+  }
 
   /**
    * pop top context from stack
@@ -1041,24 +1073,16 @@ class json_parser {
    */
   __device__ inline void parse_number()
   {
-    // reset the float parts
-    float_integer_len  = 0;
-    float_fraction_len = 0;
-    float_exp_len      = 0;
-    float_exp_has_sign = false;
-
     // parse sign
-    if (try_skip(curr_pos, '-')) {
-      float_sign = false;
-    } else {
-      float_sign = true;
-    }
-    float_integer_pos = curr_pos;
+    try_skip(curr_pos, '-');
 
     // parse unsigned number
     bool is_float = false;
-    if (try_unsigned_number(is_float)) {
-      if (check_max_num_len()) {
+    // store number digits length
+    // e.g.: +1.23e-45 length is 5
+    int number_digits_length = 0;
+    if (try_unsigned_number(is_float, number_digits_length)) {
+      if (check_max_num_len(number_digits_length)) {
         curr_token = (is_float ? json_token::VALUE_NUMBER_FLOAT : json_token::VALUE_NUMBER_INT);
         // success parsed a number, update the token length
         number_token_len = curr_pos - current_token_start_pos;
@@ -1071,21 +1095,16 @@ class json_parser {
   }
 
   /**
-   * verify max number length if enabled
-   * e.g.: -1.23e-456, int len is 1, fraction len is 2, exp digits len is 3
+   * verify max number digits length if enabled
+   * e.g.: +1.23e-45 length is 5
    */
-  __device__ inline bool check_max_num_len()
+  __device__ inline bool check_max_num_len(int number_digits_length)
   {
-    // exp part contains + or - sign char, do not count the exp sign
-    int exp_digit_len = float_exp_len;
-    if (float_exp_len > 0 && float_exp_has_sign) { exp_digit_len--; }
-
-    int sum_len = float_integer_len + float_fraction_len + exp_digit_len;
     return
       // disabled num len check
       max_num_len <= 0 ||
       // enabled num len check
-      (max_num_len > 0 && sum_len <= max_num_len);
+      (max_num_len > 0 && number_digits_length <= max_num_len);
   }
 
   /**
@@ -1106,20 +1125,20 @@ class json_parser {
    *
    * @param[out] is_float, if contains `.` or `e`, set true
    */
-  __device__ inline bool try_unsigned_number(bool& is_float)
+  __device__ inline bool try_unsigned_number(bool& is_float, int& number_digits_length)
   {
     if (!eof(curr_pos)) {
       char c = *curr_pos;
       if (c >= '1' && c <= '9') {
         curr_pos++;
-        float_integer_len++;
+        number_digits_length++;
         // first digit is [1-9]
         // path: INT = [1-9] [0-9]*
-        float_integer_len += skip_zero_or_more_digits();
-        return parse_number_from_fraction(is_float);
+        number_digits_length += skip_zero_or_more_digits();
+        return parse_number_from_fraction(is_float, number_digits_length);
       } else if (c == '0') {
         curr_pos++;
-        float_integer_len++;
+        number_digits_length++;
 
         // check leading zeros
         if (!eof(curr_pos)) {
@@ -1132,7 +1151,7 @@ class json_parser {
 
         // first digit is [0]
         // path: INT = '0'
-        return parse_number_from_fraction(is_float);
+        return parse_number_from_fraction(is_float, number_digits_length);
       } else {
         // first digit is non [0-9]
         return false;
@@ -1147,22 +1166,21 @@ class json_parser {
    * parse: ('.' [0-9]+)? EXP?
    * @param[is_float] is float
    */
-  __device__ inline bool parse_number_from_fraction(bool& is_float)
+  __device__ inline bool parse_number_from_fraction(bool& is_float, int& number_digits_length)
   {
     // parse fraction
     if (try_skip(curr_pos, '.')) {
       // has fraction
-      float_fraction_pos = curr_pos;
-      is_float           = true;
+      is_float = true;
       // try pattern: [0-9]+
-      if (!try_skip_one_or_more_digits(float_fraction_len)) { return false; }
+      if (!try_skip_one_or_more_digits(number_digits_length)) { return false; }
     }
 
     // parse exp
     if (!eof(curr_pos) && (*curr_pos == 'e' || *curr_pos == 'E')) {
       curr_pos++;
       is_float = true;
-      return try_parse_exp();
+      return try_parse_exp(number_digits_length);
     }
 
     return true;
@@ -1192,12 +1210,12 @@ class json_parser {
    * try skip one or more [0-9]
    * @param[out] len: skipped num of digits
    */
-  __device__ inline bool try_skip_one_or_more_digits(int& len)
+  __device__ inline bool try_skip_one_or_more_digits(int& number_digits_length)
   {
     if (!eof(curr_pos) && is_digit(*curr_pos)) {
       curr_pos++;
-      len++;
-      len += skip_zero_or_more_digits();
+      number_digits_length++;
+      number_digits_length += skip_zero_or_more_digits();
       return true;
     } else {
       return false;
@@ -1208,21 +1226,15 @@ class json_parser {
    * parse [eE][+-]?[0-9]+
    * @param[out] exp_len exp len
    */
-  __device__ inline bool try_parse_exp()
+  __device__ inline bool try_parse_exp(int& number_digits_length)
   {
     // already parsed [eE]
 
-    float_exp_pos = curr_pos;
-
     // parse [+-]?
-    if (!eof(curr_pos) && (*curr_pos == '+' || *curr_pos == '-')) {
-      float_exp_len++;
-      curr_pos++;
-      float_exp_has_sign = true;
-    }
+    if (!eof(curr_pos) && (*curr_pos == '+' || *curr_pos == '-')) { curr_pos++; }
 
     // parse [0-9]+
-    return try_skip_one_or_more_digits(float_exp_len);
+    return try_skip_one_or_more_digits(number_digits_length);
   }
 
   // =========== Parse number end ===========
@@ -1654,21 +1666,6 @@ class json_parser {
     stack_size = 0;
   }
 
-  /**
-   * get float parts, current token should be VALUE_NUMBER_FLOAT.
-   */
-  __device__ thrust::tuple<bool, char const*, int, char const*, int, char const*, int>
-  get_current_float_parts()
-  {
-    return thrust::make_tuple(float_sign,
-                              float_integer_pos,
-                              float_integer_len,
-                              float_fraction_pos,
-                              float_fraction_len,
-                              float_exp_pos,
-                              float_exp_len);
-  }
-
   /**
    * match field name string when current token is FIELD_NAME,
    * return true if current token is FIELD_NAME and match successfully.
@@ -1789,44 +1786,27 @@ class json_parser {
   char const* curr_pos;
   json_token curr_token{json_token::INIT};
 
-  // saves the nested contexts: JSON object context or JSON array context
-  // true is JSON object context; false is JSON array context
-  // When encounter EOF and this stack is non-empty, means non-closed JSON
-  // object/array, then parsing will fail.
-  bool context_stack[max_json_nesting_depth];
+  // A 64-bit integer that stores the nested object/array contexts, one bit per level
+  // true(bit value 1) is JSON object context
+  // false(bit value 0) is JSON array context
+  // JSON parser checks that arrays/objects are matched, e.g.: [1,2} is wrong
+  int64_t context_stack;
   int stack_size = 0;
 
-  // save current token start pos, used by coping current row text
+  // save current token start pos, used when copying the current token text
   char const* current_token_start_pos;
-  // used to copy int/float string verbatim, note: int/float have no escape
-  // chars
+  // used to store number token length
   cudf::size_type number_token_len;
 
-  // The following variables record number token informations.
-  // if current token is int/float, use the following variables to save
-  // float parts e.g.: -123.000456E-000789, sign is false; integer part is 123;
-  // fraction part is 000456; exp part is -000789. The following parts is used
-  // by normalization, e.g.: 0.001 => 1E-3
-  bool float_sign;
-  char const* float_integer_pos;
-  int float_integer_len;
-  char const* float_fraction_pos;
-  int float_fraction_len;
-  char const* float_exp_pos;
-  int float_exp_len;
-  // true indicates has '-' or '+' in the exp part;
-  // the exp sign char is not counted when checking the max number length
-  bool float_exp_has_sign;
-
   // Records string/field name token utf8 bytes size after unescaped
-  // e.g.: For JSON string "\\n", after unescaped, it ues 1 byte '\n'
-  // used by `write_unescaped_text` and `write_escaped_text` bytes
+  // e.g.: For JSON 4 chars string "\\n", after unescaped, get 1 char '\n'
   // used by checking the max string length
   int string_token_utf8_bytes;
-  // Records bytes diff for escape writing
-  // e.g.: "\\n" string_token_utf8_bytes is 1,
-  // when `write_escaped_text` bytes is 4: " \ n "
-  // this diff will be 4 - 1 = 3;
+
+  // Records the byte-count difference between escaped writing and unescaped writing
+  // e.g.: for the 4 chars string "\\n", string_token_utf8_bytes is 1;
+  // `write_escaped_text` will write out 4 chars: " \ n ",
+  // so this diff will be 4 - 1 = 3
   int bytes_diff_for_escape_writing;
 };
 
diff --git a/src/main/java/com/nvidia/spark/rapids/jni/JSONUtils.java b/src/main/java/com/nvidia/spark/rapids/jni/JSONUtils.java
index bd034651b7..4ff9c91a3f 100644
--- a/src/main/java/com/nvidia/spark/rapids/jni/JSONUtils.java
+++ b/src/main/java/com/nvidia/spark/rapids/jni/JSONUtils.java
@@ -23,6 +23,9 @@ public class JSONUtils {
     NativeDepsLoader.loadNativeDeps();
   }
 
+  // Keep in sync with `max_path_depth` in `get_json_object.cu`
+  public static final int MAX_PATH_DEPTH = 16;
+
   public enum PathInstructionType {
     SUBSCRIPT,
     WILDCARD,
diff --git a/src/test/java/com/nvidia/spark/rapids/jni/GetJsonObjectTest.java b/src/test/java/com/nvidia/spark/rapids/jni/GetJsonObjectTest.java
index a5a519363b..b5379baa58 100644
--- a/src/test/java/com/nvidia/spark/rapids/jni/GetJsonObjectTest.java
+++ b/src/test/java/com/nvidia/spark/rapids/jni/GetJsonObjectTest.java
@@ -96,13 +96,11 @@ void getJsonObjectTest4() {
         keyPath(), namedPath("k5"),
         keyPath(), namedPath("k6"),
         keyPath(), namedPath("k7"),
-        keyPath(), namedPath("k8"),
-        keyPath(), namedPath("k9"),
-        keyPath(), namedPath("k10")
+        keyPath(), namedPath("k8")
     };
 
-    String JSON = "{\"k1\":{\"k2\":{\"k3\":{\"k4\":{\"k5\":{\"k6\":{\"k7\":{\"k8\":{\"k9\":{\"k10\":\"v10\"}}}}}}}}}}";
-    String expectedStr = "v10";
+    String JSON = "{\"k1\":{\"k2\":{\"k3\":{\"k4\":{\"k5\":{\"k6\":{\"k7\":{\"k8\":\"v8\"}}}}}}}}";
+    String expectedStr = "v8";
     try (
         ColumnVector jsonCv = ColumnVector.fromStrings(
             JSON, JSON, JSON, JSON, JSON, JSON, JSON);

From 71d7a938188bb9e7c0af5a714e26d68d1e2ac9f5 Mon Sep 17 00:00:00 2001
From: Tim Liu <timl@nvidia.com>
Date: Thu, 11 Apr 2024 11:13:00 +0800
Subject: [PATCH 4/6] Update comments and fix typo (#1954)

Signed-off-by: Tim Liu <timl@nvidia.com>
---
 .github/workflows/auto-merge.yml | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/auto-merge.yml b/.github/workflows/auto-merge.yml
index 417d24d67e..c6de34764b 100755
--- a/.github/workflows/auto-merge.yml
+++ b/.github/workflows/auto-merge.yml
@@ -42,9 +42,11 @@ jobs:
           git config user.email "70000568+nvauto@users.noreply.github.com "
           git fetch origin ${HEAD} ${BASE}
           git checkout -b ${INTERMEDIATE_HEAD} origin/${HEAD}
-          # Use commits from HEAD, but keek submodule files(FILE_USE_BASE) from BASE
+          # Sync the $BASE branch with the commits from the $HEAD branch,
+          # excluding the paths defined as $FILE_USE_BASE (located under ./thirdparty).
           git checkout origin/${BASE} -- ${FILE_USE_BASE}
-          # If any submodule file is updaged from HEAD, always change to BASE ones
+          # If any submodule file is updated in the HEAD branch,
+          # always change it to the corresponding one from the BASE branch.
           [ ! -z "$(git status --porcelain=v1 --untracked=no)" ] && \
             git commit -s -am "Auto-merge use ${BASE} versions"
           git push origin ${INTERMEDIATE_HEAD} -f

From abf48f3b6926360cd2fb986ee4e8329c77c1f2a9 Mon Sep 17 00:00:00 2001
From: Haoyang Li <haoyangl@nvidia.com>
Date: Thu, 11 Apr 2024 17:35:14 +0800
Subject: [PATCH 5/6] Fix getJsonObject number normalization 0.0 <=> inf bug
 (#1944)

* Move device code in get_json_object to cu or cuh (#1915)

Signed-off-by: Haoyang Li <haoyangl@nvidia.com>

* Refactor/simplify json generator

Signed-off-by: Chong Gao <res_life@163.com>

* Remove purge non-empty nulls step

* Refactor JSON parser: remove useless variables

* Use 64 bits to store JSON nest depth context to save memory

* Refine push/pop/peek logic

* Refactor: use less functions; change max path length from 32 to 8

* Fix nvbug: get-json-obj get incorrect result on some GPUs(H100, V100)

* Revert "Fix nvbug: get-json-obj get incorrect result on some GPUs(H100, V100)"

This reverts commit ff2db7e0b7f623e381fe937f019fefd68501251e.

* Fix a redefined variable

* Update MAX_PATH_DEPTH = 8 in Java to keep consistent

* Fix test cases; Change max path depth from 8 to 16

* Minor change: add pragma once in header

* Fix a bug in string_to_float and move it to jni temporarily

Signed-off-by: Haoyang Li <haoyangl@nvidia.com>

* Address comments

Signed-off-by: Haoyang Li <haoyangl@nvidia.com>

---------

Signed-off-by: Haoyang Li <haoyangl@nvidia.com>
Signed-off-by: Chong Gao <res_life@163.com>
Co-authored-by: Chong Gao <res_life@163.com>
---
 src/main/cpp/src/json_parser.cuh              |  18 ++-
 src/main/cpp/src/string_to_float_cudf.cuh     | 141 ++++++++++++++++++
 .../spark/rapids/jni/GetJsonObjectTest.java   |   4 +
 3 files changed, 155 insertions(+), 8 deletions(-)
 create mode 100644 src/main/cpp/src/string_to_float_cudf.cuh

diff --git a/src/main/cpp/src/json_parser.cuh b/src/main/cpp/src/json_parser.cuh
index d6fb0df687..ec0790aa6b 100644
--- a/src/main/cpp/src/json_parser.cuh
+++ b/src/main/cpp/src/json_parser.cuh
@@ -16,6 +16,7 @@
 #pragma once
 
 #include "ftos_converter.cuh"
+#include "string_to_float_cudf.cuh"
 
 #include <cudf/strings/detail/convert/string_to_float.cuh>
 #include <cudf/strings/string_view.hpp>
@@ -1512,12 +1513,12 @@ class json_parser {
         return number_token_len;
       case json_token::VALUE_NUMBER_FLOAT: {
         // number normalization:
-        // 0.03E-2 => 0.3E-5; infinity;
-        // 200.000 => 200.0, 351.980 => 351.98, 12345678900000000000.0
-        // => 1.23456789E19 0.0000000000003 => 3.0E-13; 0.003 => 0.003; 0.0003
-        // => 3.0E-4 leverage function: `get_current_float_parts`
-        double d_value =
-          cudf::strings::detail::stod(cudf::string_view(current_token_start_pos, number_token_len));
+        // 0.03E-2 => 0.3E-5, 200.000 => 200.0, 351.980 => 351.98,
+        // 12345678900000000000.0 => 1.23456789E19, 1E308 => 1.0E308
+        // 0.0000000000003 => 3.0E-13; 0.003 => 0.003; 0.0003 => 3.0E-4
+        // 1.0E309 => "Infinity", -1E309 => "-Infinity"
+        double d_value = spark_rapids_jni::detail::stod(
+          cudf::string_view(current_token_start_pos, number_token_len));
         return spark_rapids_jni::ftos_converter::double_normalization(d_value, destination);
       }
       case json_token::VALUE_TRUE:
@@ -1601,8 +1602,9 @@ class json_parser {
         }
         return number_token_len;
       case json_token::VALUE_NUMBER_FLOAT: {
-        double d_value =
-          cudf::strings::detail::stod(cudf::string_view(current_token_start_pos, number_token_len));
+        // number normalization: parse the token as a double, then normalize it
+        double d_value = spark_rapids_jni::detail::stod(
+          cudf::string_view(current_token_start_pos, number_token_len));
         return spark_rapids_jni::ftos_converter::double_normalization(d_value, destination);
       }
       case json_token::VALUE_TRUE:
diff --git a/src/main/cpp/src/string_to_float_cudf.cuh b/src/main/cpp/src/string_to_float_cudf.cuh
new file mode 100644
index 0000000000..5a7824d495
--- /dev/null
+++ b/src/main/cpp/src/string_to_float_cudf.cuh
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <cudf/strings/detail/convert/is_float.cuh>
+#include <cudf/strings/string_view.cuh>
+
+#include <cmath>
+#include <limits>
+
+namespace spark_rapids_jni {
+namespace detail {
+
+/**
+ * @brief Converts the given string into a double-precision floating point value.
+ *
+ * Strings recognized by cudf's `is_nan_str` / `is_inf_str` are mapped to NaN and
+ * (signed) infinity. Scientific notation (`e`/`E` exponent) is also handled.
+ * An empty string yields 0.0; a mantissa with no non-zero digit yields signed zero.
+ *
+ * This function is a copy of cudf::strings::detail::stod, moved into the
+ * spark_rapids_jni::detail namespace, with a fix for an overflow of `exp_ten`.
+ * The only difference from the cudf version is the added line
+ * `if (exp_ten >= 1e8) break;` in the exponent-parsing loop.
+ * It is a short-term workaround for a bug in get_json_object; this file should
+ * be removed once the fix is available in cudf.
+ * @param d_str String to convert
+ * @return The parsed double value
+ */
+__device__ inline double stod(cudf::string_view const& d_str)
+{
+  char const* in_ptr = d_str.data();
+  char const* end    = in_ptr + d_str.size_bytes();
+  if (end == in_ptr) return 0.0;
+  double sign{1.0};
+  if (*in_ptr == '-' || *in_ptr == '+') {
+    sign = (*in_ptr == '-' ? -1 : 1);
+    ++in_ptr;
+  }
+
+  constexpr double infinity      = std::numeric_limits<double>::infinity();
+  constexpr uint64_t max_holding = (std::numeric_limits<uint64_t>::max() - 9L) / 10L;
+
+  // special strings (NaN, Inf): only checked when the next char sorts after '9'
+  if ((in_ptr < end) && *in_ptr > '9') {
+    auto const inf_nan = cudf::string_view(in_ptr, static_cast<cudf::size_type>(end - in_ptr));
+    if (cudf::strings::detail::is_nan_str(inf_nan)) { return nan(""); }
+    if (cudf::strings::detail::is_inf_str(inf_nan)) { return sign * infinity; }
+  }
+
+  // Accumulate mantissa digits into `digits` until one more digit would exceed the uint64_t
+  // limit; later digits are dropped and `exp_off` tracks the power-of-ten adjustment
+  uint64_t digits = 0;
+  int exp_off     = 0;
+  bool decimal    = false;
+  while (in_ptr < end) {
+    char ch = *in_ptr;
+    if (ch == '.') {
+      decimal = true;
+      ++in_ptr;
+      continue;
+    }
+    if (ch < '0' || ch > '9') break;
+    if (digits > max_holding)
+      exp_off += (int)!decimal;  // dropped digit: only integer-part digits raise the exponent
+    else {
+      digits = (digits * 10L) + static_cast<uint64_t>(ch - '0');
+      if (digits > max_holding) {
+        digits = digits / 10L;
+        exp_off += (int)!decimal;
+      } else
+        exp_off -= (int)decimal;
+    }
+    ++in_ptr;
+  }
+  if (digits == 0) { return sign * static_cast<double>(0); }
+
+  // check for exponent char and parse the optional sign and digits that follow it
+  int exp_ten  = 0;
+  int exp_sign = 1;
+  if (in_ptr < end) {
+    char ch = *in_ptr++;
+    if (ch == 'e' || ch == 'E') {
+      if (in_ptr < end) {
+        ch = *in_ptr;
+        if (ch == '-' || ch == '+') {
+          exp_sign = (ch == '-' ? -1 : 1);
+          ++in_ptr;
+        }
+        while (in_ptr < end) {
+          ch = *in_ptr++;
+          if (ch < '0' || ch > '9') break;
+          exp_ten = (exp_ten * 10) + (int)(ch - '0');
+          if (exp_ten >= 1e8) break;  // fix vs. cudf: stop before `exp_ten` can overflow int
+        }
+      }
+    }
+  }
+
+  int const num_digits = static_cast<int>(log10(static_cast<double>(digits))) + 1;
+  exp_ten *= exp_sign;
+  exp_ten += exp_off;
+  exp_ten += num_digits - 1;  // exponent of the value when written in d.ddd form
+  if (exp_ten > std::numeric_limits<double>::max_exponent10) {
+    return sign > 0 ? infinity : -infinity;
+  }
+
+  double base = sign * static_cast<double>(digits);
+
+  exp_ten += 1 - num_digits;  // back to the exponent that applies to the integer `digits`
+  // If 10^exp_ten would result in a subnormal value, the base and
+  // exponent should be adjusted so that 10^exp_ten is a normal value
+  auto const subnormal_shift = std::numeric_limits<double>::min_exponent10 - exp_ten;
+  if (subnormal_shift > 0) {
+    // Handle subnormal values. Ensure that both base and exponent are
+    // normal values before computing their product.
+    base = base / exp10(static_cast<double>(num_digits - 1 + subnormal_shift));
+    exp_ten += num_digits - 1;  // adjust exponent
+    auto const exponent = exp10(static_cast<double>(exp_ten + subnormal_shift));
+    return base * exponent;
+  }
+
+  double const exponent = exp10(static_cast<double>(std::abs(exp_ten)));
+  return exp_ten < 0 ? base / exponent : base * exponent;
+}
+
+}  // namespace detail
+}  // namespace spark_rapids_jni
diff --git a/src/test/java/com/nvidia/spark/rapids/jni/GetJsonObjectTest.java b/src/test/java/com/nvidia/spark/rapids/jni/GetJsonObjectTest.java
index b5379baa58..ea23c4c9ba 100644
--- a/src/test/java/com/nvidia/spark/rapids/jni/GetJsonObjectTest.java
+++ b/src/test/java/com/nvidia/spark/rapids/jni/GetJsonObjectTest.java
@@ -200,6 +200,8 @@ void getJsonObjectTest_Number_Normalization() {
             "[-0.0]",
             "[-0]",
             "[12345678999999999999999999]",
+            "[9.299999257686047e-0005603333574677677]",
+            "9.299999257686047e0005603333574677677",
             "[1E308]",
             "[1.0E309,-1E309,1E5000]",
             "0.3",
@@ -214,6 +216,8 @@ void getJsonObjectTest_Number_Normalization() {
             "[-0.0]",
             "[0]",
             "[12345678999999999999999999]",
+            "[0.0]",
+            "\"Infinity\"",
             "[1.0E308]",
             "[\"Infinity\",\"-Infinity\",\"Infinity\"]",
             "0.3",

From 13c82f5824c1f9ac222eeedb7912c6789afd2edf Mon Sep 17 00:00:00 2001
From: Jenkins Automation <70000568+nvauto@users.noreply.github.com>
Date: Thu, 11 Apr 2024 20:40:45 +0800
Subject: [PATCH 6/6] Update submodule cudf to
 578c240a20049a5c4b83b08f54c235aad5318f1a (#1956)

Signed-off-by: spark-rapids automation <70000568+nvauto@users.noreply.github.com>
---
 thirdparty/cudf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/thirdparty/cudf b/thirdparty/cudf
index 94726ad056..578c240a20 160000
--- a/thirdparty/cudf
+++ b/thirdparty/cudf
@@ -1 +1 @@
-Subproject commit 94726ad056e2473c836f47d310e2584bdf44d1f9
+Subproject commit 578c240a20049a5c4b83b08f54c235aad5318f1a