From b15ce9126dcc596c953fe202e6834cebdfca6080 Mon Sep 17 00:00:00 2001
From: Laughing <LouWazor@gmail.com>
Date: Wed, 3 Jul 2024 09:12:54 +0800
Subject: [PATCH] HIVE-28262: Single column use MultiDelimitSerDe parse column
 error (#5252) (Liu Weizheng, reviewed by Butao Zhang)

---
 .../hadoop/hive/serde2/lazy/LazyStruct.java   |  4 +-
 .../hive/serde2/lazy/TestLazyStruct.java      | 82 +++++++++++++++++++
 2 files changed, 84 insertions(+), 2 deletions(-)
 create mode 100644 serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyStruct.java
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java
index 2848d348e30d..3bd92cc7b5b8 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java
@@ -300,7 +300,7 @@ public void parseMultiDelimit(byte[] rawRow, byte[] fieldDelimit) {
     // first field always starts from 0, even when missing
     startPosition[0] = 0;
     for (int i = 1; i <= fields.length; i++) {
-      if (fields.length > 1 && delimitIndexes[i - 1] != -1) {
+      if (delimitIndexes[i - 1] != -1) {
         int start = delimitIndexes[i - 1] + fieldDelimit.length;
         startPosition[i] = start - i * diff;
       } else {
@@ -313,7 +313,7 @@ public void parseMultiDelimit(byte[] rawRow, byte[] fieldDelimit) {
 
   // find all the indexes of the sub byte[]
   private int[] findIndexes(byte[] array, byte[] target) {
-    if (fields.length <= 1) {
+    if (fields.length < 1) {
       return new int[0];
     }
     int[] indexes = new int[fields.length];
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyStruct.java b/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyStruct.java
new file mode 100644
index 000000000000..a1f9b695f07a
--- /dev/null
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyStruct.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazy;
+
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParameters;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParametersImpl;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.Text;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * TestLazyStruct.<br/>
+ */
+public class TestLazyStruct {
+
+    /**
+     * Regression test for HIVE-28262: parses a single-column row that ends
+     * with the multi-character delimiter "|@|" and checks the resulting
+     * {@code startPosition} array.
+     */
+    @Test
+    public void testParseMultiDelimit() throws Exception {
+        // single field named id
+        List<String> structFieldNames = new ArrayList<>();
+        structFieldNames.add("id");
+        // field type is string
+        List<TypeInfo> fieldTypes = new ArrayList<>();
+        PrimitiveTypeInfo primitiveTypeInfo = new PrimitiveTypeInfo();
+        primitiveTypeInfo.setTypeName("string");
+        fieldTypes.add(primitiveTypeInfo);
+        // separators; the first byte (124) is '|'
+        byte[] separators = new byte[]{124, 2, 3, 4, 5, 6, 7, 8};
+
+        // sequence => "\N" (bytes 92, 78)
+        Text sequence = new Text();
+        sequence.set(new byte[]{92, 78});
+
+        // create lazy object inspector parameters
+        LazyObjectInspectorParameters lazyObjectInspectorParameters = new LazyObjectInspectorParametersImpl(false, (byte) '0',
+                false, null, separators, sequence);
+        // create a lazy struct inspector
+        ObjectInspector lazyStructInspector = LazyFactory.createLazyStructInspector(structFieldNames, fieldTypes, lazyObjectInspectorParameters);
+        LazyStruct lazyStruct = (LazyStruct) LazyFactory.createLazyObject(lazyStructInspector);
+
+        // original row data: the value "1" followed by one trailing delimiter
+        String rowData = "1|@|";
+        // row field delimiter
+        String fieldDelimiter = "|@|";
+
+        // parse the row using the multi-character delimiter
+        lazyStruct.parseMultiDelimit(rowData.getBytes(StandardCharsets.UTF_8),
+                fieldDelimiter.getBytes(StandardCharsets.UTF_8));
+
+        // startPosition has one entry per field plus one trailing entry;
+        // index 0 is the start of the single field
+        // before fix result: 0,1
+        // after fix result: 0,2
+        Assert.assertArrayEquals(new int[]{0, 2}, lazyStruct.startPosition);
+    }
+}
\ No newline at end of file