diff --git a/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncoder.java b/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncoder.java index 4af1a8693f..602cf53215 100644 --- a/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncoder.java +++ b/vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncoder.java @@ -165,7 +165,7 @@ static void retrieveIndexVector( for (int i = start; i < end; i++) { if (!indices.isNull(i)) { int indexAsInt = (int) indices.getValueAsLong(i); - if (indexAsInt > dictionaryCount) { + if (indexAsInt >= dictionaryCount) { throw new IllegalArgumentException( "Provided dictionary does not contain value for index " + indexAsInt); } diff --git a/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java b/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java index 0945919b91..e74c56b6da 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java @@ -403,7 +403,7 @@ public void testIntEquals() { } @Test - public void testVarcharEquals() { + public void testVarCharEquals() { try (final VarCharVector vector1 = new VarCharVector("varchar", allocator); final VarCharVector vector2 = new VarCharVector("varchar", allocator)) { @@ -1061,7 +1061,7 @@ public void testStructNoMemoryLeak() { try (final StructVector decode = StructSubfieldEncoder.decode(indices, provider, allocator)) { fail("There should be an exception when decoding"); } catch (Exception e) { - assertEquals("Provided dictionary does not contain value for index 3", e.getMessage()); + assertEquals("Provided dictionary does not contain value for index 1", e.getMessage()); } } assertEquals(0, allocator.getAllocatedMemory(), "struct decode memory leak"); @@ -1197,6 +1197,37 @@ public void testDictionaryUIntOverflow() { } } + /** + * Test related to Issue #1158 - it was possible to decode an encoded 
value that is one index past the end of the Dictionary's range. + */ + @Test + public void testReferencingIndexOutOfBounds() { + // Index at which the original value will be stored in the dictionary + int encodedIndex = 0; + // The encoded value that references an index in the dictionary + int indexReferenced = 1; + + try (final IntVector encodedVector = new IntVector("encodings", allocator); + final VarCharVector dictionaryVector = newVarCharVector("dict", allocator); ) { + String originalValue = "Foo"; + dictionaryVector.allocateNew(1); + dictionaryVector.setValueCount(1); + dictionaryVector.set(encodedIndex, originalValue.getBytes(StandardCharsets.UTF_8)); + + encodedVector.allocateNew(1); + encodedVector.setValueCount(1); + encodedVector.set(0, indexReferenced); + + Dictionary dictionary = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null)); + + try (ValueVector decoded = DictionaryEncoder.decode(encodedVector, dictionary)) { + fail("There should be an exception when decoding index outside dictionary's range."); + } catch (Exception e) { + assertEquals("Provided dictionary does not contain value for index " + indexReferenced, e.getMessage()); + } + } + } + private int[] convertListToIntArray(List list) { int[] values = new int[list.size()]; for (int i = 0; i < list.size(); i++) {