Skip to content

Commit 8d2d75c

Browse files
GH-1158: Fix inclusive bounds checking for DictionaryDecoder
Closes #1158.
1 parent 3bc34b0 commit 8d2d75c

2 files changed

Lines changed: 34 additions & 3 deletions

File tree

vector/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncoder.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ static void retrieveIndexVector(
165165
for (int i = start; i < end; i++) {
166166
if (!indices.isNull(i)) {
167167
int indexAsInt = (int) indices.getValueAsLong(i);
168-
if (indexAsInt > dictionaryCount) {
168+
if (indexAsInt >= dictionaryCount) {
169169
throw new IllegalArgumentException(
170170
"Provided dictionary does not contain value for index " + indexAsInt);
171171
}

vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -403,7 +403,7 @@ public void testIntEquals() {
403403
}
404404

405405
@Test
406-
public void testVarcharEquals() {
406+
public void testVarCharEquals() {
407407
try (final VarCharVector vector1 = new VarCharVector("varchar", allocator);
408408
final VarCharVector vector2 = new VarCharVector("varchar", allocator)) {
409409

@@ -1061,7 +1061,7 @@ public void testStructNoMemoryLeak() {
10611061
try (final StructVector decode = StructSubfieldEncoder.decode(indices, provider, allocator)) {
10621062
fail("There should be an exception when decoding");
10631063
} catch (Exception e) {
1064-
assertEquals("Provided dictionary does not contain value for index 3", e.getMessage());
1064+
assertEquals("Provided dictionary does not contain value for index 1", e.getMessage());
10651065
}
10661066
}
10671067
assertEquals(0, allocator.getAllocatedMemory(), "struct decode memory leak");
@@ -1197,6 +1197,37 @@ public void testDictionaryUIntOverflow() {
11971197
}
11981198
}
11991199

1200+
/**
1201+
* Regression test for issue #1158: an encoded value exactly one past the dictionary's last valid index (i.e. equal to the dictionary's value count) was previously accepted when decoding.
1202+
*/
1203+
@Test
1204+
public void testReferencingIndexOutOfBounds() {
1205+
// Index at which the original value will be stored in the dictionary
1206+
int encodedIndex = 0;
1207+
// The encoded value that references an index in the dictionary
1208+
int indexReferenced = 1;
1209+
1210+
try (final IntVector encodedVector = new IntVector("encodings", allocator);
1211+
final VarCharVector dictionaryVector = newVarCharVector("dict", allocator); ) {
1212+
String originalValue = "Foo";
1213+
dictionaryVector.allocateNew(1);
1214+
dictionaryVector.setValueCount(1);
1215+
dictionaryVector.set(encodedIndex, originalValue.getBytes(StandardCharsets.UTF_8));
1216+
1217+
encodedVector.allocateNew(1);
1218+
encodedVector.setValueCount(1);
1219+
encodedVector.set(0, indexReferenced);
1220+
1221+
Dictionary dictionary = new Dictionary(dictionaryVector, new DictionaryEncoding(1L, false, null));
1222+
1223+
try (ValueVector decoded = DictionaryEncoder.decode(encodedVector, dictionary)) {
1224+
fail("There should be an exception when decoding index outside dictionary's range.");
1225+
} catch (Exception e) {
1226+
assertEquals("Provided dictionary does not contain value for index " + indexReferenced, e.getMessage());
1227+
}
1228+
}
1229+
}
1230+
12001231
private int[] convertListToIntArray(List list) {
12011232
int[] values = new int[list.size()];
12021233
for (int i = 0; i < list.size(); i++) {

0 commit comments

Comments
 (0)