diff --git a/src/main/java/org/aksw/iguana/commons/io/BigByteArrayOutputStream.java b/src/main/java/org/aksw/iguana/commons/io/BigByteArrayOutputStream.java index 20396dfd..0f7f70f4 100644 --- a/src/main/java/org/aksw/iguana/commons/io/BigByteArrayOutputStream.java +++ b/src/main/java/org/aksw/iguana/commons/io/BigByteArrayOutputStream.java @@ -16,17 +16,18 @@ * a large amount of data. *

* The BigByteArrayOutputStream works by using an ArrayList of ByteArrayOutputStreams to store - * the byte data. When the current ByteArrayOutputStream fills up, a new one is created and - * added to the list. Writing data to the stream involves writing to the current active - * ByteArrayOutputStream. When the stream is reset, all the internal ByteArrayOutputStreams - * are cleared and a new one is added to the list. + * the byte data. When the current ByteArrayOutputStream fills up, a new one is created with the + * maximum array size (Integer.MAX_VALUE - 8) as its initial capacity and added to the list. + * Writing data to the stream involves writing to the current active ByteArrayOutputStream. When + * the stream is cleared, all the internal ByteArrayOutputStreams are cleared and a new one is + * added to the list. */ public class BigByteArrayOutputStream extends OutputStream { /** * The maximum size limit for an array. This is no limit to the amount of bytes {@code BigByteArrayOutputStream} can consume. */ - public final static long ARRAY_SIZE_LIMIT = 2147483639; + public final static int ARRAY_SIZE_LIMIT = Integer.MAX_VALUE - 8; /** * Holds a list of ByteArrayOutputStream objects. @@ -77,9 +78,9 @@ public BigByteArrayOutputStream(long bufferSize) { baosList = new ArrayList<>(1); baosList.add(new ByteArrayOutputStream((int) bufferSize)); } else { - final var requiredBaoss = (int) (bufferSize / ARRAY_SIZE_LIMIT) + 1; + final var requiredBaoss = (int) (bufferSize / ARRAY_SIZE_LIMIT) + 1; // this might create a fully sized, but empty baos at the end if the buffer size is a multiple of ARRAY_SIZE_LIMIT baosList = new ArrayList<>(requiredBaoss); - IntStream.range(0, requiredBaoss).forEachOrdered(i -> baosList.add(new ByteArrayOutputStream((int) ARRAY_SIZE_LIMIT))); + IntStream.range(0, requiredBaoss).forEachOrdered(i -> baosList.add(new ByteArrayOutputStream(ARRAY_SIZE_LIMIT))); } reset(); } @@ -90,12 +91,7 @@ public List getBaos() { } public void write(BigByteArrayOutputStream bbaos) throws IOException { - for (byte[] bao : bbaos.toByteArray()) { - for (Byte b : bao) { - write(b); - } - } - + write(bbaos.toByteArray()); } public long size() { @@ -123,23 +119,9 @@ public void write(byte[] b, int off, int len) throws IOException { } } - @Override - public void write(byte[] b) throws IOException { - final var space = ensureSpace(); - final var writeLength = Math.min(b.length, space); - this.currentBaos.write(b, 0, writeLength); - final var remainingBytes = b.length - writeLength; - if (remainingBytes > 0) { - ensureSpace(); - this.currentBaos.write(b, writeLength, remainingBytes); - } - } - public void write(byte[][] byteArray) throws IOException { for (byte[] arr : byteArray) { - for (byte b : arr) { - write(b); - } + write(arr); } } @@ -154,19 +136,14 @@ public void write(int i) throws IOException { this.currentBaos.write(i); } - /** - * This method calculates and returns the available space in the current ByteArrayOutputStream. - * If the space is 0, it creates a new ByteArrayOutputStream or resets the next existing one. - * - * @return The available space in the ByteArrayOutputStream. - */ + private int ensureSpace() { - var space = (int) ARRAY_SIZE_LIMIT - currentBaos.size(); + var space = ARRAY_SIZE_LIMIT - currentBaos.size(); if (space == 0) { - space = (int) ARRAY_SIZE_LIMIT; + space = ARRAY_SIZE_LIMIT; if (baosListIndex == baosList.size() - 1) { baosListIndex++; - currentBaos = new ByteArrayOutputStream((int) ARRAY_SIZE_LIMIT); + currentBaos = new ByteArrayOutputStream(ARRAY_SIZE_LIMIT); baosList.add(currentBaos); } else { baosListIndex++; @@ -184,7 +161,9 @@ private int ensureSpace() { */ public void reset() { currentBaos = baosList.get(baosListIndex = 0); - currentBaos.reset(); + for (var baos : baosList) { + baos.reset(); + } } /** @@ -197,6 +176,6 @@ public void clear() { if (baosList.size() > 1) baosList.subList(1, this.baosList.size()).clear(); currentBaos = baosList.get(baosListIndex = 0); + currentBaos.reset(); } - } \ No newline at end of file diff --git a/src/test/java/org/aksw/iguana/commons/io/BigByteArrayOutputStreamTest.java b/src/test/java/org/aksw/iguana/commons/io/BigByteArrayOutputStreamTest.java new file mode 100644 index 00000000..729fac0d --- /dev/null +++ b/src/test/java/org/aksw/iguana/commons/io/BigByteArrayOutputStreamTest.java @@ -0,0 +1,287 @@ +package org.aksw.iguana.commons.io; + +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Named; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.List; +import java.util.Random; +import java.util.function.Supplier; + +import static org.junit.jupiter.api.Assertions.*; + +class BigByteArrayOutputStreamTest { + final static Random rng = new Random(0); + + public static List data() { + final var maxSize = Integer.MAX_VALUE - 8; + + final Supplier sup1 = () -> getBigRandomBuffer(10, maxSize); + final Supplier sup2 = () -> getBigRandomBuffer(maxSize * 2, maxSize); + + return List.of( + Arguments.of(Named.of(String.valueOf(10), sup1), 10, new int[] { 10 }), + Arguments.of(Named.of(String.valueOf(10), sup1), (long)maxSize * 2, new int[] { maxSize, maxSize, maxSize }), // small data, high initial capacity + Arguments.of(Named.of(String.valueOf((long)maxSize*2), sup2), (long)maxSize * 2, new int[] { maxSize, maxSize, maxSize }) + ); + } + + /** + * Creates a random 2d-array buffer with the given size. + * + * @param size number of bytes + * @param maxSingleBufferSize maximum size of a single array + * @return 2d-array buffer + */ + public static byte[][] getBigRandomBuffer(long size, int maxSingleBufferSize) { + if (size < 1) + return new byte[0][0]; + final var bufferField = new byte[(int) (size - 1) / maxSingleBufferSize + 1][]; + for (int i = 0; i < bufferField.length; i++) { + final var bufferSize = (size > maxSingleBufferSize) ? maxSingleBufferSize : (int) size; + bufferField[i] = new byte[bufferSize]; + rng.nextBytes(bufferField[i]); + size -= bufferSize; + } + return bufferField; + } + + @Test + @DisplayName("Test basic write operations") + public void testOtherWriteMethods() throws IOException { + final byte[] buffer = getBigRandomBuffer(10, 10)[0]; + + final var b2 = new byte[] { 0, 1, 2, 3 }; + int i = ByteBuffer.wrap(b2).getInt(); + + try (final var bbaos = new BigByteArrayOutputStream()) { + assertDoesNotThrow(() -> bbaos.write(buffer[0])); + assertEquals(1, bbaos.size()); + assertEquals(buffer[0], bbaos.toByteArray()[0][0]); + + assertDoesNotThrow(() -> bbaos.write(buffer, 1, 9)); + assertEquals(10, bbaos.size()); + assertArrayEquals(buffer, bbaos.toByteArray()[0]); + + final var bbaos2 = new BigByteArrayOutputStream(1); + assertDoesNotThrow(() -> bbaos2.write(bbaos)); + assertEquals(10, bbaos2.size()); + assertArrayEquals(buffer, bbaos2.toByteArray()[0]); + + assertDoesNotThrow(() -> bbaos2.write(i)); + assertEquals(11, bbaos2.size()); + assertEquals(b2[3], bbaos2.toByteArray()[0][10]); // low order byte + } + } + + @Test + @DisplayName("Test illegal capacity arguments") + public void testNegativeCapactiy() { + assertThrows(IllegalArgumentException.class, () -> new BigByteArrayOutputStream(-1)); + assertThrows(IllegalArgumentException.class, () -> new BigByteArrayOutputStream(-1L)); + } + + @Test + @DisplayName("Test illegal write arguments") + public void testIndexOutOfBounds() throws IOException { + try (final var bbaos = new BigByteArrayOutputStream()) { + final byte[] nullBuffer = null; + final var buffer = new byte[10]; + assertThrows(IndexOutOfBoundsException.class, () -> bbaos.write(buffer, -1, 10)); + assertThrows(IndexOutOfBoundsException.class, () -> bbaos.write(buffer, 0, -1)); + assertThrows(IndexOutOfBoundsException.class, () -> bbaos.write(buffer, 0, 11)); + assertThrows(NullPointerException.class, () -> bbaos.write(nullBuffer)); + }; + } + + + @Test + @DisplayName("Test default constructor") + void testDefaultConstructor() throws IOException { + try (final var bbaos = new BigByteArrayOutputStream()) { + assertEquals(0, bbaos.size()); + assertEquals(1, bbaos.getBaos().size()); + assertEquals(0, bbaos.getBaos().get(0).size()); + assertDoesNotThrow(() -> bbaos.write("test".getBytes(StandardCharsets.UTF_8))); + assertEquals(1, bbaos.getBaos().size()); + assertEquals(4, bbaos.getBaos().get(0).size()); + assertEquals(4, bbaos.size()); + } + } + + @Test + @DisplayName("Test constructor with capacity argument") + void testConstructorWithInt() throws IOException { + try (final var bbaos = new BigByteArrayOutputStream(100)) { + assertEquals(0, bbaos.size()); + assertEquals(1, bbaos.getBaos().size()); + assertEquals(0, bbaos.getBaos().get(0).size()); + assertEquals(100, bbaos.getBaos().get(0).getBuffer().length); + assertDoesNotThrow(() -> bbaos.write("test".getBytes(StandardCharsets.UTF_8))); + assertEquals(4, bbaos.size()); + assertEquals(1, bbaos.getBaos().size()); + assertEquals(4, bbaos.getBaos().get(0).size()); + assertEquals(100, bbaos.getBaos().get(0).getBuffer().length); + } + } + + @Test + @DisplayName("Test constructor with big capacity argument") + void testConstructorWithBigLong() throws IOException { + try (final var bbaos = new BigByteArrayOutputStream(((long) Integer.MAX_VALUE) + 10)) { + assertEquals(0, bbaos.size()); + assertEquals(2, bbaos.getBaos().size()); + assertEquals(0, bbaos.getBaos().get(0).size()); + assertEquals(0, bbaos.getBaos().get(1).size()); + assertNotEquals(0, bbaos.getBaos().get(0).getBuffer().length); // rough comparison + assertNotEquals(0, bbaos.getBaos().get(1).getBuffer().length); + assertDoesNotThrow(() -> bbaos.write("test".getBytes(StandardCharsets.UTF_8))); + assertEquals(4, bbaos.size()); + assertEquals(2, bbaos.getBaos().size()); + assertEquals(4, bbaos.getBaos().get(0).size()); + assertEquals(0, bbaos.getBaos().get(1).size()); + } + } + + @Test + @DisplayName("Test write method with big byte arrays") + void testBaosOverflow() throws IOException { + final var maxArraySize = Integer.MAX_VALUE - 8; + final var firstBufferSize = maxArraySize - 1; + final var secondBufferSize = 2; + try (final var bbaos = new BigByteArrayOutputStream(maxArraySize)) { + final var firstBuffer = getBigRandomBuffer(firstBufferSize, maxArraySize); + final var secondBuffer = getBigRandomBuffer(secondBufferSize, maxArraySize); + + assertEquals(0, bbaos.size()); + assertEquals(1, bbaos.getBaos().size()); + assertEquals(0, bbaos.getBaos().get(0).size()); + assertEquals(maxArraySize, bbaos.getBaos().get(0).getBuffer().length); + assertDoesNotThrow(() -> bbaos.write(firstBuffer)); + for (int i = 0; i < firstBufferSize; i++) { + assertEquals(firstBuffer[0][i], bbaos.getBaos().get(0).getBuffer()[i]); // save memory during execution of this test with this loop + } + assertEquals(firstBufferSize, bbaos.size()); + assertEquals(1, bbaos.getBaos().size()); + assertEquals(firstBufferSize, bbaos.getBaos().get(0).size()); + assertArrayEquals(firstBuffer, bbaos.toByteArray()); + + // overflow first baos + assertDoesNotThrow(() -> bbaos.write(secondBuffer)); + assertEquals(maxArraySize, bbaos.getBaos().get(1).getBuffer().length); + assertEquals(firstBufferSize + secondBufferSize, bbaos.size()); + assertEquals(2, bbaos.getBaos().size()); + assertEquals(maxArraySize, bbaos.getBaos().get(0).size()); + assertEquals(secondBufferSize - (maxArraySize - firstBufferSize), bbaos.getBaos().get(1).size()); + + // test content of first baos + for (int i = 0; i < firstBufferSize; i++) + assertEquals(firstBuffer[0][i], bbaos.getBaos().get(0).getBuffer()[i]); + for (int i = firstBufferSize; i < maxArraySize; i++) + assertEquals(secondBuffer[0][i - firstBufferSize], bbaos.getBaos().get(0).getBuffer()[i]); + + // test content of second baos + assertArrayEquals(Arrays.copyOfRange(secondBuffer[0], secondBufferSize - (maxArraySize - firstBufferSize), secondBufferSize), bbaos.getBaos().get(1).toByteArray()); + + // reset + bbaos.reset(); + assertEquals(2, bbaos.getBaos().size()); // baos won't be removed with reset + assertEquals(0, bbaos.size()); + assertEquals(0, bbaos.getBaos().get(0).size()); + assertEquals(0, bbaos.getBaos().get(1).size()); + assertEquals(maxArraySize, bbaos.getBaos().get(0).getBuffer().length); + assertEquals(maxArraySize, bbaos.getBaos().get(1).getBuffer().length); + + assertDoesNotThrow(() -> bbaos.write(firstBuffer)); + assertEquals(firstBufferSize, bbaos.size()); + assertEquals(firstBufferSize, bbaos.getBaos().get(0).size()); + for (int i = 0; i < firstBufferSize; i++) { + assertEquals(firstBuffer[0][i], bbaos.getBaos().get(0).getBuffer()[i]); + } + + assertDoesNotThrow(() -> bbaos.write(secondBuffer)); + assertEquals(2, bbaos.getBaos().size()); + assertEquals(maxArraySize, bbaos.getBaos().get(1).getBuffer().length); + assertEquals(firstBufferSize + secondBufferSize, bbaos.size()); + assertEquals(maxArraySize, bbaos.getBaos().get(0).size()); + assertEquals(secondBufferSize - (maxArraySize - firstBufferSize), bbaos.getBaos().get(1).size()); + for (int i = 0; i < firstBufferSize; i++) + assertEquals(firstBuffer[0][i], bbaos.getBaos().get(0).getBuffer()[i]); + for (int i = firstBufferSize; i < maxArraySize; i++) + assertEquals(secondBuffer[0][i - firstBufferSize], bbaos.getBaos().get(0).getBuffer()[i]); + + assertArrayEquals(Arrays.copyOfRange(secondBuffer[0], secondBufferSize - (maxArraySize - firstBufferSize), secondBufferSize), bbaos.getBaos().get(1).toByteArray()); + } + } + + @ParameterizedTest(name = "[{index}] randomBufferSize={0}, initialCapacitiy={1}, baosSizes={2}") + @MethodSource("data") + @DisplayName("Test reset method") + void testReset(Supplier bufferSup, long initialCapacitiy, int[] baosSizes) throws IOException { + final var buffer = bufferSup.get(); + try (final var bbaos = new BigByteArrayOutputStream(initialCapacitiy)) { + bbaos.write(buffer); + assertEquals(baosSizes.length, bbaos.getBaos().size()); // expected amount of baos + for (int i = 0; i < buffer.length; i++) { + assertArrayEquals(buffer[i], bbaos.getBaos().get(i).toByteArray()); // expected content + assertEquals(baosSizes[i], bbaos.getBaos().get(i).getBuffer().length); // expected baos sizes + } + assertEquals(Arrays.stream(buffer).mapToInt(x -> x.length).sum(), bbaos.size()); + + bbaos.reset(); + + assertEquals(0, bbaos.size()); + assertEquals(baosSizes.length, bbaos.getBaos().size()); // same amount of baos + for (int i = 0; i < buffer.length; i++) { + assertEquals(baosSizes[i], bbaos.getBaos().get(i).getBuffer().length); // baos sizes should be same + } + + // after clear, a new write should result same expected content and state + bbaos.write(buffer); + assertEquals(Arrays.stream(buffer).mapToInt(x -> x.length).sum(), bbaos.size()); + for (int i = 0; i < buffer.length; i++) { + assertArrayEquals(buffer[i], bbaos.getBaos().get(i).toByteArray()); // expected content + } + + // check baos sizes again after write + for (int i = 0; i < baosSizes.length; i++) { + assertEquals(baosSizes[i], bbaos.getBaos().get(i).getBuffer().length); + } + } + } + + @ParameterizedTest(name = "[{index}] randomBufferSize={0}, initialCapacitiy={1}, baosSizes={2}") + @MethodSource("data") + @DisplayName("Test clear method") + void testClear(Supplier bufferSup, long initialCapacitiy, int[] baosSizes) throws IOException { + final var buffer = bufferSup.get(); + try (final var bbaos = new BigByteArrayOutputStream(initialCapacitiy)) { + bbaos.write(buffer); + assertEquals(baosSizes.length, bbaos.getBaos().size()); // expected amount of baos + for (int i = 0; i < buffer.length; i++) { + assertArrayEquals(buffer[i], bbaos.getBaos().get(i).toByteArray()); // expected content + assertEquals(baosSizes[i], bbaos.getBaos().get(i).getBuffer().length); // expected baos sizes + } + assertEquals(Arrays.stream(buffer).mapToInt(x -> x.length).sum(), bbaos.size()); + + bbaos.clear(); + assertEquals(0, bbaos.size()); + assertEquals(1, bbaos.getBaos().size()); // deleted all baos except first one + assertEquals(baosSizes[0], bbaos.getBaos().get(0).getBuffer().length); // first baos maintained previous buffer size + + // after clear, a new write should result same expected content + bbaos.write(buffer); + for (int i = 0; i < buffer.length; i++) { + assertArrayEquals(buffer[i], bbaos.getBaos().get(i).toByteArray()); // expected content + } + assertEquals(Arrays.stream(buffer).mapToInt(x -> x.length).sum(), bbaos.size()); + } + } +} \ No newline at end of file