diff --git a/src/main/java/org/aksw/iguana/commons/io/BigByteArrayOutputStream.java b/src/main/java/org/aksw/iguana/commons/io/BigByteArrayOutputStream.java
index 20396dfd..0f7f70f4 100644
--- a/src/main/java/org/aksw/iguana/commons/io/BigByteArrayOutputStream.java
+++ b/src/main/java/org/aksw/iguana/commons/io/BigByteArrayOutputStream.java
@@ -16,17 +16,18 @@
* a large amount of data.
*
* The BigByteArrayOutputStream works by using an ArrayList of ByteArrayOutputStreams to store
- * the byte data. When the current ByteArrayOutputStream fills up, a new one is created and
- * added to the list. Writing data to the stream involves writing to the current active
- * ByteArrayOutputStream. When the stream is reset, all the internal ByteArrayOutputStreams
- * are cleared and a new one is added to the list.
+ * the byte data. When the current ByteArrayOutputStream fills up, a new one is created with the
+ * maximum array size ({@code Integer.MAX_VALUE - 8}) as its initial capacity and added to the list.
+ * Writing data to the stream involves writing to the current active ByteArrayOutputStream. When
+ * the stream is cleared, all the internal ByteArrayOutputStreams are cleared and a new one is
+ * added to the list.
*/
public class BigByteArrayOutputStream extends OutputStream {
/**
* The maximum size limit for an array. This is no limit to the amount of bytes {@code BigByteArrayOutputStream} can consume.
*/
- public final static long ARRAY_SIZE_LIMIT = 2147483639;
+ public final static int ARRAY_SIZE_LIMIT = Integer.MAX_VALUE - 8;
/**
* Holds a list of ByteArrayOutputStream objects.
@@ -77,9 +78,9 @@ public BigByteArrayOutputStream(long bufferSize) {
baosList = new ArrayList<>(1);
baosList.add(new ByteArrayOutputStream((int) bufferSize));
} else {
- final var requiredBaoss = (int) (bufferSize / ARRAY_SIZE_LIMIT) + 1;
+ final var requiredBaoss = (int) (bufferSize / ARRAY_SIZE_LIMIT) + 1; // this might create a fully sized, but empty baos at the end if the buffer size is a multiple of ARRAY_SIZE_LIMIT
baosList = new ArrayList<>(requiredBaoss);
- IntStream.range(0, requiredBaoss).forEachOrdered(i -> baosList.add(new ByteArrayOutputStream((int) ARRAY_SIZE_LIMIT)));
+ IntStream.range(0, requiredBaoss).forEachOrdered(i -> baosList.add(new ByteArrayOutputStream(ARRAY_SIZE_LIMIT)));
}
reset();
}
@@ -90,12 +91,7 @@ public List getBaos() {
}
public void write(BigByteArrayOutputStream bbaos) throws IOException {
- for (byte[] bao : bbaos.toByteArray()) {
- for (Byte b : bao) {
- write(b);
- }
- }
-
+ write(bbaos.toByteArray());
}
public long size() {
@@ -123,23 +119,9 @@ public void write(byte[] b, int off, int len) throws IOException {
}
}
- @Override
- public void write(byte[] b) throws IOException {
- final var space = ensureSpace();
- final var writeLength = Math.min(b.length, space);
- this.currentBaos.write(b, 0, writeLength);
- final var remainingBytes = b.length - writeLength;
- if (remainingBytes > 0) {
- ensureSpace();
- this.currentBaos.write(b, writeLength, remainingBytes);
- }
- }
-
public void write(byte[][] byteArray) throws IOException {
for (byte[] arr : byteArray) {
- for (byte b : arr) {
- write(b);
- }
+ write(arr);
}
}
@@ -154,19 +136,14 @@ public void write(int i) throws IOException {
this.currentBaos.write(i);
}
- /**
- * This method calculates and returns the available space in the current ByteArrayOutputStream.
- * If the space is 0, it creates a new ByteArrayOutputStream or resets the next existing one.
- *
- * @return The available space in the ByteArrayOutputStream.
- */
+
private int ensureSpace() {
- var space = (int) ARRAY_SIZE_LIMIT - currentBaos.size();
+ var space = ARRAY_SIZE_LIMIT - currentBaos.size();
if (space == 0) {
- space = (int) ARRAY_SIZE_LIMIT;
+ space = ARRAY_SIZE_LIMIT;
if (baosListIndex == baosList.size() - 1) {
baosListIndex++;
- currentBaos = new ByteArrayOutputStream((int) ARRAY_SIZE_LIMIT);
+ currentBaos = new ByteArrayOutputStream(ARRAY_SIZE_LIMIT);
baosList.add(currentBaos);
} else {
baosListIndex++;
@@ -184,7 +161,9 @@ private int ensureSpace() {
*/
public void reset() {
currentBaos = baosList.get(baosListIndex = 0);
- currentBaos.reset();
+ for (var baos : baosList) {
+ baos.reset();
+ }
}
/**
@@ -197,6 +176,6 @@ public void clear() {
if (baosList.size() > 1)
baosList.subList(1, this.baosList.size()).clear();
currentBaos = baosList.get(baosListIndex = 0);
+ currentBaos.reset();
}
-
}
\ No newline at end of file
diff --git a/src/test/java/org/aksw/iguana/commons/io/BigByteArrayOutputStreamTest.java b/src/test/java/org/aksw/iguana/commons/io/BigByteArrayOutputStreamTest.java
new file mode 100644
index 00000000..729fac0d
--- /dev/null
+++ b/src/test/java/org/aksw/iguana/commons/io/BigByteArrayOutputStreamTest.java
@@ -0,0 +1,287 @@
+package org.aksw.iguana.commons.io;
+
+import org.junit.jupiter.api.DisplayName;
+import org.junit.jupiter.api.Named;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Random;
+import java.util.function.Supplier;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+class BigByteArrayOutputStreamTest {
+    final static Random rng = new Random(0); // fixed seed for deterministic pseudo-random test data
+
+    public static List<Arguments> data() {
+        final var maxSize = Integer.MAX_VALUE - 8;
+        // widen to long before doubling; int arithmetic would overflow to a negative size and yield an empty buffer
+        final Supplier<byte[][]> sup1 = () -> getBigRandomBuffer(10, maxSize);
+        final Supplier<byte[][]> sup2 = () -> getBigRandomBuffer((long) maxSize * 2, maxSize);
+
+        return List.of(
+                Arguments.of(Named.of(String.valueOf(10), sup1), 10, new int[] { 10 }),
+                Arguments.of(Named.of(String.valueOf(10), sup1), (long)maxSize * 2, new int[] { maxSize, maxSize, maxSize }), // small data, high initial capacity
+                Arguments.of(Named.of(String.valueOf((long)maxSize*2), sup2), (long)maxSize * 2, new int[] { maxSize, maxSize, maxSize })
+        );
+    }
+
+    /**
+     * Creates a random 2d-array buffer holding {@code size} bytes in total.
+     *
+     * @param size number of bytes
+     * @param maxSingleBufferSize maximum size of a single array
+     * @return 2d-array buffer; an empty 2d-array if {@code size < 1}
+     */
+    public static byte[][] getBigRandomBuffer(long size, int maxSingleBufferSize) {
+        if (size < 1)
+            return new byte[0][0];
+        final var bufferField = new byte[(int) ((size - 1) / maxSingleBufferSize) + 1][]; // divide as long first, narrow afterwards
+        for (int i = 0; i < bufferField.length; i++) {
+            final var bufferSize = (size > maxSingleBufferSize) ? maxSingleBufferSize : (int) size;
+            bufferField[i] = new byte[bufferSize];
+            rng.nextBytes(bufferField[i]);
+            size -= bufferSize;
+        }
+        return bufferField;
+    }
+
+    @Test
+    @DisplayName("Test basic write operations")
+    public void testOtherWriteMethods() throws IOException {
+        final byte[] buffer = getBigRandomBuffer(10, 10)[0];
+
+        final var b2 = new byte[] { 0, 1, 2, 3 };
+        int i = ByteBuffer.wrap(b2).getInt();
+
+        try (final var bbaos = new BigByteArrayOutputStream()) {
+            assertDoesNotThrow(() -> bbaos.write(buffer[0]));
+            assertEquals(1, bbaos.size());
+            assertEquals(buffer[0], bbaos.toByteArray()[0][0]);
+
+            assertDoesNotThrow(() -> bbaos.write(buffer, 1, 9));
+            assertEquals(10, bbaos.size());
+            assertArrayEquals(buffer, bbaos.toByteArray()[0]);
+
+            final var bbaos2 = new BigByteArrayOutputStream(1);
+            assertDoesNotThrow(() -> bbaos2.write(bbaos));
+            assertEquals(10, bbaos2.size());
+            assertArrayEquals(buffer, bbaos2.toByteArray()[0]);
+
+            assertDoesNotThrow(() -> bbaos2.write(i));
+            assertEquals(11, bbaos2.size());
+            assertEquals(b2[3], bbaos2.toByteArray()[0][10]); // low order byte
+        }
+    }
+
+    @Test
+    @DisplayName("Test illegal capacity arguments")
+    public void testNegativeCapactiy() {
+        assertThrows(IllegalArgumentException.class, () -> new BigByteArrayOutputStream(-1));
+        assertThrows(IllegalArgumentException.class, () -> new BigByteArrayOutputStream(-1L));
+    }
+
+    @Test
+    @DisplayName("Test illegal write arguments")
+    public void testIndexOutOfBounds() throws IOException {
+        try (final var bbaos = new BigByteArrayOutputStream()) {
+            final byte[] nullBuffer = null;
+            final var buffer = new byte[10];
+            assertThrows(IndexOutOfBoundsException.class, () -> bbaos.write(buffer, -1, 10));
+            assertThrows(IndexOutOfBoundsException.class, () -> bbaos.write(buffer, 0, -1));
+            assertThrows(IndexOutOfBoundsException.class, () -> bbaos.write(buffer, 0, 11));
+            assertThrows(NullPointerException.class, () -> bbaos.write(nullBuffer));
+        }
+    }
+
+
+    @Test
+    @DisplayName("Test default constructor")
+    void testDefaultConstructor() throws IOException {
+        try (final var bbaos = new BigByteArrayOutputStream()) {
+            assertEquals(0, bbaos.size());
+            assertEquals(1, bbaos.getBaos().size());
+            assertEquals(0, bbaos.getBaos().get(0).size());
+            assertDoesNotThrow(() -> bbaos.write("test".getBytes(StandardCharsets.UTF_8)));
+            assertEquals(1, bbaos.getBaos().size());
+            assertEquals(4, bbaos.getBaos().get(0).size());
+            assertEquals(4, bbaos.size());
+        }
+    }
+
+    @Test
+    @DisplayName("Test constructor with capacity argument")
+    void testConstructorWithInt() throws IOException {
+        try (final var bbaos = new BigByteArrayOutputStream(100)) {
+            assertEquals(0, bbaos.size());
+            assertEquals(1, bbaos.getBaos().size());
+            assertEquals(0, bbaos.getBaos().get(0).size());
+            assertEquals(100, bbaos.getBaos().get(0).getBuffer().length);
+            assertDoesNotThrow(() -> bbaos.write("test".getBytes(StandardCharsets.UTF_8)));
+            assertEquals(4, bbaos.size());
+            assertEquals(1, bbaos.getBaos().size());
+            assertEquals(4, bbaos.getBaos().get(0).size());
+            assertEquals(100, bbaos.getBaos().get(0).getBuffer().length);
+        }
+    }
+
+    @Test
+    @DisplayName("Test constructor with big capacity argument")
+    void testConstructorWithBigLong() throws IOException {
+        try (final var bbaos = new BigByteArrayOutputStream(((long) Integer.MAX_VALUE) + 10)) {
+            assertEquals(0, bbaos.size());
+            assertEquals(2, bbaos.getBaos().size());
+            assertEquals(0, bbaos.getBaos().get(0).size());
+            assertEquals(0, bbaos.getBaos().get(1).size());
+            assertNotEquals(0, bbaos.getBaos().get(0).getBuffer().length); // rough comparison
+            assertNotEquals(0, bbaos.getBaos().get(1).getBuffer().length);
+            assertDoesNotThrow(() -> bbaos.write("test".getBytes(StandardCharsets.UTF_8)));
+            assertEquals(4, bbaos.size());
+            assertEquals(2, bbaos.getBaos().size());
+            assertEquals(4, bbaos.getBaos().get(0).size());
+            assertEquals(0, bbaos.getBaos().get(1).size());
+        }
+    }
+
+    @Test
+    @DisplayName("Test write method with big byte arrays")
+    void testBaosOverflow() throws IOException {
+        final var maxArraySize = Integer.MAX_VALUE - 8;
+        final var firstBufferSize = maxArraySize - 1;
+        final var secondBufferSize = 2;
+        try (final var bbaos = new BigByteArrayOutputStream(maxArraySize)) {
+            final var firstBuffer = getBigRandomBuffer(firstBufferSize, maxArraySize);
+            final var secondBuffer = getBigRandomBuffer(secondBufferSize, maxArraySize);
+
+            assertEquals(0, bbaos.size());
+            assertEquals(1, bbaos.getBaos().size());
+            assertEquals(0, bbaos.getBaos().get(0).size());
+            assertEquals(maxArraySize, bbaos.getBaos().get(0).getBuffer().length);
+            assertDoesNotThrow(() -> bbaos.write(firstBuffer));
+            for (int i = 0; i < firstBufferSize; i++) {
+                assertEquals(firstBuffer[0][i], bbaos.getBaos().get(0).getBuffer()[i]); // save memory during execution of this test with this loop
+            }
+            assertEquals(firstBufferSize, bbaos.size());
+            assertEquals(1, bbaos.getBaos().size());
+            assertEquals(firstBufferSize, bbaos.getBaos().get(0).size());
+            assertArrayEquals(firstBuffer, bbaos.toByteArray());
+
+            // overflow first baos
+            assertDoesNotThrow(() -> bbaos.write(secondBuffer));
+            assertEquals(maxArraySize, bbaos.getBaos().get(1).getBuffer().length);
+            assertEquals(firstBufferSize + secondBufferSize, bbaos.size());
+            assertEquals(2, bbaos.getBaos().size());
+            assertEquals(maxArraySize, bbaos.getBaos().get(0).size());
+            assertEquals(secondBufferSize - (maxArraySize - firstBufferSize), bbaos.getBaos().get(1).size());
+
+            // test content of first baos
+            for (int i = 0; i < firstBufferSize; i++)
+                assertEquals(firstBuffer[0][i], bbaos.getBaos().get(0).getBuffer()[i]);
+            for (int i = firstBufferSize; i < maxArraySize; i++)
+                assertEquals(secondBuffer[0][i - firstBufferSize], bbaos.getBaos().get(0).getBuffer()[i]);
+
+            // test content of second baos
+            assertArrayEquals(Arrays.copyOfRange(secondBuffer[0], secondBufferSize - (maxArraySize - firstBufferSize), secondBufferSize), bbaos.getBaos().get(1).toByteArray());
+
+            // reset
+            bbaos.reset();
+            assertEquals(2, bbaos.getBaos().size()); // baos won't be removed with reset
+            assertEquals(0, bbaos.size());
+            assertEquals(0, bbaos.getBaos().get(0).size());
+            assertEquals(0, bbaos.getBaos().get(1).size());
+            assertEquals(maxArraySize, bbaos.getBaos().get(0).getBuffer().length);
+            assertEquals(maxArraySize, bbaos.getBaos().get(1).getBuffer().length);
+
+            assertDoesNotThrow(() -> bbaos.write(firstBuffer));
+            assertEquals(firstBufferSize, bbaos.size());
+            assertEquals(firstBufferSize, bbaos.getBaos().get(0).size());
+            for (int i = 0; i < firstBufferSize; i++) {
+                assertEquals(firstBuffer[0][i], bbaos.getBaos().get(0).getBuffer()[i]);
+            }
+
+            assertDoesNotThrow(() -> bbaos.write(secondBuffer));
+            assertEquals(2, bbaos.getBaos().size());
+            assertEquals(maxArraySize, bbaos.getBaos().get(1).getBuffer().length);
+            assertEquals(firstBufferSize + secondBufferSize, bbaos.size());
+            assertEquals(maxArraySize, bbaos.getBaos().get(0).size());
+            assertEquals(secondBufferSize - (maxArraySize - firstBufferSize), bbaos.getBaos().get(1).size());
+            for (int i = 0; i < firstBufferSize; i++)
+                assertEquals(firstBuffer[0][i], bbaos.getBaos().get(0).getBuffer()[i]);
+            for (int i = firstBufferSize; i < maxArraySize; i++)
+                assertEquals(secondBuffer[0][i - firstBufferSize], bbaos.getBaos().get(0).getBuffer()[i]);
+
+            assertArrayEquals(Arrays.copyOfRange(secondBuffer[0], secondBufferSize - (maxArraySize - firstBufferSize), secondBufferSize), bbaos.getBaos().get(1).toByteArray());
+        }
+    }
+
+    @ParameterizedTest(name = "[{index}] randomBufferSize={0}, initialCapacitiy={1}, baosSizes={2}")
+    @MethodSource("data")
+    @DisplayName("Test reset method")
+    void testReset(Supplier<byte[][]> bufferSup, long initialCapacitiy, int[] baosSizes) throws IOException {
+        final var buffer = bufferSup.get();
+        try (final var bbaos = new BigByteArrayOutputStream(initialCapacitiy)) {
+            bbaos.write(buffer);
+            assertEquals(baosSizes.length, bbaos.getBaos().size()); // expected amount of baos
+            for (int i = 0; i < buffer.length; i++) {
+                assertArrayEquals(buffer[i], bbaos.getBaos().get(i).toByteArray()); // expected content
+                assertEquals(baosSizes[i], bbaos.getBaos().get(i).getBuffer().length); // expected baos sizes
+            }
+            assertEquals(Arrays.stream(buffer).mapToLong(x -> x.length).sum(), bbaos.size()); // long sum: total may exceed Integer.MAX_VALUE
+
+            bbaos.reset();
+
+            assertEquals(0, bbaos.size());
+            assertEquals(baosSizes.length, bbaos.getBaos().size()); // same amount of baos
+            for (int i = 0; i < buffer.length; i++) {
+                assertEquals(baosSizes[i], bbaos.getBaos().get(i).getBuffer().length); // baos sizes should be same
+            }
+
+            // after reset, a new write should result in the same expected content and state
+            bbaos.write(buffer);
+            assertEquals(Arrays.stream(buffer).mapToLong(x -> x.length).sum(), bbaos.size());
+            for (int i = 0; i < buffer.length; i++) {
+                assertArrayEquals(buffer[i], bbaos.getBaos().get(i).toByteArray()); // expected content
+            }
+
+            // check baos sizes again after write
+            for (int i = 0; i < baosSizes.length; i++) {
+                assertEquals(baosSizes[i], bbaos.getBaos().get(i).getBuffer().length);
+            }
+        }
+    }
+
+    @ParameterizedTest(name = "[{index}] randomBufferSize={0}, initialCapacitiy={1}, baosSizes={2}")
+    @MethodSource("data")
+    @DisplayName("Test clear method")
+    void testClear(Supplier<byte[][]> bufferSup, long initialCapacitiy, int[] baosSizes) throws IOException {
+        final var buffer = bufferSup.get();
+        try (final var bbaos = new BigByteArrayOutputStream(initialCapacitiy)) {
+            bbaos.write(buffer);
+            assertEquals(baosSizes.length, bbaos.getBaos().size()); // expected amount of baos
+            for (int i = 0; i < buffer.length; i++) {
+                assertArrayEquals(buffer[i], bbaos.getBaos().get(i).toByteArray()); // expected content
+                assertEquals(baosSizes[i], bbaos.getBaos().get(i).getBuffer().length); // expected baos sizes
+            }
+            assertEquals(Arrays.stream(buffer).mapToLong(x -> x.length).sum(), bbaos.size()); // long sum: total may exceed Integer.MAX_VALUE
+
+            bbaos.clear();
+            assertEquals(0, bbaos.size());
+            assertEquals(1, bbaos.getBaos().size()); // deleted all baos except first one
+            assertEquals(baosSizes[0], bbaos.getBaos().get(0).getBuffer().length); // first baos maintained previous buffer size
+
+            // after clear, a new write should result same expected content
+            bbaos.write(buffer);
+            for (int i = 0; i < buffer.length; i++) {
+                assertArrayEquals(buffer[i], bbaos.getBaos().get(i).toByteArray()); // expected content
+            }
+            assertEquals(Arrays.stream(buffer).mapToLong(x -> x.length).sum(), bbaos.size());
+        }
+    }
+}
\ No newline at end of file