|
110 | 110 | import org.slf4j.Logger;
|
111 | 111 | import org.slf4j.LoggerFactory;
|
112 | 112 |
|
| 113 | +/** |
| 114 | + * Verify that data can be written and read back again. |
| 115 | + * The test suite is parameterized on vector IO being disabled/enabled. |
| 116 | + * This verifies that the vector IO code path is correct, and that |
| 117 | + * the default path continues to work. |
| 118 | + */ |
113 | 119 | @RunWith(Parameterized.class)
|
114 | 120 | public class TestParquetFileWriter {
|
115 | 121 |
|
@@ -152,16 +158,24 @@ public static List<Boolean> params() {
|
152 | 158 | /**
|
153 | 159 | * Read type: true for vectored IO.
|
154 | 160 | */
|
155 |
| - private final boolean readType; |
| 161 | + private final boolean vectoredRead; |
156 | 162 |
|
157 |
| - public TestParquetFileWriter(boolean readType) { |
158 |
| - this.readType = readType; |
| 163 | + /** |
| 164 | + * Instantiate. |
| 165 | + * @param vectoredRead use vector IO for reading. |
| 166 | + */ |
| 167 | + public TestParquetFileWriter(boolean vectoredRead) { |
| 168 | + this.vectoredRead = vectoredRead; |
159 | 169 | }
|
160 | 170 |
|
| 171 | + /** |
| 172 | + * Get the configuration for the tests. |
| 173 | + * @return a configuration which may have vector IO set. |
| 174 | + */ |
161 | 175 | private Configuration getTestConfiguration() {
|
162 | 176 | Configuration conf = new Configuration();
|
163 | 177 | // set the vector IO option
|
164 |
| - conf.setBoolean(ParquetInputFormat.HADOOP_VECTORED_IO_ENABLED, readType); |
| 178 | + conf.setBoolean(ParquetInputFormat.HADOOP_VECTORED_IO_ENABLED, vectoredRead); |
165 | 179 | return conf;
|
166 | 180 | }
|
167 | 181 |
|
@@ -297,7 +311,7 @@ public void testWriteReadWithRecordReader() throws Exception {
|
297 | 311 | testFile.delete();
|
298 | 312 |
|
299 | 313 | Path path = new Path(testFile.toURI());
|
300 |
| - Configuration configuration = new Configuration(); |
| 314 | + Configuration configuration = getTestConfiguration(); |
301 | 315 |
|
302 | 316 | ParquetFileWriter w = new ParquetFileWriter(configuration, SCHEMA, path);
|
303 | 317 | w.start();
|
@@ -395,7 +409,7 @@ public void testBloomFilterWriteRead() throws Exception {
|
395 | 409 | File testFile = temp.newFile();
|
396 | 410 | testFile.delete();
|
397 | 411 | Path path = new Path(testFile.toURI());
|
398 |
| - Configuration configuration = new Configuration(); |
| 412 | + Configuration configuration = getTestConfiguration(); |
399 | 413 | configuration.set("parquet.bloom.filter.column.names", "foo");
|
400 | 414 | String[] colPath = {"foo"};
|
401 | 415 | ColumnDescriptor col = schema.getColumnDescription(colPath);
|
@@ -436,7 +450,7 @@ public void testWriteReadDataPageV2() throws Exception {
|
436 | 450 | testFile.delete();
|
437 | 451 |
|
438 | 452 | Path path = new Path(testFile.toURI());
|
439 |
| - Configuration configuration = new Configuration(); |
| 453 | + Configuration configuration = getTestConfiguration(); |
440 | 454 |
|
441 | 455 | ParquetFileWriter w = new ParquetFileWriter(configuration, SCHEMA, path);
|
442 | 456 | w.start();
|
@@ -592,9 +606,11 @@ public void testAlignmentWithPadding() throws Exception {
|
592 | 606 | FileSystem fs = path.getFileSystem(conf);
|
593 | 607 | long fileLen = fs.getFileStatus(path).getLen();
|
594 | 608 |
|
595 |
| - FSDataInputStream data = fs.open(path); |
596 |
| - data.seek(fileLen - 8); // 4-byte offset + "PAR1" |
597 |
| - long footerLen = BytesUtils.readIntLittleEndian(data); |
| 609 | + long footerLen; |
| 610 | + try (FSDataInputStream data = fs.open(path)) { |
| 611 | + data.seek(fileLen - 8); // 4-byte offset + "PAR1" |
| 612 | + footerLen = BytesUtils.readIntLittleEndian(data); |
| 613 | + } |
598 | 614 | long startFooter = fileLen - footerLen - 8;
|
599 | 615 |
|
600 | 616 | assertEquals("Footer should start after second row group without padding", secondRowGroupEnds, startFooter);
|
@@ -677,6 +693,8 @@ public void testAlignmentWithNoPaddingNeeded() throws Exception {
|
677 | 693 | Configuration conf = getTestConfiguration();
|
678 | 694 | // Disable writing out checksums as hardcoded byte offsets in assertions below expect it
|
679 | 695 | conf.setBoolean(ParquetOutputFormat.PAGE_WRITE_CHECKSUM_ENABLED, false);
|
| 696 | + // close any filesystems to ensure that the FS used by the writer picks up the configuration |
| 697 | + FileSystem.closeAll(); |
680 | 698 |
|
681 | 699 | // uses the test constructor
|
682 | 700 | ParquetFileWriter w = new ParquetFileWriter(conf, SCHEMA, path, 100, 50);
|
@@ -716,9 +734,11 @@ public void testAlignmentWithNoPaddingNeeded() throws Exception {
|
716 | 734 | FileSystem fs = path.getFileSystem(conf);
|
717 | 735 | long fileLen = fs.getFileStatus(path).getLen();
|
718 | 736 |
|
719 |
| - FSDataInputStream data = fs.open(path); |
720 |
| - data.seek(fileLen - 8); // 4-byte offset + "PAR1" |
721 |
| - long footerLen = BytesUtils.readIntLittleEndian(data); |
| 737 | + long footerLen; |
| 738 | + try (FSDataInputStream data = fs.open(path)) { |
| 739 | + data.seek(fileLen - 8); // 4-byte offset + "PAR1" |
| 740 | + footerLen = BytesUtils.readIntLittleEndian(data); |
| 741 | + } |
722 | 742 | long startFooter = fileLen - footerLen - 8;
|
723 | 743 |
|
724 | 744 | assertEquals("Footer should start after second row group without padding", secondRowGroupEnds, startFooter);
|
@@ -975,6 +995,8 @@ public void testWriteReadStatisticsAllNulls() throws Exception {
|
975 | 995 | configuration.setBoolean("parquet.strings.signed-min-max.enabled", true);
|
976 | 996 | GroupWriteSupport.setSchema(schema, configuration);
|
977 | 997 |
|
| 998 | + // close any filesystems to ensure that the FS used by the writer picks up the configuration |
| 999 | + FileSystem.closeAll(); |
978 | 1000 | ParquetWriter<Group> writer = new ParquetWriter<Group>(path, configuration, new GroupWriteSupport());
|
979 | 1001 |
|
980 | 1002 | Group r1 = new SimpleGroup(schema);
|
|
0 commit comments