|
@@ -95,6 +95,12 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

+/**
+ * Verify that data can be written and read back again.
+ * The test suite is parameterized on vector IO being disabled/enabled.
+ * This verifies that the vector IO code path is correct, and that
+ * the default path continues to work.
+ */
 @RunWith(Parameterized.class)
 public class TestParquetFileWriter {

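The next hunk sits under the `params()` supplier named in its header. The supplier's body is outside this diff; as a minimal sketch of what a JUnit 4 two-way toggle with that signature typically looks like (the `name` pattern and the returned values are assumptions, only the signature comes from the hunk header):

```java
import java.util.Arrays;
import java.util.List;
import org.junit.runners.Parameterized;

// Hypothetical sketch; the real params() body is not shown in this diff.
// Each returned value becomes one run of the whole suite, so every test
// executes once with vector IO disabled and once with it enabled.
@Parameterized.Parameters(name = "vectored-{0}")
public static List<Boolean> params() {
  return Arrays.asList(false, true);
}
```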
|
@@ -136,16 +142,24 @@ public static List<Boolean> params() {
   /**
    * Read type: true for vectored IO.
    */
-  private final boolean readType;
+  private final boolean vectoredRead;

-  public TestParquetFileWriter(boolean readType) {
-    this.readType = readType;
+  /**
+   * Instantiate.
+   * @param vectoredRead use vector IO for reading.
+   */
+  public TestParquetFileWriter(boolean vectoredRead) {
+    this.vectoredRead = vectoredRead;
   }

+  /**
+   * Get the configuration for the tests.
+   * @return a configuration which may have vector IO set.
+   */
   private Configuration getTestConfiguration() {
     Configuration conf = new Configuration();
     // set the vector IO option
-    conf.setBoolean(ParquetInputFormat.HADOOP_VECTORED_IO_ENABLED, readType);
+    conf.setBoolean(ParquetInputFormat.HADOOP_VECTORED_IO_ENABLED, vectoredRead);
     return conf;
   }

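The hunks that follow switch every bare `new Configuration()` call site to this helper, so a single flag flips the read path for the whole suite. A hedged usage sketch of how a reader then picks the setting up (the reader and file handling here are illustrative, not part of this diff):

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.column.page.PageReadStore;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.util.HadoopInputFile;

// Illustrative only: any reader opened with the returned Configuration
// takes its IO strategy from ParquetInputFormat.HADOOP_VECTORED_IO_ENABLED.
Configuration conf = getTestConfiguration();
try (ParquetFileReader reader =
    ParquetFileReader.open(HadoopInputFile.fromPath(path, conf))) {
  PageReadStore rowGroup = reader.readNextRowGroup(); // vectored or classic reads
}
```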
|
@@ -277,7 +291,7 @@ public void testWriteReadWithRecordReader() throws Exception {
     testFile.delete();

     Path path = new Path(testFile.toURI());
-    Configuration configuration = new Configuration();
+    Configuration configuration = getTestConfiguration();

     ParquetFileWriter w = new ParquetFileWriter(configuration, SCHEMA, path);
     w.start();
|
@@ -371,7 +385,7 @@ public void testBloomFilterWriteRead() throws Exception {
     File testFile = temp.newFile();
     testFile.delete();
     Path path = new Path(testFile.toURI());
-    Configuration configuration = new Configuration();
+    Configuration configuration = getTestConfiguration();
     configuration.set("parquet.bloom.filter.column.names", "foo");
     String[] colPath = {"foo"};
     ColumnDescriptor col = schema.getColumnDescription(colPath);
|
@@ -406,7 +420,7 @@ public void testWriteReadDataPageV2() throws Exception {
     testFile.delete();

     Path path = new Path(testFile.toURI());
-    Configuration configuration = new Configuration();
+    Configuration configuration = getTestConfiguration();

     ParquetFileWriter w = new ParquetFileWriter(configuration, SCHEMA, path);
     w.start();
|
@@ -511,14 +525,15 @@ public void testAlignmentWithPadding() throws Exception {
     FileSystem fs = path.getFileSystem(conf);
     long fileLen = fs.getFileStatus(path).getLen();

-    FSDataInputStream data = fs.open(path);
-    data.seek(fileLen - 8); // 4-byte offset + "PAR1"
-    long footerLen = BytesUtils.readIntLittleEndian(data);
+    long footerLen;
+    try (FSDataInputStream data = fs.open(path)) {
+      data.seek(fileLen - 8); // 4-byte offset + "PAR1"
+      footerLen = BytesUtils.readIntLittleEndian(data);
+    }
     long startFooter = fileLen - footerLen - 8;

     assertEquals("Footer should start after second row group without padding",
       secondRowGroupEnds, startFooter);
-
     ParquetMetadata readFooter = ParquetFileReader.readFooter(conf, path);
     assertEquals("footer: "+ readFooter, 2, readFooter.getBlocks().size());
     assertEquals(c1Ends - c1Starts, readFooter.getBlocks().get(0).getColumns().get(0).getTotalSize());
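The tail arithmetic above relies on the Parquet file layout: the last 8 bytes of a file are a 4-byte little-endian footer length followed by the 4-byte `PAR1` magic, so the footer starts at `fileLen - footerLen - 8`. A standalone sketch of that probe (the helper name is hypothetical; the calls mirror the test code above):

```java
import java.io.IOException;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.bytes.BytesUtils;

// Hypothetical helper illustrating the tail layout used above:
// [footer bytes][4-byte little-endian footer length]["PAR1"]
static long footerStart(FileSystem fs, Path path) throws IOException {
  long fileLen = fs.getFileStatus(path).getLen();
  try (FSDataInputStream in = fs.open(path)) {
    in.seek(fileLen - 8); // position at the length word before the magic
    long footerLen = BytesUtils.readIntLittleEndian(in);
    return fileLen - footerLen - 8; // where the footer bytes begin
  }
}
```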
|
@@ -581,6 +596,8 @@ public void testAlignmentWithNoPaddingNeeded() throws Exception {
     Configuration conf = getTestConfiguration();
     // Disable writing out checksums as hardcoded byte offsets in assertions below expect it
     conf.setBoolean(ParquetOutputFormat.PAGE_WRITE_CHECKSUM_ENABLED, false);
+    // close any filesystems to ensure that the FS used by the writer picks up the configuration
+    FileSystem.closeAll();

     // uses the test constructor
     ParquetFileWriter w = new ParquetFileWriter(conf, SCHEMA, path, 100, 50);
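`FileSystem.closeAll()` matters here because Hadoop caches `FileSystem` instances by URI scheme/authority and user, not by `Configuration`: an instance cached by an earlier test would keep its old settings, and the writer would never see the option just applied. A small sketch of the cache behavior this guards against (illustrative, inside a method that throws `IOException`):

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.parquet.hadoop.ParquetOutputFormat;

Configuration c1 = new Configuration();
FileSystem fs1 = FileSystem.get(c1);  // instance is created and cached

Configuration c2 = new Configuration(c1);
c2.setBoolean(ParquetOutputFormat.PAGE_WRITE_CHECKSUM_ENABLED, false);
FileSystem fs2 = FileSystem.get(c2);  // cache hit: same instance as fs1, old conf

FileSystem.closeAll();                // evict the cache...
FileSystem fs3 = FileSystem.get(c2);  // ...so this instance is built from c2
```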
|
@@ -620,9 +637,11 @@ public void testAlignmentWithNoPaddingNeeded() throws Exception {
     FileSystem fs = path.getFileSystem(conf);
     long fileLen = fs.getFileStatus(path).getLen();

-    FSDataInputStream data = fs.open(path);
-    data.seek(fileLen - 8); // 4-byte offset + "PAR1"
-    long footerLen = BytesUtils.readIntLittleEndian(data);
+    long footerLen;
+    try (FSDataInputStream data = fs.open(path)) {
+      data.seek(fileLen - 8); // 4-byte offset + "PAR1"
+      footerLen = BytesUtils.readIntLittleEndian(data);
+    }
     long startFooter = fileLen - footerLen - 8;

     assertEquals("Footer should start after second row group without padding",
|
@@ -855,6 +874,8 @@ public void testWriteReadStatisticsAllNulls() throws Exception {
     configuration.setBoolean("parquet.strings.signed-min-max.enabled", true);
     GroupWriteSupport.setSchema(schema, configuration);

+    // close any filesystems to ensure that the FS used by the writer picks up the configuration
+    FileSystem.closeAll();
     ParquetWriter<Group> writer = new ParquetWriter<Group>(path, configuration, new GroupWriteSupport());

     Group r1 = new SimpleGroup(schema);
|
|