Skip to content

Commit

Permalink
[s] using key NullWritable with StructuredRecord value
Browse files Browse the repository at this point in the history
  • Loading branch information
psainics committed Jan 9, 2024
1 parent 2f35ce9 commit 0d8ecb1
Showing 1 changed file with 6 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
Expand All @@ -47,27 +48,25 @@
* The {@link XlsInputFormat.XlsRecordReader} reads a given sheet, and within a sheet reads
* all columns and all rows.
*/
public class XlsInputFormat extends CombineFileInputFormat<LongWritable, StructuredRecord> {
public class XlsInputFormat extends CombineFileInputFormat<NullWritable, StructuredRecord> {

public static final String SHEET_NO = "Sheet Number";
public static final String SHEET_VALUE = "sheetValue";
public static final String NAME_SKIP_HEADER = "skipHeader";
public static final String TERMINATE_IF_EMPTY_ROW = "terminateIfEmptyRow";

@Override
public RecordReader<LongWritable, StructuredRecord> createRecordReader(InputSplit split, TaskAttemptContext context) {
public RecordReader<NullWritable, StructuredRecord> createRecordReader(InputSplit split, TaskAttemptContext context) {
return new XlsRecordReader();
}

/**
* Reads an Excel spreadsheet, where the key is always {@link NullWritable} and each value is a {@link StructuredRecord} representing one row.
*/
public static class XlsRecordReader extends RecordReader<LongWritable, StructuredRecord> {
public static class XlsRecordReader extends RecordReader<NullWritable, StructuredRecord> {
// DataFormatter to format and get each cell's value as String
XlsDataFormatter formatter;
FormulaEvaluator formulaEvaluator;
// Map key that represents the row index.
private LongWritable key;
// Map value that represents an excel row
private StructuredRecord value;
private Sheet workSheet;
Expand Down Expand Up @@ -141,7 +140,6 @@ public boolean nextKeyValue() {
return true;
}
}
key = new LongWritable(rowIndex);

StructuredRecord.Builder builder = StructuredRecord.builder(outputSchema);
List<Schema.Field> fields = outputSchema.getFields();
Expand Down Expand Up @@ -188,8 +186,8 @@ public void close() throws IOException {
}

@Override
public LongWritable getCurrentKey() {
return key;
public NullWritable getCurrentKey() {
return NullWritable.get();
}

@Override
Expand Down

0 comments on commit 0d8ecb1

Please sign in to comment.