Skip to content

Commit

Permalink
[s] Review 2 Squashed
Browse files Browse the repository at this point in the history
  • Loading branch information
psainics committed Jan 22, 2024
1 parent 783ee3c commit cd5f2a0
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 34 deletions.
6 changes: 0 additions & 6 deletions core-plugins/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -279,12 +279,6 @@
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
</dependency>
<dependency>
<groupId>io.cdap.plugin</groupId>
<artifactId>format-xls</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>io.cdap.plugin</groupId>
<artifactId>format-xls</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

package io.cdap.plugin.format.xls.input;

import com.google.common.base.Preconditions;
import io.cdap.cdap.api.data.format.StructuredRecord;
import io.cdap.cdap.api.data.schema.Schema;
import io.cdap.plugin.format.input.PathTrackingInputFormat;
Expand Down Expand Up @@ -48,7 +47,7 @@
*/
public class XlsInputFormat extends PathTrackingInputFormat {

public static final String SHEET_NO = "Sheet Number";
public static final String SHEET_NUM = "Sheet Number";
public static final String SHEET_VALUE = "sheetValue";
public static final String NAME_SKIP_HEADER = "skipHeader";
public static final String TERMINATE_IF_EMPTY_ROW = "terminateIfEmptyRow";
Expand All @@ -58,10 +57,10 @@ protected RecordReader<NullWritable, StructuredRecord.Builder> createRecordReade
FileSplit split, TaskAttemptContext context, @Nullable String pathField,
@Nullable Schema schema) throws IOException {
Configuration jobConf = context.getConfiguration();
boolean skipFirstRow = jobConf.getBoolean(NAME_SKIP_HEADER, true);
boolean skipFirstRow = jobConf.getBoolean(NAME_SKIP_HEADER, false);
boolean terminateIfEmptyRow = jobConf.getBoolean(TERMINATE_IF_EMPTY_ROW, false);
Schema outputSchema = schema != null ? Schema.parseJson(context.getConfiguration().get("schema")) : null;
String sheet = jobConf.get(SHEET_NO);
Schema outputSchema = schema != null ? Schema.parseJson(context.getConfiguration().get("schema")) : null;
String sheet = jobConf.get(SHEET_NUM);
String sheetValue = jobConf.get(SHEET_VALUE, "0");
return new XlsRecordReader(sheet, sheetValue, outputSchema, terminateIfEmptyRow, skipFirstRow);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import javax.annotation.Nullable;


Expand Down Expand Up @@ -84,12 +83,7 @@ public void validate(FormatContext context) {
if (!conf.containsMacro(XlsInputFormatConfig.NAME_SHEET_VALUE) &&
conf.getSheet().equals(XlsInputFormatConfig.SHEET_NUMBER) &&
!Strings.isNullOrEmpty(conf.getSheetValue())) {
try {
Integer.parseInt(conf.getSheetValue());
} catch (NumberFormatException e) {
collector.addFailure("Sheet number must be a number.", null)
.withConfigProperty(XlsInputFormatConfig.NAME_SHEET_VALUE);
}
getSheetAsNumber(collector);
}
if (!conf.containsMacro(PathTrackingConfig.NAME_SCHEMA) && schema == null && context.getInputSchema() == null) {
collector.addFailure("XLS format cannot be used without specifying a schema.", "Schema must be specified.")
Expand All @@ -99,7 +93,7 @@ public void validate(FormatContext context) {

@Override
protected void addFormatProperties(Map<String, String> properties) {
properties.put(XlsInputFormat.SHEET_NO, conf.getSheet());
properties.put(XlsInputFormat.SHEET_NUM, conf.getSheet());
if (!Strings.isNullOrEmpty(conf.getSheetValue())) {
properties.put(XlsInputFormat.SHEET_VALUE, conf.getSheetValue());
}
Expand All @@ -122,15 +116,9 @@ public Schema detectSchema(FormatContext context, InputFiles inputFiles) throws
Sheet workSheet;
// Check if user wants to access with name or number
if (conf.getSheet() != null && conf.getSheet().equals(XlsInputFormatConfig.SHEET_NUMBER)) {
int sheetValue = 0;
if (!Strings.isNullOrEmpty(conf.getSheetValue())) {
try {
sheetValue = Integer.parseInt(conf.getSheetValue());
} catch (NumberFormatException e) {
failureCollector.addFailure("Sheet number must be a number.", null)
.withConfigProperty(XlsInputFormatConfig.NAME_SHEET_VALUE);
return null;
}
Integer sheetValue = getSheetAsNumber(failureCollector);
if (sheetValue == null) {
return null;
}
workSheet = workbook.getSheetAt(sheetValue);
} else {
Expand Down Expand Up @@ -204,4 +192,20 @@ public Schema detectSchema(FormatContext context, InputFiles inputFiles) throws
return null;
}

private Integer getSheetAsNumber(FailureCollector failureCollector) {
if (!Strings.isNullOrEmpty(conf.getSheetValue())) {
try {
int sheetValue = Integer.parseInt(conf.getSheetValue());
if (sheetValue >= 0) {
return sheetValue;
}
failureCollector.addFailure("Sheet number must be a positive number.", null)
.withConfigProperty(XlsInputFormatConfig.NAME_SHEET_VALUE);
} catch (NumberFormatException e) {
failureCollector.addFailure("Sheet number must be a number.", null)
.withConfigProperty(XlsInputFormatConfig.NAME_SHEET_VALUE);
}
}
return null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.apache.poi.ss.usermodel.Row;

import java.util.List;
import javax.annotation.Nullable;

/**
* Converts a row of XLS cells to a StructuredRecord.
Expand All @@ -42,19 +43,15 @@ public class XlsRowConverter {
* Converts a row of XLS cells to a StructuredRecord.
* Returns null if the row is null or empty.
*/
@Nullable
public StructuredRecord.Builder convert(Row row, Schema outputSchema) {
if (row == null) {
return null;
}
boolean isRowEmpty = true;
StructuredRecord.Builder builder = StructuredRecord.builder(outputSchema);
List<Schema.Field> fields = outputSchema.getFields();
for (int cellIndex = 0; cellIndex < row.getLastCellNum(); cellIndex++) {
if (cellIndex >= fields.size()) {
throw new IllegalArgumentException(
String.format("Schema contains fewer fields than the number of columns in the excel file. " +
"Schema fields: %s, Excel columns: %s", fields.size(), row.getLastCellNum()));
}
for (int cellIndex = 0; cellIndex < row.getLastCellNum() && cellIndex < fields.size(); cellIndex++) {
Cell cell = row.getCell(cellIndex, Row.MissingCellPolicy.RETURN_BLANK_AS_NULL);
if (cell == null) {
// Blank cells are skipped, builder will set null for the field, no processing needed.
Expand All @@ -75,6 +72,7 @@ public StructuredRecord.Builder convert(Row row, Schema outputSchema) {
cellValue = getCellAsBoolean(cell);
break;
default:
// As we only support string, double and boolean, this should never happen.
throw new IllegalArgumentException(
String.format("Field '%s' is of unsupported type '%s'. Supported types are: %s",
field.getName(), type, "string, double, boolean"));
Expand Down

0 comments on commit cd5f2a0

Please sign in to comment.