Skip to content

Commit

Permalink
HIVE-27995: Fix inconsistent behavior of LOAD DATA command for partit…
Browse files Browse the repository at this point in the history
…oned and non-partitioned tables
  • Loading branch information
shivjha30 committed Jul 5, 2024
1 parent 1a969f6 commit 863c2d0
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3422,4 +3422,26 @@ public void testHeaderFooterNonTextFiles() throws Exception {
stmt.close();
}
}
/**
* These test methods validate the error handling when attempting to load data from a non-existent file into different types of Hive tables.
* Each test creates a specific type of table ( dynamically partitioned, or bucketed), and then attempts to load data from a non-existent file.
* The tests pass if an exception is thrown with a message containing the string "Invalid path".
*/
@Test
public void testLoadDataNegativeForDynamicPartition() throws Exception {
HiveStatement stmt = (HiveStatement) con.createStatement();
try {
stmt.execute("drop table if exists T");
stmt.execute("create table T (a int, b int) partitioned by (p int) stored as orc");
try {
stmt.execute("load data local inpath '/path/to/nonexistent/file.txt' into table T");
fail("Expected an exception to be thrown");
} catch (Exception e) {
assertTrue("load data inpath",
e.getMessage() != null && e.getMessage().contains("Invalid path"));
}
} finally {
stmt.close();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,12 @@ private URI initializeFromURI(String fromPath, boolean isLocal)
return new URI(fromScheme, fromAuthority, path, null, null);
}

private List<FileStatus> applyConstraintsAndGetFiles(URI fromURI, Table table) throws SemanticException {

/**
* Verifies the file path existence. If the path is a directory, then it is
* verified to not be a glob pattern. If the path is a file, then it is
* verified to be a valid file name.
*/
private FileStatus[] verifyFilePathExists(URI fromURI) throws SemanticException {
FileStatus[] srcs = null;

// local mode implies that scheme should be "file"
Expand All @@ -172,7 +176,6 @@ private List<FileStatus> applyConstraintsAndGetFiles(URI fromURI, Table table) t
ErrorMsg.ILLEGAL_PATH.getMsg(), fromTree,
"Source file system should be \"file\" if \"local\" is specified"));
}

try {
FileSystem fileSystem = FileSystem.get(fromURI, conf);
srcs = matchFilesOrDir(fileSystem, new Path(fromURI));
Expand All @@ -181,13 +184,22 @@ private List<FileStatus> applyConstraintsAndGetFiles(URI fromURI, Table table) t
ErrorMsg.INVALID_PATH.getMsg(), fromTree,
"No files matching path " + fromURI));
}

} catch (IOException e) {
throw new SemanticException(ASTErrorUtils.getMsg(
ErrorMsg.INVALID_PATH.getMsg(), fromTree), e);
}
return srcs;
}
private List<FileStatus> applyConstraintsAndGetFiles(URI fromURI, Table table, FileStatus[] srcs)
throws SemanticException {
try {
for (FileStatus oneSrc : srcs) {
if (oneSrc.isDir()) {
reparseAndSuperAnalyze(table, fromURI);
return null;
}
}
FileSystem fileSystem = FileSystem.get(fromURI, conf);
AcidUtils.validateAcidFiles(table, srcs, fileSystem);
// Do another loop if table is bucketed
List<String> bucketCols = table.getBucketCols();
Expand Down Expand Up @@ -290,6 +302,8 @@ private void analyzeLoad(ASTNode ast) throws SemanticException {
throw new SemanticException(ASTErrorUtils.getMsg(
ErrorMsg.INVALID_PATH.getMsg(), fromTree, e.getMessage()), e);
}
// validate the arguments
FileStatus[] srcs = verifyFilePathExists(fromURI);

// initialize destination table/partition
TableSpec ts = new TableSpec(db, conf, (ASTNode) tableTree);
Expand Down Expand Up @@ -344,7 +358,7 @@ private void analyzeLoad(ASTNode ast) throws SemanticException {
}

// make sure the arguments make sense
List<FileStatus> files = applyConstraintsAndGetFiles(fromURI, ts.tableHandle);
List<FileStatus> files = applyConstraintsAndGetFiles(fromURI, ts.tableHandle, srcs);
if (queryReWritten) {
return;
}
Expand Down
12 changes: 6 additions & 6 deletions ql/src/test/org/apache/hadoop/hive/ql/TestTxnLoadData.java
Original file line number Diff line number Diff line change
Expand Up @@ -394,16 +394,16 @@ public void loadDataPartitioned() throws Exception {
*/
@Test
public void testValidations() throws Exception {
runStatementOnDriver("drop table if exists T");
runStatementOnDriver("drop table if exists Tstage");
runStatementOnDriver("create table T (a int, b int) clustered by (a) into 2 buckets stored as orc tblproperties('transactional'='true')");
runStatementOnDriver("drop table if exists t");
runStatementOnDriver("drop table if exists tstage");
runStatementOnDriver("create table t (a int, b int) clustered by (a) into 2 buckets stored as orc tblproperties('transactional'='true')");
File createdFile= folder.newFile("myfile.txt");
FileUtils.writeStringToFile(createdFile, "hello world");
runStatementOnDriver("create table Tstage (a int, b int) stored as orc tblproperties('transactional'='false')");
runStatementOnDriver("create table tstage (a int, b int) stored as orc tblproperties('transactional'='false')");
//this creates an ORC data file with correct schema under table root
runStatementOnDriver("insert into Tstage values(1,2),(3,4)");
runStatementOnDriver("insert into tstage values(1,2),(3,4)");
// This will work with the new support of rewriting load into IAS.
runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/Tstage' into table T");
runStatementOnDriver("load data local inpath '" + getWarehouseDir() + "/tstage' into table t");
}

private void checkExpected(List<String> rs, String[][] expected, String msg) {
Expand Down
6 changes: 6 additions & 0 deletions ql/src/test/queries/clientnegative/load_data_partition.q
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
drop table if exists validate_load_data;
CREATE TABLE validate_load_data(key int, value string) partitioned by (hr int) STORED AS TEXTFILE;
LOAD DATA INPATH 'hdfs:///validateload/filedoesnt.txt' INTO TABLE validate_load_data;
SELECT * FROM validate_load_data;
DROP TABLE validate_load_data;
``````
2 changes: 1 addition & 1 deletion ql/src/test/queries/clientnegative/load_wrong_noof_part.q
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@

CREATE TABLE loadpart1(a STRING, b STRING) PARTITIONED BY (ds STRING,ds1 STRING);
LOAD DATA LOCAL INPATH '../../data1/files/kv1.txt' INTO TABLE loadpart1 PARTITION(ds='2009-05-05');
LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE loadpart1 PARTITION(ds='2009-05-05');
15 changes: 15 additions & 0 deletions ql/src/test/results/clientnegative/load_data_partition.q.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
PREHOOK: query: drop table if exists validate_load_data
PREHOOK: type: DROPTABLE
PREHOOK: Output: database:default
POSTHOOK: query: drop table if exists validate_load_data
POSTHOOK: type: DROPTABLE
POSTHOOK: Output: database:default
PREHOOK: query: CREATE TABLE validate_load_data(key int, value string) partitioned by (hr int) STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@validate_load_data
POSTHOOK: query: CREATE TABLE validate_load_data(key int, value string) partitioned by (hr int) STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@validate_load_data
FAILED: SemanticException Line 2:17 Invalid path ''hdfs://### HDFS PATH ###''
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ POSTHOOK: query: CREATE TABLE loadpart1(a STRING, b STRING) PARTITIONED BY (ds S
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@loadpart1
FAILED: SemanticException [Error 10006]: Line 2:82 Partition not found ''2009-05-05''
FAILED: SemanticException [Error 10006]: Line 2:81 Partition not found ''2009-05-05''

0 comments on commit 863c2d0

Please sign in to comment.