Skip to content

Commit

Permalink
HIVE-28292: Optimize SHOW TABLES/VIEWS statements (Wechar Yu, reviewed by Denys Kuzmenko)
Browse files Browse the repository at this point in the history

Closes #5273
  • Loading branch information
wecharyu authored Sep 16, 2024
1 parent 24fa2c2 commit 6b361ac
Show file tree
Hide file tree
Showing 23 changed files with 142 additions and 125 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@
package org.apache.hadoop.hive.ql.ddl.table.info.show.tables;

import com.google.common.collect.ImmutableMap;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.ddl.ShowUtils;
import org.apache.hadoop.hive.ql.ddl.ShowUtils.TextMetaDataTable;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.metadata.formatting.MapBuilder;
import org.apache.hadoop.hive.ql.metadata.formatting.MetaDataFormatUtils;
import org.apache.hadoop.hive.ql.session.SessionState;
Expand All @@ -36,6 +36,7 @@
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

/**
* Formats SHOW TABLES results.
Expand All @@ -51,7 +52,7 @@ public static ShowTablesFormatter getFormatter(HiveConf conf) {

public abstract void showTables(DataOutputStream out, List<String> tables) throws HiveException;

abstract void showTablesExtended(DataOutputStream out, List<Table> tables) throws HiveException;
abstract void showTablesExtended(DataOutputStream out, Map<String, String> tableNameToType) throws HiveException;

// ------ Implementations ------

Expand All @@ -62,16 +63,16 @@ public void showTables(DataOutputStream out, List<String> tables) throws HiveExc
}

@Override
void showTablesExtended(DataOutputStream out, List<Table> tables) throws HiveException {
if (tables.isEmpty()) {
void showTablesExtended(DataOutputStream out, Map<String, String> tableNameToType) throws HiveException {
if (tableNameToType.isEmpty()) {
return;
}

List<Map<String, Object>> tableDataList = new ArrayList<>();
for (Table table : tables) {
for (Map.Entry<String, String> table : tableNameToType.entrySet()) {
Map<String, Object> tableData = ImmutableMap.of(
"Table Name", table.getTableName(),
"Table Type", table.getTableType().toString());
"Table Name", table.getKey(),
"Table Type", table.getValue());
tableDataList.add(tableData);
}

Expand All @@ -96,8 +97,8 @@ public void showTables(DataOutputStream out, List<String> tables) throws HiveExc
}

@Override
void showTablesExtended(DataOutputStream out, List<Table> tables) throws HiveException {
if (tables.isEmpty()) {
void showTablesExtended(DataOutputStream out, Map<String, String> tableNameToType) throws HiveException {
if (tableNameToType.isEmpty()) {
return;
}

Expand All @@ -106,8 +107,8 @@ void showTablesExtended(DataOutputStream out, List<Table> tables) throws HiveExc
if (!SessionState.get().isHiveServerQuery()) {
mdt.addRow("# Table Name", "Table Type");
}
for (Table table : tables) {
mdt.addRow(table.getTableName(), table.getTableType().toString());
for (Map.Entry<String, String> table : tableNameToType.entrySet()) {
mdt.addRow(table.getKey(), table.getValue());
}
// In case the query is served by HiveServer2, don't pad it with spaces,
// as HiveServer2 output is consumed by JDBC/ODBC clients.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,15 @@
import java.io.DataOutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.Map;
import java.util.TreeMap;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.ddl.DDLOperation;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.udf.UDFLike;

/**
Expand All @@ -60,12 +59,9 @@ public int execute() throws HiveException {
}

private void showTables() throws HiveException {
String pattern = UDFLike.likePatternToRegExp(desc.getPattern(), false, true);
List<String> tableNames = new ArrayList<>(
context.getDb().getTablesByType(desc.getDbName(), null, desc.getTypeFilter()));
if (desc.getPattern() != null) {
Pattern pattern = Pattern.compile(UDFLike.likePatternToRegExp(desc.getPattern()), Pattern.CASE_INSENSITIVE);
tableNames = tableNames.stream().filter(name -> pattern.matcher(name).matches()).collect(Collectors.toList());
}
context.getDb().getTablesByType(desc.getDbName(), pattern, desc.getTypeFilter()));
Collections.sort(tableNames);
LOG.debug("Found {} table(s) matching the SHOW TABLES statement.", tableNames.size());

Expand All @@ -78,20 +74,19 @@ private void showTables() throws HiveException {
}

private void showTablesExtended() throws HiveException {
List<Table> tableObjects = new ArrayList<>();
tableObjects.addAll(context.getDb().getTableObjects(desc.getDbName(), null, desc.getTypeFilter()));
if (desc.getPattern() != null) {
Pattern pattern = Pattern.compile(UDFLike.likePatternToRegExp(desc.getPattern()), Pattern.CASE_INSENSITIVE);
tableObjects = tableObjects.stream()
.filter(object -> pattern.matcher(object.getTableName()).matches())
.collect(Collectors.toList());
Map<String, String> tableNameToType = new TreeMap<>();
String pattern = UDFLike.likePatternToRegExp(desc.getPattern(), false, true);
TableType typeFilter = desc.getTypeFilter();
TableType[] tableTypes = typeFilter == null ? TableType.values() : new TableType[]{typeFilter};
for (TableType tableType : tableTypes) {
List<String> tables = context.getDb().getTablesByType(desc.getDbName(), pattern, tableType);
tables.forEach(name -> tableNameToType.put(name, tableType.toString()));
}
Collections.sort(tableObjects, Comparator.comparing(Table::getTableName));
LOG.debug("Found {} table(s) matching the SHOW EXTENDED TABLES statement.", tableObjects.size());
LOG.debug("Found {} table(s) matching the SHOW EXTENDED TABLES statement.", tableNameToType.size());

try (DataOutputStream os = ShowUtils.getOutputStream(new Path(desc.getResFile()), context)) {
ShowTablesFormatter formatter = ShowTablesFormatter.getFormatter(context.getConf());
formatter.showTablesExtended(os, tableObjects);
formatter.showTablesExtended(os, tableNameToType);
} catch (Exception e) {
throw new HiveException(e, ErrorMsg.GENERIC_ERROR, "in database " + desc.getDbName());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@
import java.io.DataOutputStream;
import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.TableType;
Expand All @@ -48,13 +46,8 @@ public int execute() throws HiveException {
throw new HiveException(ErrorMsg.DATABASE_NOT_EXISTS, desc.getDbName());
}

List<String> viewNames = context.getDb().getTablesByType(desc.getDbName(), null, TableType.VIRTUAL_VIEW);
if (desc.getPattern() != null) {
Pattern pattern = Pattern.compile(UDFLike.likePatternToRegExp(desc.getPattern()), Pattern.CASE_INSENSITIVE);
viewNames = viewNames.stream()
.filter(name -> pattern.matcher(name).matches())
.collect(Collectors.toList());
}
String pattern = UDFLike.likePatternToRegExp(desc.getPattern(), false, true);
List<String> viewNames = context.getDb().getTablesByType(desc.getDbName(), pattern, TableType.VIRTUAL_VIEW);
Collections.sort(viewNames);
LOG.debug("Found {} view(s) matching the SHOW VIEWS statement.", viewNames.size());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ public List<String> getTables(String dbName, String tablePattern) throws MetaExc
if (tables == null || tables.size() == 0) {
return tableNames;
}
tablePattern = tablePattern.replaceAll("\\*", ".*");
tablePattern = tablePattern.replaceAll("(?<!\\.)\\*", ".*");
Pattern pattern = Pattern.compile(tablePattern);
Matcher matcher = pattern.matcher("");
Set<String> combinedTableNames = new HashSet<String>();
Expand Down Expand Up @@ -351,7 +351,7 @@ public List<String> getTables(String dbname, String tablePattern, TableType tabl
if (tables == null || tables.size() == 0) {
return tableNames;
}
tablePattern = tablePattern.replaceAll("\\*", ".*");
tablePattern = tablePattern.replaceAll("(?<!\\.)\\*", ".*");
Pattern pattern = Pattern.compile(tablePattern);
Matcher matcher = pattern.matcher("");
Set<String> combinedTableNames = new HashSet<String>();
Expand Down
11 changes: 9 additions & 2 deletions ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLike.java
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,13 @@ public UDFLike() {
}

/**
 * Converts a SQL LIKE pattern to a Java regular expression using the
 * historical defaults: literal characters are quoted ({@code literalize=true})
 * and {@code %} is translated to a non-greedy {@code .*?}
 * ({@code greedyMatch=false}).
 *
 * @param likePattern the SQL LIKE pattern to convert; may be null
 * @return the equivalent regular expression string
 */
public static String likePatternToRegExp(String likePattern) {
return likePatternToRegExp(likePattern, true, false);
}

public static String likePatternToRegExp(String likePattern, boolean literalize, boolean greedyMatch) {
if (likePattern == null) {
return null;
}
StringBuilder sb = new StringBuilder();
for (int i = 0; i < likePattern.length(); i++) {
// Make a special case for "\\_" and "\\%"
Expand All @@ -77,9 +84,9 @@ public static String likePatternToRegExp(String likePattern) {
if (n == '_') {
sb.append(".");
} else if (n == '%') {
sb.append(".*?");
sb.append(greedyMatch ? ".*" : ".*?");
} else {
sb.append(Pattern.quote(Character.toString(n)));
sb.append(literalize ? Pattern.quote(Character.toString(n)) : n);
}
}
return sb.toString();
Expand Down
2 changes: 2 additions & 0 deletions ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHive.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
Expand Down Expand Up @@ -442,6 +443,7 @@ public void testGetAndDropTables() throws Throwable {
hm.createTable(tbl2);

List<String> fts = hm.getTablesForDb(dbName, ".*");
Collections.sort(fts);
assertEquals(ts, fts);
assertEquals(2, fts.size());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ PREHOOK: query: SHOW TABLES IN test_auth_obj_db
PREHOOK: type: SHOWTABLES
PREHOOK: Input: database:test_auth_obj_db
filterListCmdObjects
HIVE PRIVILEGE OBJECT { objectName: test_privs type: TABLE_OR_VIEW actionType: OTHER dbName: test_auth_obj_db OWNER: testuser OWNERTYPE: USER}
HIVE PRIVILEGE OBJECT { objectName: test_privs2 type: TABLE_OR_VIEW actionType: OTHER dbName: test_auth_obj_db OWNER: testuser2 OWNERTYPE: USER}
HIVE PRIVILEGE OBJECT { objectName: test_privs type: TABLE_OR_VIEW actionType: OTHER dbName: test_auth_obj_db}
HIVE PRIVILEGE OBJECT { objectName: test_privs2 type: TABLE_OR_VIEW actionType: OTHER dbName: test_auth_obj_db}
POSTHOOK: query: SHOW TABLES IN test_auth_obj_db
POSTHOOK: type: SHOWTABLES
POSTHOOK: Input: database:test_auth_obj_db
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3015,26 +3015,44 @@ public List<String> getTables(String dbname, String tablePattern) throws MetaExc
@Override
public List<String> getTables(String catName, String dbName, String tablePattern)
throws TException {
List<String> tables = new ArrayList<>();
GetProjectionsSpec projectionsSpec = new GetProjectionsSpec();
projectionsSpec.setFieldList(Arrays.asList("dbName", "tableName", "owner", "ownerType"));
GetTablesRequest req = new GetTablesRequest(dbName);
req.setCatName(catName);
req.setCapabilities(version);
req.setTblNames(null);
if(tablePattern == null){
if (tablePattern == null) {
tablePattern = ".*";
}
req.setTablesPattern(tablePattern);
if (processorCapabilities != null)
req.setProcessorCapabilities(new ArrayList<String>(Arrays.asList(processorCapabilities)));
if (processorIdentifier != null)
req.setProcessorIdentifier(processorIdentifier);
req.setProjectionSpec(projectionsSpec);
List<Table> tableObjects = client.get_table_objects_by_name_req(req).getTables();
tableObjects = deepCopyTables(FilterUtils.filterTablesIfEnabled(isClientFilterEnabled, filterHook, tableObjects));
for (Table tbl : tableObjects) {
tables.add(tbl.getTableName());
List<String> tables = new ArrayList<>();
Database db = null;
try {
db = getDatabase(catName, dbName);
} catch (NoSuchObjectException e) { /* appears exception is not thrown currently if db doesnt exist */ }

if (MetaStoreUtils.isDatabaseRemote(db)) {
// TODO: remote database does not support list table names by pattern yet.
// This branch can be removed once it's supported.
GetProjectionsSpec projectionsSpec = new GetProjectionsSpec();
projectionsSpec.setFieldList(Arrays.asList("dbName", "tableName", "owner", "ownerType"));
GetTablesRequest req = new GetTablesRequest(dbName);
req.setCatName(catName);
req.setCapabilities(version);
req.setTblNames(null);
req.setTablesPattern(tablePattern);
if (processorCapabilities != null)
req.setProcessorCapabilities(Arrays.asList(processorCapabilities));
if (processorIdentifier != null)
req.setProcessorIdentifier(processorIdentifier);
req.setProjectionSpec(projectionsSpec);
List<Table> tableObjects = client.get_table_objects_by_name_req(req).getTables();
tableObjects = deepCopyTables(FilterUtils.filterTablesIfEnabled(isClientFilterEnabled, filterHook, tableObjects));
for (Table tbl : tableObjects) {
tables.add(tbl.getTableName());
}
} else {
// This trick handles pattern for both string regex and wildcards ('*' and '|').
// We need unify the pattern definition, see HIVE-28297 for details.
String[] patterns = tablePattern.split("\\|");
for (String pattern : patterns) {
pattern = "(?i)" + pattern.replaceAll("(?<!\\.)\\*", ".*");
String filter = String.format("%s like \"%s\"", hive_metastoreConstants.HIVE_FILTER_FIELD_TABLE_NAME, pattern);
tables.addAll(listTableNamesByFilter(catName, dbName, filter, (short) -1));
}
}
return tables;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.DatabaseType;
import org.apache.hadoop.hive.metastore.api.WMPoolSchedulingPolicy;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
Expand Down Expand Up @@ -1335,4 +1336,8 @@ public static String getHttpPath(String httpPath) {
}
return httpPath;
}

/**
 * Reports whether the given database is of the REMOTE type.
 *
 * @param db the database to inspect; may be null
 * @return true only when {@code db} is non-null and its type is
 *         {@link DatabaseType#REMOTE}; false otherwise (including null input)
 */
public static boolean isDatabaseRemote(Database db) {
  if (db == null) {
    return false;
  }
  return DatabaseType.REMOTE == db.getType();
}
}
Loading

0 comments on commit 6b361ac

Please sign in to comment.