Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Format change support partition type change, clustered by #111

Open
wants to merge 34 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 19 additions & 6 deletions metastor/managedObjectLoader/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
<parent>
<artifactId>metastore</artifactId>
<groupId>org.finra.herd-mdl.metastore</groupId>
<version>1.2.48</version>
<version>1.2.54</version>
</parent>

<modelVersion>4.0.0</modelVersion>
Expand Down Expand Up @@ -158,6 +158,18 @@
<artifactId>commons-dbcp2</artifactId>
<version>${common.dpcp.version}</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.11</version>
</dependency>

<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-collections4</artifactId>
<version>4.1</version>
</dependency>

<dependency>
<groupId>com.mitchellbosecke</groupId>
<artifactId>pebble</artifactId>
Expand All @@ -167,11 +179,12 @@
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>${hive.jdbc.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.common.version}</version>
<exclusions>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ public class HerdMetastoreConfig {
public static final String DM_PASS_FILE_PATH = String.format( "%s/dmCreds/dmPass.base64", homeDir );
public static final String CRED_FILE_PATH = "cred.file.path";
public static final String ANALYZE_STATS = "analyze.stats";
public static final int ALTER_TABLE_MAX_PARTITIONS = 6000; //Max partitions that can be dropped at a highest partition level.



Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
import java.util.Map;
import java.util.stream.IntStream;

import static org.finra.herd.metastore.managed.conf.HerdMetastoreConfig.ALTER_TABLE_MAX_PARTITIONS;

/**
* Herd Client
*/
Expand Down Expand Up @@ -137,6 +139,52 @@ public BusinessObjectDataDdl getBusinessObjectDataDdl(org.finra.herd.metastore.m
return businessObjectDataApi.businessObjectDataGenerateBusinessObjectDataDdl(request);
}

/**
 * Overload that requests BO-data DDL with drop and add partition statements
 * combined into single ALTER TABLE statements, capped at
 * ALTER_TABLE_MAX_PARTITIONS partitions per statement.
 */
public BusinessObjectDataDdl getBusinessObjectDataDdl(org.finra.herd.metastore.managed.JobDefinition jd, List<String> partitions, boolean combineAlterStmts) throws ApiException {
    BusinessObjectDataDdlRequest ddlRequest = new BusinessObjectDataDdlRequest();

    // Identify the object/format the DDL is generated for.
    ddlRequest.setNamespace(jd.getObjectDefinition().getNameSpace());
    ddlRequest.setBusinessObjectDefinitionName(jd.getObjectDefinition().getObjectName());
    ddlRequest.setBusinessObjectFormatUsage(jd.getObjectDefinition().getUsageCode());
    ddlRequest.setBusinessObjectFormatFileType(jd.getObjectDefinition().getFileType());
    ddlRequest.setTableName(jd.getTableName());

    // DDL generation options: Hive 13 syntax, drop partitions (but not the
    // table), tolerate missing data, and guard with IF NOT EXISTS.
    ddlRequest.setOutputFormat(BusinessObjectDataDdlRequest.OutputFormatEnum.HIVE_13_DDL);
    ddlRequest.setIncludeDropTableStatement(false);
    ddlRequest.setIncludeDropPartitions(true);
    ddlRequest.setIncludeIfNotExistsOption(true);
    ddlRequest.setAllowMissingData(true);

    // Combine many partitions into one ALTER TABLE, bounded by the cap.
    ddlRequest.combineMultiplePartitionsInSingleAlterTable(combineAlterStmts);
    ddlRequest.combinedAlterTableMaxPartitions(ALTER_TABLE_MAX_PARTITIONS);

    log.info("PartitionKey: {} \t Partitions: {}", jd.getPartitionKey(), partitions);

    // Build partition-value filters according to the workflow/partition shape.
    List<PartitionValueFilter> filters = Lists.newArrayList();
    if (MetastoreUtil.isPartitionedSingleton(jd.getWfType(), jd.getPartitionKey())) {
        addPartitionedSingletonFilter(jd, filters);
    } else if (MetastoreUtil.isNonPartitionedSingleton(jd.getWfType(), jd.getPartitionKey())) {
        // Non-partitioned singletons are addressed via the sentinel "none" value.
        addPartitionFilter(jd.getPartitionKey(), Lists.newArrayList("none"), filters);
    } else if (jd.isSubPartitionLevelProcessing()) {
        addSubPartitionFilter(jd, filters);
    } else {
        addPartitionFilter(jd.getPartitionKey(), partitions, filters);
    }

    // Use the plural filter list; the singular filter field must be cleared.
    ddlRequest.setPartitionValueFilter(null);
    ddlRequest.setPartitionValueFilters(filters);

    log.info("Get BO DDL Request with combine Alter Statements: \n{}", ddlRequest.toString());
    return businessObjectDataApi.businessObjectDataGenerateBusinessObjectDataDdl(ddlRequest);
}

private void addPartitionFilter(String partitionKey, List<String> partitions, List<PartitionValueFilter> partitionValueFilters) {
PartitionValueFilter filter = new PartitionValueFilter();
filter.setPartitionKey(partitionKey);
Expand Down Expand Up @@ -233,6 +281,7 @@ public BusinessObjectDataSearchResult searchBOData(JobDefinition jd, int pageNum
boDataSearchKeyItem.setBusinessObjectFormatFileType(jd.getObjectDefinition().getFileType());
boDataSearchKeyItem.setFilterOnLatestValidVersion(filterOnValidLatestVersions);

log.info("BusinessObjectDataSearchKey,pageNum,pageSize :{} ,{} ,{}",boDataSearchKeyItem,pageNum,pageSize);

// Search BO Data
return businessObjectDataApi.businessObjectDataSearchBusinessObjectData(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package org.finra.herd.metastore.managed.hive;

import com.google.common.collect.Lists;
import lombok.*;
import org.springframework.stereotype.Component;

import java.util.List;

@Builder
@Setter
@Getter
@NoArgsConstructor
@AllArgsConstructor
@Component
public class ClusteredDef {
private String clusterSql;
private List<ColumnDef> clusteredSortedColDefs = Lists.newArrayList();
private List<String> clusterCols = Lists.newArrayList();
private List<String> sortedCols = Lists.newArrayList();
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,35 @@ public class FormatChange {

List<Pair<ColumnDef, ColumnDef>> nameChanges = Lists.newArrayList();
List<Pair<ColumnDef, ColumnDef>> typeChanges = Lists.newArrayList();
List<Pair<ColumnDef,ColumnDef>> partitionColNameChanges = Lists.newArrayList();
List<Pair<ColumnDef,ColumnDef>> partitionColTypeChanges = Lists.newArrayList();
List<ColumnDef> newColumns = Lists.newArrayList();
boolean isClusteredSortedChange = false;
ClusteredDef clusteredDef;

boolean partitonColumnChanged = false;
boolean escapeStrChanged = false;
boolean nullStrChanged = false;
boolean delimChanged = false;
boolean partitonColumnChanged = false;

/**
 * True when this format change contains any actionable difference:
 * regular column changes, partition-column changes, or a
 * clustered/sorted definition change.
 */
public boolean hasChange() {
	// Preserve short-circuit order: hasPartitionColumnChanges() updates
	// the partitonColumnChanged flag as a side effect, so it must only
	// run when there are no regular column changes.
	if (hasColumnChanges()) {
		return true;
	}
	return hasPartitionColumnChanges() || isClusteredSortedChange;
}

/**
 * True when any regular (non-partition) column change was detected:
 * renamed columns, retyped columns, or newly added columns.
 */
public boolean hasColumnChanges() {
	boolean renamed = nameChanges != null && !nameChanges.isEmpty();
	boolean retyped = typeChanges != null && !typeChanges.isEmpty();
	boolean added = newColumns != null && !newColumns.isEmpty();
	return renamed || retyped || added;
}

/**
 * True when any partition-column TYPE change was detected.
 * Side effect: refreshes the {@code partitonColumnChanged} flag on
 * every call.
 *
 * NOTE(review): only partitionColTypeChanges is considered here;
 * partitionColNameChanges (also maintained on this class) is ignored —
 * confirm whether partition-column renames should count as a change.
 */
public boolean hasPartitionColumnChanges ()
{

partitonColumnChanged = (partitionColTypeChanges!=null && !partitionColTypeChanges.isEmpty());
return partitonColumnChanged;
}



}
Loading