Skip to content

Commit f49e3db

Browse files
committed
Metastore Direct Process to cleanup partition/fs discrepancies. #44
1 parent 0be41f3 commit f49e3db

File tree

11 files changed

+112
-10
lines changed

11 files changed

+112
-10
lines changed

bin/setup.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ rm -f $BASE_DIR/bin/*
4545

4646
cp -f hive-sre $BASE_DIR/bin
4747
cp -f hive-sre-cli $BASE_DIR/bin
48+
cp -f u3/get_part_ids.sh $BASE_DIR/bin
49+
cp -f u3/mysql_missing_parts.sh $BASE_DIR/bin
4850

4951
if [ -f hive-sre-shaded.jar ]; then
5052
cp -f hive-sre-shaded.jar $BASE_DIR/lib
@@ -57,15 +59,21 @@ fi
5759
chmod -R +r $BASE_DIR
5860
chmod +x $BASE_DIR/bin/hive-sre
5961
chmod +x $BASE_DIR/bin/hive-sre-cli
62+
chmod +x $BASE_DIR/bin/get_part_ids.sh
63+
chmod +x $BASE_DIR/bin/mysql_missing_parts.sh
6064

6165
if (( $EUID == 0 )); then
6266
echo "Setting up global links"
6367
ln -sf $BASE_DIR/bin/hive-sre /usr/local/bin/hive-sre
6468
ln -sf $BASE_DIR/bin/hive-sre-cli /usr/local/bin/hive-sre-cli
69+
ln -sf $BASE_DIR/bin/get_part_ids.sh /usr/local/bin/get_part_ids.sh
70+
ln -sf $BASE_DIR/bin/mysql_missing_parts.sh /usr/local/bin/mysql_missing_parts.sh
6571
else
6672
mkdir -p $HOME/bin
6773
ln -sf $BASE_DIR/bin/hive-sre $HOME/bin/hive-sre
6874
ln -sf $BASE_DIR/bin/hive-sre-cli $HOME/bin/hive-sre-cli
75+
ln -sf $BASE_DIR/bin/get_part_ids.sh $HOME/bin/get_part_ids.sh
76+
ln -sf $BASE_DIR/bin/mysql_missing_parts.sh $HOME/bin/mysql_missing_parts.sh
6977
echo "Executable in $HOME/bin . Add this to the environment path."
7078
fi
7179

dependency-reduced-pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
<groupId>com.cloudera.utils.hive</groupId>
55
<artifactId>hive-sre</artifactId>
66
<name>hive-sre</name>
7-
<version>3.0.0.0</version>
7+
<version>3.0.1.0</version>
88
<url>https://github.com/cloudera-labs/hive-sre</url>
99
<build>
1010
<finalName>${project.artifactId}</finalName>

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222

2323
<groupId>com.cloudera.utils.hive</groupId>
2424
<artifactId>hive-sre</artifactId>
25-
<version>3.0.0.0</version>
25+
<version>3.0.1.0</version>
2626

2727
<name>hive-sre</name>
2828

src/main/assembly/assembly.xml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,15 @@
3939
<outputDirectory>hive-sre-install</outputDirectory>
4040
<useDefaultExcludes>true</useDefaultExcludes>
4141
</fileSet>
42+
<fileSet>
43+
<directory>${project.basedir}/src/main/bash</directory>
44+
<includes>
45+
<include>get_part_ids.sh</include>
46+
<include>mysql_missing_parts.sh</include>
47+
</includes>
48+
<outputDirectory>hive-sre-install/u3</outputDirectory>
49+
<useDefaultExcludes>true</useDefaultExcludes>
50+
</fileSet>
4251
<fileSet>
4352
<directory>${project.basedir}/configs</directory>
4453
<includes>

src/main/bash/get_part_ids.sh

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
#!/bin/bash
#
# get_part_ids.sh - extract partition ids from a "missing directories" report.
#
# Usage: get_part_ids.sh <report_file> <output_file> [batch_size]
#
# Reads the pipe-delimited table rows (lines starting with '|') of
# <report_file>, takes the sixth '|'-separated field of each row (the
# "PART ID" column; the first field is the empty text before the leading
# '|'), and keeps it when it is purely numeric.
#
# The ids are appended to <output_file> as comma-separated lists, at most
# [batch_size] ids per line (default 10000), so each line can be dropped
# straight into a SQL "IN (...)" clause to modify the needed tables.
#
# When no ids are found, <output_file> is created (if missing) but nothing is
# written to it, so consumers never see an empty id list.

readonly DEFAULT_BATCH_SIZE=10000

# Collected partition ids, in report order.
PARTIDS=()

#######################################
# Print the usage line.
# Outputs: usage text on stderr
# Returns: 2, so that it can double as the script's exit status
#######################################
usage() {
  printf 'Usage: %s <report_file> <output_file> [batch_size]\n' "${0##*/}" >&2
  return 2
}

#######################################
# Fill PARTIDS with the numeric ids found in the report's table rows.
# Globals:   PARTIDS (written)
# Arguments: $1 - report file to parse
# Returns:   0 on success, 1 if the report cannot be read
#######################################
collect_part_ids() {
  local report=$1
  local _lead _spec _sql _hdfs _msck id_field id
  local -r numeric_re='^[0-9]+$'

  if [[ ! -r "$report" ]]; then
    printf 'Cannot read report file: %s\n' "$report" >&2
    return 1
  fi

  PARTIDS=()
  while IFS='|' read -r _lead _spec _sql _hdfs _msck id_field; do
    # Strip every space (not just the leading ones) around the id.
    id=${id_field// /}
    # Header, separator and table-level rows have no numeric id: skip them.
    if [[ $id =~ $numeric_re ]]; then
      PARTIDS+=("$id")
    fi
  done < <(grep -- '^|' "$report")
  return 0
}

#######################################
# Append PARTIDS to a file as comma-separated lines.
# Globals:   PARTIDS (read)
# Arguments: $1 - output file (appended to, created if missing)
#            $2 - maximum number of ids per line
# Returns:   0 on success, 1 if the output file cannot be written
#######################################
write_id_batches() {
  local out=$1 batch_size=$2
  local total=${#PARTIDS[@]}
  local start
  local -a batch
  # "${batch[*]}" joins on the first character of IFS.
  local IFS=,

  # Make sure the file exists even when there is nothing to write.
  : >> "$out" || return 1

  for (( start = 0; start < total; start += batch_size )); do
    batch=("${PARTIDS[@]:start:batch_size}")
    printf '%s\n' "${batch[*]}" >> "$out" || return 1
  done
  return 0
}

#######################################
# Entry point.
# Arguments: $1 - report file
#            $2 - output file
#            $3 - optional batch size (positive integer, default 10000)
# Returns:   0 on success, 1 on I/O failure, 2 on a bad batch size
#######################################
main() {
  local report=$1 out=$2 batch_size=${3:-$DEFAULT_BATCH_SIZE}
  local -r positive_int_re='^[1-9][0-9]*$'

  if ! [[ $batch_size =~ $positive_int_re ]]; then
    printf 'batch_size must be a positive integer, got: %s\n' "$batch_size" >&2
    return 2
  fi

  collect_part_ids "$report" || return
  write_id_batches "$out" "$batch_size"
}

# No 'exit' here on purpose: the status of the last command (main or usage)
# becomes the script's exit status.
if (( $# == 2 || $# == 3 )); then
  main "$@"
else
  usage
fi
src/main/bash/mysql_missing_parts.sh

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
#!/bin/bash
#
# mysql_missing_parts.sh - build MySQL statements for the Metastore that
# remove partitions which didn't have directories in HDFS.
#
# Usage: mysql_missing_parts.sh <report_file>
#
# Runs get_part_ids.sh (expected next to this script) on <report_file> to get
# comma-separated batches of partition ids, then appends one DELETE statement
# per partition table and batch to ./mysql_missing_parts.sql.
# The statements are only generated, never executed.

readonly SQL_OUT="mysql_missing_parts.sql"

# Child tables first, PARTITIONS itself last.
readonly -a PART_TABLES=(
  PART_COL_PRIVS
  PART_COL_STATS
  PART_PRIVS
  PARTITION_KEY_VALS
  PARTITION_PARAMS
  PARTITIONS
)

# Scratch directory; deliberately NOT named TMPDIR so the well-known
# environment variable is not overridden for child processes.
WORK_DIR=""

#######################################
# Print the usage line.
# Outputs: usage text on stderr
# Returns: 2, so that it can double as the script's exit status
#######################################
usage() {
  printf 'Usage: %s <report_file>\n' "${0##*/}" >&2
  return 2
}

#######################################
# Remove the scratch directory, if one was created.
# Globals: WORK_DIR (read)
#######################################
cleanup() {
  if [[ -n "$WORK_DIR" ]]; then
    rm -rf -- "$WORK_DIR"
  fi
}

#######################################
# Append DELETE statements for every id batch in a file.
# Globals:   PART_TABLES (read)
# Arguments: $1 - file with one comma-separated id list per line
#            $2 - SQL file to append to
# Outputs:   "Line: <ids>" on stdout for each batch processed; a warning on
#            stderr for each non-empty line that is not a list of integers
# Returns:   0 on success, 1 if the id file cannot be read
#######################################
emit_delete_statements() {
  local ids_file=$1 sql_file=$2
  local id_list table
  local -r id_list_re='^[0-9]+(,[0-9]+)*$'

  if [[ ! -r "$ids_file" ]]; then
    printf 'Cannot read id file: %s\n' "$ids_file" >&2
    return 1
  fi

  while IFS= read -r id_list || [[ -n "$id_list" ]]; do
    # Only plain integer lists go into the SQL; an empty line would
    # otherwise produce the invalid "IN ()".
    if ! [[ $id_list =~ $id_list_re ]]; then
      if [[ -n "$id_list" ]]; then
        printf 'Skipping malformed id list: %s\n' "$id_list" >&2
      fi
      continue
    fi

    echo "Line: $id_list"
    for table in "${PART_TABLES[@]}"; do
      printf 'DELETE FROM %s WHERE PART_ID IN (%s);\n' "$table" "$id_list"
    done >> "$sql_file" || return 1
  done < "$ids_file"
  return 0
}

#######################################
# Entry point.
# Globals:   WORK_DIR (written), SQL_OUT (read)
# Arguments: $1 - report file to pass to get_part_ids.sh
# Returns:   0 on success, 1 on failure
#######################################
main() {
  local report=$1
  local script_dir
  script_dir=$(dirname -- "$0")

  WORK_DIR=$(mktemp -d) || {
    printf 'mktemp failed\n' >&2
    return 1
  }
  # The scratch directory is removed again when the script exits.
  trap cleanup EXIT

  echo "Temp Dir: $WORK_DIR"

  if ! "$script_dir/get_part_ids.sh" "$report" "$WORK_DIR/part_ids.txt"; then
    printf 'get_part_ids.sh failed for: %s\n' "$report" >&2
    return 1
  fi

  emit_delete_statements "$WORK_DIR/part_ids.txt" "$SQL_OUT"
}

# No 'exit' here on purpose: the status of the last command (main or usage)
# becomes the script's exit status.
if (( $# == 1 )); then
  main "$1"
else
  usage
fi

src/main/resources/u3/MYSQL/hive_u3_queries.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ query_definitions:
4141
WHEN PS.LOCATION IS NULL
4242
THEN S.LOCATION
4343
ELSE PS.LOCATION
44-
END AS PATH_LOCATION
44+
END AS PATH_LOCATION,
45+
P.PART_ID
4546
FROM
4647
DBS D
4748
INNER JOIN

src/main/resources/u3/ORACLE/hive_u3_queries.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ query_definitions:
4141
WHEN PS.LOCATION IS NULL
4242
THEN S.LOCATION
4343
ELSE PS.LOCATION
44-
END AS PATH_LOCATION
44+
END AS PATH_LOCATION,
45+
P.PART_ID
4546
FROM
4647
DBS D
4748
INNER JOIN

src/main/resources/u3/POSTGRES/hive_u3_queries.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ query_definitions:
3838
WHEN \"PS\".\"LOCATION\" IS NULL
3939
THEN \"S\".\"LOCATION\"
4040
ELSE \"PS\".\"LOCATION\"
41-
END AS \"PATH_LOCATION\"
41+
END AS \"PATH_LOCATION\",
42+
\"P\".\"PART_ID\"
4243
FROM
4344
\"DBS\" \"D\"
4445
INNER JOIN

src/main/resources/u3/proc/hive_u3_procs.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,11 @@ processes:
1919
displayName: "Locations Scan"
2020
queryDefinitionReference: "/hive_u3_queries.yaml"
2121
dbListingQuery: "db_tbl_count"
22-
listingColumns: [ "name" ,"tbl_name" , "tbl_type" ,"part_name" , "path_location" ]
22+
listingColumns: [ "name" ,"tbl_name" , "tbl_type" ,"part_name" , "path_location", "part_id" ]
2323
pathsListingQuery: "tbl_part_locations"
2424
commandChecks:
2525
- displayName: "Hive 3 Upgrade Check - Missing Directory Locations Scan"
26-
title: "# Hive 3 Upgrade Check - Missing Direcotories (v.${Implementation-Version})\n\n## Missing Directory Locations Scan"
26+
title: "# Hive 3 Upgrade Check - Missing Directories (v.${Implementation-Version})\n\n## Missing Directory Locations Scan"
2727
note: "
2828
*Remediation Options*\n
2929
_NOTE:_ Beware of \"Smart Quote\" AND other conversions via Markdown Renderers. Copy/paste from 'raw' text to ensure compatibility.\n\n
@@ -39,7 +39,7 @@ processes:
3939
always a good practice.\n\n
4040
This process should be run frequently, up to the point of the upgrade, to ensure all items have been addressed. Before
4141
the upgrade can succeed, this report should be *empty*.\n\n"
42-
header: "| DB.Table:Partition Spec | Hive SQL (recommended) | HDFS | Hive MSCK (Post Upgrade) |\n|:---|:---|:---|:---|"
42+
header: "| DB.Table:Partition Spec | Hive SQL (recommended) | HDFS | Hive MSCK (Post Upgrade) | PART ID |\n|:---|:---|:---|:---|:---|"
4343
errorDescription: "Scan Actions"
4444
successDescription: "Issues"
4545
errorFilename: loc_scan_missing_dirs.md
@@ -54,8 +54,8 @@ processes:
5454
partitionCheck:
5555
RECORDS:
5656
test: "if (\"%4$s\".equals(\" \")) true; else false;"
57-
pass: "\"| %1$s.%2$s | DROP TABLE IF EXISTS `%1$s`.`%2$s`; | mkdir -p \\\"%5$s\\\" | MSCK REPAIR TABLE `%1$s`.`%2$s` SYNC PARTITIONS; |\""
58-
fail: "\"| %1$s.%2$s:\" + com.cloudera.utils.hive.sre.Utils.dirToPartitionSpec('%4$s') + \" | ALTER TABLE `%1$s`.`%2$s` DROP IF EXISTS PARTITION (\" + com.cloudera.utils.hive.sre.Utils.dirToPartitionSpec('%4$s') + \"); | mkdir -p \\\"%5$s\\\" | |\""
57+
pass: "\"| %1$s.%2$s | DROP TABLE IF EXISTS `%1$s`.`%2$s`; | mkdir -p \\\"%5$s\\\" | MSCK REPAIR TABLE `%1$s`.`%2$s` SYNC PARTITIONS; | |\""
58+
fail: "\"| %1$s.%2$s:\" + com.cloudera.utils.hive.sre.Utils.dirToPartitionSpec('%4$s') + \" | ALTER TABLE `%1$s`.`%2$s` DROP IF EXISTS PARTITION (\" + com.cloudera.utils.hive.sre.Utils.dirToPartitionSpec('%4$s') + \"); | mkdir -p \\\"%5$s\\\" | | %6$s |\""
5959
params: [ "hive" ]
6060

6161
- type: "dbSet"

0 commit comments

Comments
 (0)