-
Notifications
You must be signed in to change notification settings - Fork 39
/
git-find-large-files
executable file
·89 lines (76 loc) · 2.48 KB
/
git-find-large-files
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/env bash
#
# Print the largest files in a Git repository. The script must be called
# from the root of the Git repository. You can pass a threshold to print
# only files greater than a certain size (compressed size in the Git
# database; the default is 500 KB).
#
# Files that have a large compressed size should usually be stored in
# Git LFS [2].
#
# Based on script from Antony Stubbs [1] and improved with ideas from Peff.
#
# [1] http://stubbisms.wordpress.com/2009/07/10/git-script-to-show-largest-pack-objects-and-trim-your-waist-line/
# [2] https://git-lfs.github.com/
#
# Usage:
# git-find-large-files [size threshold in KB]
#
# Size threshold in KB: only objects whose compressed size exceeds it are
# listed. Defaults to 500 when no argument (or an empty one) is given.
MIN_SIZE_IN_KB=${1:-500}
# The threshold is later compared with "[ ... -le ... ]", which only accepts
# integers. A non-numeric value would make that test fail for every object,
# so the early exit from the loop would never trigger. Reject such values up
# front; an optional leading minus sign is still accepted.
case ${MIN_SIZE_IN_KB#-} in
  '' | *[!0-9]*)
    printf 'Usage: %s [size threshold in KB]\n' "${0##*/}" >&2
    exit 2
    ;;
esac
# Pick the command used to find a SHA in the sorted object lists: "look"
# if it is available, otherwise "grep" (e.g. on Windows).
#
# Availability is checked with "command -v". Running "look" without any
# arguments is a usage error and exits non-zero, so its exit status cannot
# distinguish an installed "look" from a missing one.
if command -v look >/dev/null 2>&1; then
  # On Debian the "-b" is available and required to make "look" perform
  # a binary search (see https://unix.stackexchange.com/a/499312/275508 ).
  # The usage text printed by a bare "look" shows whether "-b" is supported.
  if look 2>&1 | grep -q '.-b'; then
    # $search is expanded unquoted after IFS has been set to a line break,
    # so a value like "look -b" would not be split into command and option.
    # Wrap the option in a function to keep $search a single word.
    look_binary() { look -b "$@"; }
    search=look_binary
  else
    search=look
  fi
else
  search=grep
fi
# Split unquoted expansions on line breaks only, so that a multi-line
# value can be walked one line at a time with a plain "for" loop.
IFS=$'\n'
# Collect one "<size on disk> <object name>" line for every object in the
# repository, ordered by size on disk with the largest first.
OBJECTS=$(git cat-file --batch-all-objects --batch-check='%(objectsize:disk) %(objectname)' | sort -n -r)
# Scratch directory holding the sorted object lists and the collected rows.
TMP_DIR=$(mktemp -d "${TMPDIR:-/tmp}/git-find-large-files.XXXXXX") || exit
# Remove the scratch directory on every exit path. The path is expanded when
# the trap runs instead of being spliced into the trap string, so a quote
# character in $TMPDIR cannot break or alter the cleanup command.
cleanup() { rm -rf -- "$TMP_DIR"; }
trap cleanup EXIT
# Object lists as printed by "git rev-list --objects", sorted byte-wise
# (LC_ALL=C) because "look" searches the file assuming it is sorted and the
# order must not depend on the caller's locale.
#   objects   - everything reachable from any ref
#   objects.1 - the objects of a single commit only
# NOTE(review): with --all, --max-count=1 keeps the first commit rev-list
# outputs across all refs; presumably that is meant to be HEAD, but it need
# not be when another ref has a newer commit - confirm.
git rev-list --all --objects | LC_ALL=C sort > "$TMP_DIR/objects"
git rev-list --all --objects --max-count=1 | LC_ALL=C sort > "$TMP_DIR/objects.1"
# $search may hold a command plus an option (e.g. "look -b"). IFS is a line
# break at this point, so an unquoted $search would stay a single word and be
# run as a command literally named "look -b". Split it on spaces explicitly.
IFS=' ' read -r -a search_cmd <<< "$search"
# Walk the objects from largest to smallest and record one
# "size,status,path" row for each object above the threshold.
for OBJ in $OBJECTS; do
  # each entry is "<size in bytes> <sha>"; convert the size to kilobytes
  COMPRESSED_SIZE=$(( ${OBJ%% *} / 1024 ))
  # entries are sorted largest first, so the first one that is not above
  # the threshold ends the scan
  if [ "$COMPRESSED_SIZE" -le "$MIN_SIZE_IN_KB" ]; then
    break
  fi
  # extract the SHA
  SHA=${OBJ#* }
  # find the object's location in the repository tree
  LOCATION=$("${search_cmd[@]}" "$SHA" "$TMP_DIR/objects" | sed "s/$SHA //")
  if "${search_cmd[@]}" "$SHA" "$TMP_DIR/objects.1" >/dev/null; then
    # Object is in the head revision
    HEAD="Present"
  elif [ -n "$LOCATION" ] && [ -e "$LOCATION" ]; then
    # The object's path exists in the working directory. The location must
    # be non-empty and quoted: a bare "[ -e ]" is true, which would report
    # objects without any known path as "Changed".
    HEAD="Changed"
  else
    # Neither the object nor its path is in the head revision
    HEAD="Deleted"
  fi
  printf '%s,%s,%s\n' "$COMPRESSED_SIZE" "$HEAD" "$LOCATION" >> "$TMP_DIR/output"
done
# Render the collected "size,status,path" rows as an aligned table. The
# output file only exists when at least one row was written, so nothing
# is printed otherwise.
[ ! -f "$TMP_DIR/output" ] || column -t -s ',' < "$TMP_DIR/output"
# Drop the scratch directory and report success.
rm -rf "$TMP_DIR"
exit 0