forked from cfpb/consumerfinance.gov
-
Notifications
You must be signed in to change notification settings - Fork 0
/
refresh-data.sh
executable file
·116 lines (97 loc) · 3.29 KB
/
refresh-data.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/bin/bash
# ==========================================================================
# Import data from a gzipped database dump. Pass the dump's filename as an
# argument, or set CFGOV_PROD_DB_LOCATION or DB_DUMP_URL to have it downloaded.
# NOTE: Run this script while in the project root directory.
# It will not run correctly when run from another directory.
# ==========================================================================
# Abort the script as soon as any unhandled command exits with a non-zero status.
set -e
#######################################
# Print instructions for supplying a database dump and terminate.
# Outputs: writes the help text to stdout
# Returns: never returns; exits the shell with status 1
#######################################
usage() {
  printf '%s\n' \
    'Please download a recent database dump before running this script:' \
    './refresh-data.sh production_django.sql.gz' \
    'Or you can define the location of a dump and this script will' \
    'download it for you:' \
    'export CFGOV_PROD_DB_LOCATION=https://example.com/production_django.sql.gz' \
    './refresh-data.sh' \
    'Additional options:' \
    '--noindex Do not update search indexes after refreshing'
  exit 1
}
#######################################
# Download the database dump unless the local copy is already current.
# Globals:
#   refresh_dump_name      (read) - local path the dump is stored at
#   CFGOV_PROD_DB_LOCATION (read) - dump URL; takes precedence when set
#   DB_DUMP_URL            (read) - fallback dump URL
# Outputs: progress messages on stdout
# Returns: non-zero if a curl invocation fails
#######################################
download_data() {
  local dump_url="${CFGOV_PROD_DB_LOCATION:-$DB_DUMP_URL}"
  # Matches a 304 status line with or without a reason phrase: HTTP/1.1 sends
  # "HTTP/1.1 304 Not Modified", while HTTP/2 sends just "HTTP/2 304".
  local not_modified_re='(^|[[:space:]])HTTP/[0-9.]+[[:space:]]+304([[:space:]]|$)'
  local response_headers
  local skip_download=0

  echo 'Downloading database dump...'

  # If the file already exists, send a conditional HEAD request (-z compares
  # against the local file's timestamp) and skip the download when the server
  # reports that the remote file has not been modified since.
  if [[ -e "$refresh_dump_name" ]]; then
    response_headers=$(curl -s -I -R -L -z "$refresh_dump_name" "$dump_url")
    if [[ "$response_headers" =~ $not_modified_re ]]; then
      echo 'Skipping download as local timestamp matches remote timestamp'
      skip_download=1
    fi
  fi

  if [[ "$skip_download" == 0 ]]; then
    # --fail: exit non-zero on an HTTP error (>= 400) instead of saving the
    # server's error page as the dump. -R keeps the remote timestamp so the
    # conditional check above works on the next run.
    curl --fail -RL -o "$refresh_dump_name" "$dump_url"
  fi
}
#######################################
# Verify that the local dump is an intact gzip archive.
# Globals: refresh_dump_name (read) - path to the dump file
# Outputs: a progress message on stdout
# Returns: the exit status of the gunzip integrity test
#######################################
check_data() {
  local dump_path="$refresh_dump_name"
  printf '%s\n' 'Validating local dump file'
  gunzip -t "$dump_path"
}
#######################################
# Load the dump into the database, align the schema name with the database
# user, then run migrations and the initial_data script.
# Must be run from the project root (uses ./cfgov/manage.py).
# Globals:
#   refresh_dump_name (read)         - path to the gzipped SQL dump
#   PGUSER            (read/written) - defaults to "cfpb" when unset or empty
#   SCHEMA            (written)      - schema named by the dump's first
#                                      CREATE SCHEMA statement, or empty
# Outputs: progress messages on stdout
# Returns: non-zero if the final manage.py command fails
#######################################
refresh_data() {
  echo 'Importing refresh db'
  gunzip < "$refresh_dump_name" | ./cfgov/manage.py dbshell > /dev/null

  # Quote the dump path so names containing spaces are read correctly.
  SCHEMA="$(gunzip -c "$refresh_dump_name" \
    | grep -m 1 'CREATE SCHEMA' \
    | sed 's/CREATE SCHEMA \(.*\);$/\1/')"
  PGUSER="${PGUSER:-cfpb}"

  # Only rename when the dump actually declares a schema. With an empty
  # SCHEMA the rename cannot succeed, yet the DROP ... CASCADE would still
  # run and could remove the data that was just imported.
  if [[ -n "${SCHEMA}" && "${PGUSER}" != "${SCHEMA}" ]]; then
    echo "Adjusting schema name to match username..."
    # NOTE(review): psql output and errors are discarded here, as in the
    # original; a failed rename is silent.
    printf '%s\n' \
      "DROP SCHEMA IF EXISTS \"${PGUSER}\" CASCADE; ALTER SCHEMA \"${SCHEMA}\" RENAME TO \"${PGUSER}\"" \
      | psql > /dev/null 2>&1
  fi

  echo 'Running any necessary migrations'
  ./cfgov/manage.py migrate --noinput --fake-initial

  echo 'Setting up initial data'
  ./cfgov/manage.py runscript initial_data
}
#######################################
# Rebuild the search indexes and then re-index the documents.
# Must be run from the project root (uses ./cfgov/manage.py).
# Outputs: a progress message on stdout, plus whatever manage.py prints
# Returns: the exit status of the final manage.py command
#######################################
update_index() {
  local manage_cmd=./cfgov/manage.py
  printf '%s\n' 'Updating search indexes'
  "$manage_cmd" opensearch index --force rebuild
  "$manage_cmd" opensearch document --force --refresh --parallel index
}
#######################################
# Decide which dump file to use, downloading it when none was supplied.
# Globals:
#   refresh_dump_name      (read/written) - dump path; derived here when empty
#   CFGOV_PROD_DB_LOCATION (read)         - dump URL; takes precedence when set
#   DB_DUMP_URL            (read)         - fallback dump URL
# Outputs: an error message on stdout for a badly named dump
# Returns: exits with status 2 when a supplied dump name does not end in
#          .sql.gz; calls usage when there is neither a dump nor a URL
#######################################
get_data() {
  # A dump was supplied by the caller: just validate its name.
  if [[ -n "$refresh_dump_name" ]]; then
    if [[ "$refresh_dump_name" != *.sql.gz ]]; then
      echo "Input dump '$refresh_dump_name' expected to end with .sql.gz."
      exit 2
    fi
    return 0
  fi

  if [[ -z "$CFGOV_PROD_DB_LOCATION" && -z "$DB_DUMP_URL" ]]; then
    usage
  fi

  if [[ -n "$CFGOV_PROD_DB_LOCATION" ]]; then
    refresh_dump_name='production_django.sql.gz'
  else
    # Take everything after the last '/' of the URL as the file name.
    # Parameter expansion avoids the word splitting and globbing of an
    # unquoted `echo $DB_DUMP_URL` pipeline.
    refresh_dump_name="${DB_DUMP_URL##*/}"
  fi
  download_data
}
# Parse command-line arguments: --noindex skips the search index rebuild; any
# other argument is taken as the dump file name (the last one given wins).
# The flag is kept as 0/1 so the numeric test below compares like with like.
noindex=0
for arg in "$@"; do
  case "$arg" in
    --noindex)
      noindex=1
      ;;
    *)
      refresh_dump_name=$arg
      ;;
  esac
done

# Locate (or download) the dump, verify it, and load it into the database.
get_data
check_data
refresh_data

# Rebuild the search indexes unless --noindex was given.
if (( noindex != 1 )); then
  update_index
fi