This repository has been archived by the owner on Mar 16, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 27
/
Copy pathsystem-backup.py
executable file
·360 lines (308 loc) · 13.2 KB
/
system-backup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
#!/usr/bin/python
# -*- coding: UTF-8 -*-
##############################################################################
#
# Copyright (C) 2007-2009 Kevin Deldycke <[email protected]>
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
"""
Description:
This script automates system backups using rdiff-backup and rsync. It
is based on an idea from the "Backup up on unreliable link" article
( http://wiki.rdiff-backup.org/wiki/index.php/BackupUpOnUnreliableLink )
published on the official rdiff-backup wiki ( http://wiki.rdiff-backup.org ).
Requirements:
* linux
* python >= 2.4
* rdiff-backup >= 1.1.0
* rsync >= 2.6.7
* ssh
* ps
Features:
* Use rsync to make a local mirror of the remote machine (to speed up backups and make them work over an unreliable connection)
* Auto-clean rdiff-backup repository
* Lock mechanism to not run this script twice
* Auto-kill the script if the backup process takes too much time
* A new increment will not be created if rsync didn't reach the remote host
Advices:
* Run this script regularly via a cron entry, e.g.: `0 13 * * * root /root/system-backup.py >> /mnt/backup-disk/backup.log`
* Modify the BACKUP_TIMEOUT constant according to your cron job frequency
* Use rsa-key/certificate-based authentication to access remote machine via SSH
TODO:
* nice -n 19 (do not take all resources) ?
* use the logging module instead of printing in the wild...
* How can we avoid running commands in a shell? Does it make sense?
* auto kill after XX hours of running (use a timer).
"""
################### Start of user config ###################
# Root folder under which every backup set listed in 'backup_list' is stored
BACKUP_DIR = '/mnt/backup-disk'

# How many rdiff-backup increments are kept when old ones are purged
INCREMENTS_TO_KEEP = 15

# Maximum age (in seconds) of a previous run before it is killed:
# 2 days minus one hour = 47 hours
BACKUP_TIMEOUT = (2 * 24 - 1) * 60 * 60

# One entry per backup set:
#   * 'local_dir' : sub-folder of BACKUP_DIR holding this set
#   * 'remote_dir': source given to rsync; it is inserted as-is in the rsync
#                   command line, so it may also carry extra rsync options
# NOTE(review): the `[email protected]` text below looks like an e-mail
# obfuscation placeholder rather than a real user@host value - confirm and
# restore the intended host before using these entries.
backup_list = [
    {
        'local_dir': 'laptop-home',
        'remote_dir': '-z -e ssh [email protected]:/home/',
    },
    {
        'local_dir': 'laptop-main',
        'remote_dir': '-z -e ssh [email protected]:/',
    },
    {
        'local_dir': 'server-backup',
        'remote_dir': '/',
    },
]
#################### End of user config ####################
################ Do not modify code below ! ################
import atexit
import pickle
import signal
from os import makedirs, remove, getpid, kill, waitpid
from os.path import abspath, exists
from sys import exit
from time import time as now
from commands import getstatusoutput
from subprocess import STDOUT, PIPE, Popen
# Absolute path of the lock file, stored at the root of the main backup folder
LOCK_FILE = abspath("%s/%s" % (BACKUP_DIR, "system-backup.lock"))
def run(cmd, quiet=False, update_lock_file=True):
    """
    Generic method to run a linux command and register all its child processes in the lock file.

    Parameters:
      * cmd: command line, handed as-is to the shell.
      * quiet: if True, neither the debug trace nor the failure message is printed.
      * update_lock_file: if False, the lock file is not touched at all.

    Return a (status, output) tuple similar to commands.getstatusoutput():
    'status' is the raw status reported by waitpid() (an exit code of 1 is
    reported as 256) and 'output' is the merged stdout/stderr of the command.
    """
    if not quiet:
        print("DEBUG - Run `%s`..." % cmd)
    # Run command as a child in a shell environment, else rdiff-backup and rsync will fail.
    # The child environment only contains LANGUAGE=en (presumably to get
    # english messages, as main() searches the output for english strings).
    child = Popen( cmd
                 , shell     = True
                 , env       = {"LANGUAGE": "en"}
                 , stdin     = PIPE
                 , stdout    = PIPE
                 , stderr    = STDOUT
                 , close_fds = True
                 )
    # Nothing is ever sent to the child: close its input right away so a
    # command trying to read stdin gets EOF instead of blocking forever.
    child.stdin.close()
    # Register child in the lock file
    if update_lock_file:
        updateLockFile()
    # Read the whole output BEFORE waiting for the child: a command printing
    # more than the pipe buffer can hold would otherwise block on write while
    # we block in waitpid(), and both processes would hang forever.
    try:
        output = child.stdout.read()
    finally:
        child.stdout.close()
    # Wait for the child end and keep the raw waitpid() status
    exit_code = waitpid(child.pid, 0)[1]
    if not quiet and exit_code != 0:
        print("ERROR - Command `%s` failed !" % cmd)
    # Unregister child from the lock file
    if update_lock_file:
        updateLockFile()
    # Return something similar to commands.getstatusoutput() method
    return (exit_code, output)
def updateLockFile():
    """
    Write (or rewrite) the lock file with the start time and the current
    process list of the script.

    Two modes are handled transparently:
      * "create" mode, when no lock file exists yet: the start time is set
        to the current time;
      * "update" mode, when a lock file is already there: the start time
        stored in it is kept and only the process list is refreshed.

    The script exits with status 1 if the lock file can't be written.
    """
    # No lock file yet means we are creating it, else we are updating it
    create_mode = not exists(LOCK_FILE)
    if create_mode:
        # XXX isn't it better to use "ps" and output format option "etime" to know for how long the script is running ?
        # As said in the manual: etime = elapsed time since the process was started, in the form [[dd-]hh:]mm:ss."
        start_time = now()
    else:
        # Carry over the start time saved by the very first call
        start_time = getDataFromLockFile()['start_time']
    # Everything below is pickled as a single dict
    lock_data = { 'start_time'  : start_time
                , 'process_list': getProcessList()
                }
    # Save the new content
    try:
        f = open(LOCK_FILE, 'w')
        pickle.dump(lock_data, f)
        f.close()
    except IOError:
        print("FATAL - Can't create lock file !")
        exit(1)
    # Tell the user what happened
    if create_mode:
        print("INFO - Lock file created as '%s'." % LOCK_FILE)
    else:
        print("INFO - Lock file updated.")
def getDataFromLockFile():
    """
    Generic method to get start time and pid list from lock file.

    Return the object unpickled from LOCK_FILE (callers read its
    'start_time' and 'process_list' keys).

    The script exits with status 1 if the lock file can't be opened or if
    its content can't be unpickled (empty, truncated or corrupted file).
    """
    # NOTE: unpickling runs whatever the file describes, so the backup folder
    # holding the lock file must only be writable by trusted users.
    try:
        f = open(LOCK_FILE, 'r')
        try:
            lock_data = pickle.load(f)
        finally:
            # Always release the file handle, even if unpickling fails
            f.close()
    except IOError:
        print("FATAL - Can't open lock file !")
        exit(1)
    except ( EOFError, pickle.UnpicklingError, AttributeError, ImportError
           , IndexError, KeyError, ValueError ):
        # An empty or half-written lock file ends here (EOFError), as well as
        # any other malformed pickle stream
        print("FATAL - Can't read lock file content !")
        exit(1)
    return lock_data
def removeLockFile():
    """
    Generic method to remove the lock file.

    The script exits with status 1 if the lock file can't be removed.
    """
    try:
        remove(LOCK_FILE)
    except (IOError, OSError):
        # os.remove() reports its failures (missing file, access denied...)
        # as OSError, which is not an IOError on python 2
        print("FATAL - Can't remove lock file !")
        exit(1)
    print("INFO - Lock file removed.")
def getProcessChildren(parent_pid):
    """
    Ask the "ps" linux command for the direct children of a given process.

    Return a dict mapping each child pid (as an integer) to its command line.
    The script exits with status 1 if "ps" fails for an unknown reason.
    """
    # Only the pid and command columns are requested
    ps_cmd = "ps --no-headers --ppid %d -o pid,command" % parent_pid
    # No lock file update else we will face infinite recursion...
    (status, ps_output) = run(ps_cmd, quiet=True, update_lock_file=False)
    # 0 mean that "ps" return something, 256 mean "ps" didn't find any child
    if status != 0 and status != 256:
        # Command fail for unknown reason
        print("FATAL - Command `%s` failed with %d exit code !" % (ps_cmd, status))
        exit(1)
    # Each useful line of the "ps" output looks like "<pid> <command>"
    found = {}
    for line in ps_output.split('\n'):
        fields = line.strip().split(' ', 1)
        if len(fields) != 2:
            # Empty line or line without a command: nothing to register
            continue
        (pid, command) = fields
        found[int(pid)] = command
    return found
def getRecursiveProcessChildren(parent_pid):
    """
    Collect the whole descendance (children, grand-children and so on) of a
    given process and return it as one flat dict mapping pids to commands.
    """
    family = getProcessChildren(parent_pid)
    # Walk over a snapshot of the direct children as the dict grows in the loop
    for pid in list(family.keys()):
        family.update(getRecursiveProcessChildren(pid))
    return family
def getProcessList():
    """
    Return the same flat dict as getRecursiveProcessChildren() for the
    current script, with the script's own process added to it.
    """
    # Start from all the descendants of the current script
    own_pid = getpid()
    family = getRecursiveProcessChildren(parent_pid=own_pid)
    # Ask "ps" for the command line of the current script.
    # No lock file update else we will face infinite recursion...
    ps_cmd = "ps --no-headers --pid %d -o command" % own_pid
    (status, ps_output) = run(ps_cmd, quiet=True, update_lock_file=False)
    # Only the text following the first space of the first output line is kept
    # NOTE(review): this raises IndexError if that line contains no space
    first_line = ps_output.split('\n')[0].strip()
    family[own_pid] = first_line.split(' ', 1)[1]
    return family
def main():
    """
    Core of the backup script.

    Steps:
      * check that the main backup folder exists;
      * if a lock file is found, either kill the processes it lists (when it
        is older than BACKUP_TIMEOUT) or give up;
      * create a new lock file and install the SIGTERM handler removing it;
      * for each entry of 'backup_list', create the local folders, check the
        rdiff-backup repository and queue the rsync / rdiff-backup commands;
      * run all queued commands, then remove the lock file.

    The script exits with status 1 if the backup folder is missing or if
    another instance seems to be running.
    """
    # Check existence of main backup folder
    if not exists(abspath(BACKUP_DIR)):
        print("FATAL - Main backup folder '%s' is not reachable ! Create it by hand or check access rights." % BACKUP_DIR)
        exit(1)

    # Check existence of previous lock file
    if exists(LOCK_FILE):
        print("WARNING - Lock file found !")
        lock_data = getDataFromLockFile()
        start = lock_data['start_time']
        pid_list = lock_data['process_list']
        elapsed = now() - float(start)
        # Kill previous instance of the backup process if it's older than the backup timeout
        if elapsed > BACKUP_TIMEOUT:
            # Report the real age of the previous instance, not the timeout value
            print("INFO - Timeout reached: previous instance of this script was started %s hours ago." % round(elapsed / 3600, 1))
            # Kill the parent process and all its remaining children
            old_instance_running = False
            for (pid, pcmd) in pid_list.items():
                try:
                    # TODO: check here that the process is our (use "pcmd" to compare with current process')
                    kill(int(pid), signal.SIGKILL)
                except OSError:
                    # If kill() failed it mean that the process is already dead.
                    # Only OSError is caught so that Ctrl-C or exit requests
                    # are not silently swallowed here.
                    pass
                else:
                    old_instance_running = True
            if old_instance_running:
                print("INFO - Previous running instance found and killed !")
            else:
                print("INFO - No running instance found.")
            # We can remove previous lock file safely
            removeLockFile()
        # No timeout reached: do not disturb running instance
        else:
            print("FATAL - Another instance seems to be running. Please investigate before considering the removal of '%s'." % LOCK_FILE)
            exit(1)

    # Init and create the lock file
    updateLockFile()

    # Register signal handler to automatically remove the lock file if the script is stopped
    def atSIGTERM(signum, frame):
        removeLockFile()
        exit(0)
    # NOTE(review): this exit handler only calls exit(0) and does not remove
    # the lock file; kept as-is, confirm whether removeLockFile was intended.
    atexit.register(lambda: exit(0))
    signal.signal(signal.SIGTERM, atSIGTERM)

    # Backup strategy: mirror all machines first then make incremental storage.
    # This strategy lower the "exposure" of external machines to this script, by running rsync operations first then rdiff-backup ones.
    command_list = []

    # Proceed each backup set
    for backup_item in backup_list:
        # Create local folder tree
        local_dir = abspath("%s/%s" % (BACKUP_DIR, backup_item['local_dir']))
        mirror_dir = abspath("%s/mirror" % local_dir)
        increment_dir = abspath("%s/incremental" % local_dir)
        for folder in [local_dir, mirror_dir, increment_dir]:
            if not exists(folder):
                makedirs(folder)

        # TODO: detect here if the parameter is a local or a remote directory. Then, if the things to backup is a directory and the path doesn't end with a "/" add one.
        # Mirror the remote directory
        mirror_cmd = """rsync -axHhhv --numeric-ids --partial --stats --delete --delete-before %s %s """ % (backup_item['remote_dir'], mirror_dir)
        command_list.append(mirror_cmd)

        # Check rdiff-backup consistency: if the previous rdiff-backup transaction has failed (power failure, or reboot), rdiff-backup folder must be cleaned up else new increments can't be added
        # This check is run immediately (not queued) as its output decides what gets queued next
        check_consistency_cmd = """rdiff-backup -l "%s" """ % increment_dir
        (exit_code, cmd_output) = run(check_consistency_cmd)

        # Auto clean the repository if necessary
        # Case 1: remove inconsistent last increment
        if cmd_output.find("--check-destination-dir") != -1:
            roll_back_cmd = """rdiff-backup --check-destination-dir --force -v5 "%s" """ % increment_dir
            command_list.append(roll_back_cmd)
        # Case 2: repository is in a very bad shape, the only solution is to delete the rdiff-backup-data directory
        elif cmd_output.find("Fatal Error: Bad rdiff-backup-data dir on destination side") != -1:
            reset_repository_cmd = """rm -rf "%s" """ % abspath("%s/rdiff-backup-data" % increment_dir)
            command_list.append(reset_repository_cmd)

        # Purge old increments first to free space
        purge_cmd = """rdiff-backup --force --remove-older-than %dB "%s" """ % (INCREMENTS_TO_KEEP, increment_dir)
        command_list.append(purge_cmd)

        # Add an increment
        add_increment_cmd = """rdiff-backup --exclude-device-files --force -v5 --preserve-numerical-ids --restrict-read-only --print-statistics --exclude-sockets "%s" "%s" """ % (mirror_dir, increment_dir)
        command_list.append(add_increment_cmd)

    # Run all system commands
    for cmd in command_list:
        run(cmd)

    # Backup successfull ! Remove lock file.
    removeLockFile()
if __name__ == "__main__":
    # Script entry point: a Ctrl-C ends the script with a success status
    # instead of a traceback
    try:
        main()
    except KeyboardInterrupt:
        exit(0)