#!/usr/bin/env python # -*- coding: iso-8859-1 -*- # Incremental Backup with Rsync # If you used "rsync .... localdir" before, # just call "incrisync initold" once and then # incrisync rsync .... localdir # This will create incremental backups using hardlinks # under localdir/days, localdir/months and localdir/years # # (c) March 2007 Thomas Guettler http://www.thomas-guettler.de # This script is in the public domain # Use at your own risk # Python Imports import os import re import sys import fcntl import getopt import shutil import random import tempfile import datetime import subprocess def usage(): print """Incremental backup with rsync. Usage1: %s init localdir create the daily, monthly and yearly directories under localdir. It is save to call it twice, no files get deleted. Usage2: %s initold localdir If you copied with rsync to localdir in the past, you can use this. The old backup will be under localdir/days/DD. With DD being yesterday. Usage3: %s backup OPTIONS rsync RSYNC_OPTIONS ... localdir localdir: This directory should be created with "init" OPTIONS: --test: Display rsync command and exit. --faketoday YYYY-MM-DD: For debugging. Pretend that this day is today. Usage4: %s move localdir localdirnew Since the localdir contains absolute symlinks, just moving the directory to an other places does not work. This uses the "mv" system command. At least my mv command (GNU coreutils 5.93) keeps hardlinks. Needs at least rsync 2.6.4 and Python 2.4 The script creates hardlinks by using the --link-dest option of rsync. This way you only need a fraction of the space of full backups. Nevertheless you can access the local directories by following the symlinks in the"history" directory. All backups look like full backups. Since most data does not change from one backup to the next, must files are hardlinks to older backups. You don't need to tell the script if it should keep this backup as monthly or yearly backup: The script remembers the last backup by the symlink in "last/day". If the year changed between the last time and today, the last backup will be keep for the last year. This yearly backup will never be deleted. You need to delete it manualy. If the month changed since the last backup, this backup will be kept for the monthly backup. It will be overwritten one year later. These things are not possible: - more than one backup per day - weekly backups """ % ( os.path.basename(sys.argv[0]), os.path.basename(sys.argv[0]), os.path.basename(sys.argv[0]), os.path.basename(sys.argv[0])) def main(): if len(sys.argv)==1: usage() sys.exit(3) if sys.argv[1]=="init": if not len(sys.argv)==3: print "Missing localdir" usage() sys.exit(3) return init(sys.argv[2]) if sys.argv[1]=="initold": if not len(sys.argv)==3: print "Missing localdir" usage() sys.exit(3) return initold(sys.argv[2]) if sys.argv[1]=="backup": if not len(sys.argv)>=4: print "Missing rsync args" usage() sys.exit(3) return backup(sys.argv[2:]) if sys.argv[1]=="move": if not len(sys.argv)==4: usage() sys.exit(3) return move(sys.argv[2:]) usage() sys.exit(3) class Tee: def __init__(self, *args): self.fds = args def write(self, data): for fd in self.fds: fd.write(data) def writelines(self, seq): for i in seq: self.write(i) def flush(self): for fd in self.fds: fd.flush() def mydirs(localdir): """ Return a list of all directories which are needed for incrisync """ ret=[] ret.append(localdir) for name, subdirs in [("days", range(1, 32)), ("months", range(1, 13))]: namedir=os.path.join(localdir, name) ret.append(namedir) for subdir in subdirs: subdir=os.path.join(namedir, "%02d" % subdir) ret.append(subdir) for extradir in ["years", "log", "last", "history", "tmp", "run"]: extradir=os.path.join(localdir, extradir) ret.append(extradir) return ret def init(localdir): dirs=mydirs(localdir) for dir in dirs: if not os.path.isdir(dir): try: os.mkdir(dir) except OSError, e: print e sys.exit(3) print "Created", dir else: print "Exists", dir assert testlocaldir(localdir) def initold(localdir): localdir=os.path.abspath(localdir) assert os.path.isdir(localdir), localdir tempdir=tempfile.mktemp("", "", localdir) os.mkdir(tempdir) files=os.listdir(localdir) files.sort() for filename in files: file=os.path.join(localdir, filename) if file==tempdir: continue os.rename(file, os.path.join(tempdir, filename)) init(localdir) today=datetime.date.today() yesterday=today-datetime.timedelta(days=1) todir=os.path.join(localdir, "days", "%02d" % yesterday.day) os.rmdir(todir) print "Old files were moved to", todir os.rename(tempdir, todir) histlink=os.path.join( localdir, "history", yesterday.strftime("%Y-%m-%d_%H-%M-%S")) print "Symlink of 'last backup'", histlink os.symlink(todir, histlink) lastday=os.path.join(localdir, "last", "day") os.symlink(histlink, lastday) assert testlocaldir(localdir) def getlinkdests(localdir, todir): return getlastlink(localdir, returnlinkdests=True, todir=todir) def getlastlink(localdir, returnlinkdests=False, todir=None): """ Returns the path to the latest entry in localdir/history/ This symlink should point to the same dir as localdir/last/day """ historydir=os.path.join(localdir, "history") histlinks=os.listdir(historydir) histlinks.sort() histlinks.reverse() # Latest first linkdests=[] lastlink=None for histlink in histlinks: histlink=os.path.join(historydir, histlink) if not os.path.islink(histlink): continue if not lastlink: # YYYY-MM-DD_HH-MM-SS lastlink=histlink linkdest=os.readlink(histlink) assert os.path.isabs(linkdest) if linkdest in linkdests: print "%s: Second link to a directory. " \ "This should not happen." % linkdest continue if linkdest!=todir: linkdests.append(linkdest) if len(linkdests)==10: break if returnlinkdests: return linkdests return lastlink def testlocaldir(localdir): """ Test if the localdir is in a valid state. No broken links, no empty linkdestinations of localdir/last und localdir/history Returns True if OK, False if failed. """ localdir=os.path.abspath(localdir) ret=True for dirname, regex in [ ("last", re.compile(r'^(day|month|year)$')), ("history", re.compile(r'^\d\d\d\d-\d\d-\d\d_\d\d-\d\d-\d\d$'))]: dir=os.path.join(localdir, dirname) files=os.listdir(dir) files.sort() symlinks=[] for file in files: match=regex.match(file) if not match: print "Warning: File does not match regex:", file continue file=os.path.join(dir, file) if not os.path.islink(file): print "Warning: Unknown file. No Symlink", file continue syncdir=os.readlink(file) if not syncdir.startswith(localdir): print "%s (<- %s) must be under %s" % ( syncdir, file, localdir) return False if not os.path.isdir(syncdir): print "%s does not exist. Link from: %s" % (syncdir, file) ret=False else: dirfiles=os.listdir(syncdir) if not dirfiles: print "%s is empty. Please remove symlink %s" % ( syncdir, file) ret=False else: symlinks.append(syncdir) if dirname=="history" and symlinks: lastlink=getlastlink(localdir) lastdir=os.readlink(lastlink) lastday=os.path.join(localdir, "last", "day") if not os.path.islink(lastday): print "history dir not empty and %s does not exist. " \ "Should be a symlink to %s" % ( lastday, lastlink) ret=False else: lastlinkis=os.readlink(lastday) # is vs. should if lastlinkis!=lastlink: print "%s should link to %s but links to %s" % ( lastday, lastlink, lastlinkis) ret=False dirs=mydirs(localdir) for dir in dirs: if not os.path.isdir(dir): print "%s is not a directory. Try %s init %s" % ( dir, os.path.basename(sys.argv[0]), localdir) ret=False return ret def symlink2datetime(symlink): match=re.match( r'^(\d\d\d\d)-(\d\d)-(\d\d)_(\d\d)-(\d\d)-(\d\d)$', os.path.basename(symlink)) assert match, symlink ints=[int(i) for i in match.groups()] return datetime.datetime(*ints) def movedir(localdir, lastlink, fromdir, todir, testonly): assert not os.path.islink(todir), todir assert not os.path.islink(fromdir), fromdir assert not fromdir==todir, "%s %s" % (fromdir, todir) print "%s ==> %s"% ( lastlink, todir) if not testonly: if os.path.exists(todir): assert os.path.isdir(todir), todir unlinkdir=os.path.join(localdir, "tmp", "%s-%s" % ( os.path.basename(lastlink), random.randint(10000, 99999))) os.rename(todir, unlinkdir) try: print "Deleting", todir, unlinkdir shutil.rmtree(unlinkdir) except OSError, e: print "Deleteing old %s failed: %s" % ( todir, str(e)) print "%s => %s" % (fromdir, todir) os.rename(fromdir, todir), os.mkdir(fromdir) os.unlink(lastlink) os.symlink(todir, lastlink) lastday=os.path.join(localdir, "last", "day") if os.path.exists(lastday): os.unlink(lastday) os.symlink(lastlink, lastday) assert testlocaldir(localdir), localdir else: print " test only (no move has happend)" def backup(args): try: opts, args = getopt.getopt(args, "", ["test", "faketoday="]) except getopt.GetoptError, e: print e sys.exit(3) now=datetime.datetime.now() testonly=False for o, a in opts: if o=="--test": testonly=True elif o=="--faketoday": match=re.match(r'^(\d\d\d\d)-(\d\d)-(\d\d)$', a) if not match: print "Wrong date. Need YYYY-MM-DD" sys.exit(3) now=now.replace(int(match.group(1)), int(match.group(2)), int(match.group(3))) else: assert False, "Unhandled command %s %s" % (o, a) localdir=os.path.abspath(args[-1]) daystr=now.strftime("%Y-%m-%d_%H-%M-%S") logdir=os.path.join(localdir, "log") if not os.path.isdir(logdir): print "%s ist kein Verzeichnis" % logdir sys.exit(3) logfile= "%s.log" % os.path.join(logdir, daystr) fdlog=open(logfile, "wt") sys.stdout=Tee(sys.stdout, fdlog) sys.stderr=Tee(sys.stderr, fdlog) rundir=os.path.join(localdir, "run") if not os.path.isdir(rundir): print "%s does not exist." % rundir usage() sys.exit(3) lockfile=os.path.join(rundir, "pid.lock") fdlock=open(lockfile, "a+") try: fcntl.flock(fdlock, fcntl.LOCK_EX | fcntl.LOCK_NB) except IOError, e: print e print "Locking %s failed. An other proccess is running." % ( lockfile) try: line=open(lockfile).readline() match=re.match(r'^(\S+)\s+(\d+).*$', line) if match: print "Host %s PID %s" % ( match.group(1), match.group(2)) finally: sys.exit(3) fdlock.seek(0) fdlock.write("%s %s\n" % (os.uname()[1], os.getpid())) fdlock.flush() if not testlocaldir(localdir): print "localdir %s is not in a valid state. Exit" % localdir sys.exit(3) todir=os.path.join(localdir, "days", "%02d" % now.day) historydir=os.path.join(localdir, "history") newlink=os.path.join(historydir, daystr) lastday=os.path.join(localdir, "last", "day") lastlink=getlastlink(localdir) if lastlink: lasttime=symlink2datetime(lastlink) lastdir=os.readlink(lastlink) if now %s" % (lastlink, newlink) yeardir=os.path.join(localdir, "years", str(lasttime.year)) #assert not os.path.exists(yeardir), yeardir if lastdir==yeardir: print "Move to %s was already done. Last Backup failed?" % ( yeardir) else: movedir(localdir, lastlink, lastdir, yeardir, testonly) elif now.month!=lasttime.month: print "Month changed: %s -> %s" % (lastlink, newlink) # Month changed. Move last day to localdir/months/ monthdir=os.path.join(localdir, "months", "%02d" % lasttime.month) assert os.path.exists(monthdir), monthdir if lastdir==monthdir: print "Move to %s was already done. Last Backup failed?" % ( monthdir) else: movedir(localdir, lastlink, lastdir, monthdir, testonly) testlocaldir(localdir) # Build arguments for calling rsync cmdargs=[args[0]] for linkdest in getlinkdests(localdir, todir): cmdargs.append("--link-dest") cmdargs.append(linkdest) cmdargs.extend(args[1:-1]) cmdargs.append(todir) assert testlocaldir(localdir) print "Calling %s" % (' '.join(cmdargs)) if testonly: print "--test given. Exit." assert testlocaldir(localdir) sys.exit(3) sys.stdout.flush() try: pipe=subprocess.Popen(cmdargs, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) except OSError, e: print e assert testlocaldir(localdir) sys.exit(3) for line in pipe.stdout: print line, ret=pipe.wait() # 0: OK # 24: Partial transfer due to vanished source files if ret not in [0, 24]: print "######" print "######%s" % ' '.join(cmdargs) print "######Failed!" assert testlocaldir(localdir) sys.exit(ret) if not os.listdir(todir): print "######" print "######%s" % ' '.join(cmdargs) print "######Failed: %s is empty" % todir assert testlocaldir(localdir) sys.exit(3) for linkfile in os.listdir(historydir): linkfile=os.path.join(historydir, linkfile) if not os.path.islink(linkfile): print "%s is not a symlink?" % linkfile continue linkdest=os.readlink(linkfile) if linkdest==todir: os.unlink(linkfile) print "Removing old link", linkfile continue print "%s -> %s" % (newlink, todir) os.symlink(todir, newlink) if os.path.islink(lastday): os.unlink(lastday) os.symlink(newlink, lastday) assert testlocaldir(localdir) def move(args): """ Move a localdir directory and fix the symlinks. """ assert len(args)==2 localdirold=os.path.abspath(args[0]) localnew=os.path.abspath(args[1]) if os.path.exists(localnew): print localnew, "must not exist" sys.exit(2) assert testlocaldir(localdirold) ret=subprocess.call(["mv", localdirold, localnew]) if ret: print "mv failed" sys.exit(3) print "mv %s --> %s OK" % ( localdirold, localnew) localdir=localnew histdir=os.path.join(localdir, "history") lastdir=os.path.join(localdir, "last") links=[] for file in os.listdir(histdir): file=os.path.join(histdir, file) if not os.path.islink(file): continue links.append(file) for file in os.listdir(lastdir): file=os.path.join(lastdir, file) if not os.path.islink(file): continue links.append(file) for file in links: olddest=os.readlink(file) assert olddest.startswith(localdirold), localdirold newdest="%s%s" % (localdir, olddest[len(localdirold):]) assert os.path.exists(newdest) os.unlink(file) os.symlink(newdest, file) print "%s --> %s" % (file, newdest) assert testlocaldir(localdir) if __name__=="__main__": main()