====== A Backup Script for Systems with Rsync ====== Below is the code for a sample backup script that uses ''rsync'' and the ''%%--%%link-dest'' option to create snapshots. The disclaimer is repeated here in case you missed it: ** USE AT YOUR OWN RISK. THIS SOFTWARE IS NOT GUARANTEED TO WORK AS DOCUMENTED. ** Usage is: run_backup SOURCE_PATHS... DEST_PATH SOURCE_PATHS and DEST_PATH can be any paths supported by ''rsync''. DEST_PATH must not exist the first time you run this script. It will be used as the base path for snapshots, with a timestamp appended. DEST_PATH will be created as a symbolic link to the latest backup. If you run it multiple times, the most recent previous backup will be used in the ''%%--%%link-dest'' option to ''rsync''. If your backup media is expected to always be connected and available (or at least at the scheduled backup times), this can be put in a cron job to run automatically. For example, the following line in your crontab: 15 2 * * * /path/to/run_backup /source/path1 /source/path2 /dest/path will run the backup script to create backups of ''/source/path1'' and ''/source/path2'' to ''/dest/path'' at 2:15 AM every morning. Run ''man crontab'' and ''man 5 crontab'' for further details on editing crontab files and how to run these commands more or less frequently. (Note that the default editor for ''crontab'' is ''vi'' -- if you get stuck, type '':q!'' to exit ''vi'' and to discard all changes. Or run ''env EDITOR=nano crontab -e'' to edit your crontab with the ''nano'' editor.) ===== Perl script: run_backup ===== #!/usr/bin/env perl # run_backup # # Author: Syam Gadde (gadde@biac.duke.edu) # # Back up files from source paths to destination directory. Destination # (which should not exist the first time this is called) will be created as a # symbolic link to the actual backup, which will have a date/time stamp in # the directory name. 
# Subsequent backups to the same destination will only
# transfer changed files, and will use hard links to unchanged files in the
# previous backup to save space.  Source paths can be files or directories.
#
# Options (may appear anywhere on the command line):
#   --minwait=HOURS    skip the backup if the last successful backup is
#                      less than HOURS hours old
#   --rsyncopt=OPTION  pass OPTION through to rsync (may be repeated)
#
# If you want to specify an alternative path for the rsync binary, put it in
# the RSYNC environment variable.
#
# DISCLAIMER: USE AT YOUR OWN RISK.  THIS SOFTWARE IS NOT GUARANTEED TO WORK
# AS DOCUMENTED.
#
# $Id: run_backup,v 1.3 2009/09/14 18:57:06 gadde Exp $

use strict;
use warnings;

use File::Glob qw(bsd_glob);
use File::Path;
use File::Spec;
use Time::Local;
use IO::Handle;

# Write a timestamped message to standard error, tagged with the parent and
# current process ids.  All arguments are concatenated without a separator;
# callers supply their own trailing newline.
sub log_message {
    my $ppid = getppid();
    print STDERR localtime() . " (${ppid}:$$): run_backup: " . join('', @_);
    return;
}

# Render a number of seconds as " Dd Hh Mm Ss".  Leading zero-valued units
# are omitted, and every unit that is shown is preceded by a single space
# (this keeps the log line byte-compatible with earlier versions).
# A negative input (clock stepped backwards) is treated as zero.
sub format_duration {
    my ($elapsed) = @_;
    $elapsed = 0 if !defined($elapsed) || $elapsed < 0;
    $elapsed = int($elapsed);

    my $secs  = $elapsed % 60;
    my $mins  = int($elapsed / 60) % 60;
    my $hours = int($elapsed / 3600) % 24;
    my $days  = int($elapsed / 86400);

    my $text = '';
    $text .= " ${days}d"  if $days;
    $text .= " ${hours}h" if $days || $hours;
    $text .= " ${mins}m"  if $days || $hours || $mins;
    $text .= " ${secs}s";
    return $text;
}

# Calculate and report how long it took to run this backup (on success
# or failure).  $starttime is an epoch time as returned by time().
sub report_duration {
    my ($starttime) = @_;
    log_message("Backup duration: ", format_duration(time() - $starttime), "\n");
    return;
}

# Split the command line into options and paths.  Returns a hash reference
# with keys:
#   minwait    - raw value of the last --minwait=... seen (default -1)
#   rsync_opts - array ref of every --rsyncopt=... value, in order
#   paths      - array ref of all remaining arguments, in order
sub parse_command_line {
    my @argv = @_;
    my %parsed = (minwait => -1, rsync_opts => [], paths => []);
    for my $arg (@argv) {
        if ($arg =~ /^--minwait=(.*)$/) {
            $parsed{minwait} = $1;
        }
        elsif ($arg =~ /^--rsyncopt=(.*)$/) {
            push @{ $parsed{rsync_opts} }, $1;
        }
        else {
            push @{ $parsed{paths} }, $arg;
        }
    }
    return \%parsed;
}

# NOTE(review): the original usage message was lost when this script was
# embedded in the wiki page; this text is a reconstruction based on the
# options the script parses.
sub usage_text {
    return <<'EOM';
Usage: run_backup [--minwait=HOURS] [--rsyncopt=OPTION]... SOURCE_PATHS... DEST_PATH

Back up SOURCE_PATHS into a new time-stamped snapshot next to DEST_PATH and
make DEST_PATH a symbolic link to that snapshot.

  --minwait=HOURS    skip the backup if the last successful backup is less
                     than HOURS hours old
  --rsyncopt=OPTION  pass OPTION through to rsync (may be repeated)

Set the RSYNC environment variable to use an alternative rsync binary.
EOM
}

# Format an epoch time (local time zone) as the stamp appended to backup
# directory names.
# NOTE(review): the original format string was lost in the wiki extraction.
# This compact layout is accepted by parse_backup_time() and contains no
# colon (rsync treats "name:..." in a relative path as a remote host) --
# confirm it against the names of existing backups.
sub timestamp_string {
    my ($epoch) = @_;
    my ($sec, $min, $hour, $mday, $mon, $year) = localtime($epoch);
    return sprintf('%04d%02d%02dT%02d%02d%02d',
                   $year + 1900, $mon + 1, $mday, $hour, $min, $sec);
}

# Extract the date/time stamp from a backup directory name and convert it
# to an epoch time (local time zone).  Both YYYYMMDDTHHMMSS and
# YYYY-MM-DDTHH:MM:SS are accepted.  Returns undef if the name holds no
# stamp or the stamp is not a valid date.
sub parse_backup_time {
    my ($name) = @_;
    return unless defined $name;
    my ($year, $mon, $mday, $hour, $min, $sec) =
        $name =~ /(\d\d\d\d)-?(\d\d)-?(\d\d)T(\d\d):?(\d\d):?(\d\d)/
        or return;
    # timelocal() dies on out-of-range fields; treat that as "unparseable".
    my $epoch = eval { timelocal($sec, $min, $hour, $mday, $mon - 1, $year) };
    return $epoch;
}

# Work out the value for rsync's --link-dest option.  rsync interprets a
# relative --link-dest relative to the *destination directory*, so always
# hand it an absolute path: an absolute symlink target is used as-is, a
# relative one is resolved against the directory holding the new backup
# (which is also the directory holding the symbolic link).
sub resolve_link_dest {
    my ($linktarget, $nbpath) = @_;
    return $linktarget if File::Spec->file_name_is_absolute($linktarget);
    my ($nbvol, $nbdirs) = File::Spec->splitpath($nbpath);
    my $linkdir = File::Spec->catpath($nbvol, $nbdirs, '');
    $linkdir = File::Spec->curdir() if $linkdir eq '';
    return File::Spec->rel2abs($linktarget, File::Spec->rel2abs($linkdir));
}

# Return 1 if the given rsync binary reports version >= 3.1.0 (and so
# understands --info=progress2), 0 otherwise or if the version cannot be
# determined.  The binary is run without a shell.
sub rsync_supports_progress {
    my ($rsyncbin) = @_;
    my $pid = open(my $fh, '-|', $rsyncbin, '--version');
    return 0 unless $pid;
    my $banner = do { local $/; <$fh> };
    close $fh;
    return 0 unless defined $banner;
    my ($major, $minor) = $banner =~ /version\s+(\d+)\.(\d+)\.(\d+)/
        or return 0;
    return ($major > 3 || ($major == 3 && $minor >= 1)) ? 1 : 0;
}

# Find the most recent left-over backup directory next to $destpath whose
# name ends in $suffix.  Returns undef if there is none.
sub latest_leftover {
    my ($destpath, $suffix) = @_;
    # bsd_glob() does not split its pattern on whitespace the way glob()
    # does; quote glob metacharacters that are part of the path itself.
    (my $quoted = $destpath) =~ s/([\\\[\]{}*?~])/\\$1/g;
    my ($latest) = sort { $b cmp $a } bsd_glob("${quoted}.*${suffix}");
    return $latest;
}

# Run one backup of @$sourcepaths to a new snapshot next to $destpath.
# Returns true on success (including a backup skipped because of
# --minwait), false on failure.  Progress and errors go to the log.
sub perform_backup {
    my ($starttime, $destpath, $sourcepaths, $minwait, $rsync_opts) = @_;
    my @sourcepaths = @{$sourcepaths};
    my @opts        = @{$rsync_opts};

    # NOTE(review): these path definitions were lost in the wiki extraction
    # and are reconstructed from how the names are used further down (the
    # "${destpath}.*_INCOMPLETE" / "_IN_PROGRESS" recovery patterns and the
    # final rename that removes "_IN_PROGRESS").
    my $nbpath    = $destpath . '.' . timestamp_string($starttime);
    my $tmpbpath  = "${nbpath}_IN_PROGRESS";
    my $failbpath = "${nbpath}_INCOMPLETE";
    my ($nbvol, $nbdirs, $nbfile) = File::Spec->splitpath($nbpath);
    my $lbpath;

    # -e follows symbolic links, so also test -l to catch a dangling link.
    if (-e $destpath || -l $destpath) {
        if (! -l $destpath) {
            log_message("$destpath exists and is not a symbolic link!  Exiting...\n");
            return 0;
        }
        # Destination path exists and it is a symbolic link.  Resolve it to
        # find the last (successful) backup.
        my $linktarget = readlink($destpath);
        if (!defined($linktarget)) {
            log_message("Error reading symbolic link at $destpath\n");
            return 0;
        }
        my (undef, undef, $lbfile) = File::Spec->splitpath($linktarget);
        my $lbtime = parse_backup_time($lbfile);
        if (!defined($lbtime)) {
            log_message("Error parsing date/time in $lbfile\n");
            return 0;
        }
        # Make sure it has been at least "minwait" hours since the last
        # backup.
        if ((($starttime - $lbtime) / (60 * 60)) < $minwait) {
            log_message("Skipping backup -- has been less than $minwait hours since last backup to $destpath\n");
            return 1;
        }
        $lbpath = resolve_link_dest($linktarget, $nbpath);
    }
    else {
        # Destination path does not exist.  Make all directories above it
        # (the symbolic link itself is created at the end).
        my $parent = File::Spec->catpath($nbvol, $nbdirs, '');
        if ($parent ne '' && ! -d $parent) {
            if (!eval { mkpath($parent); 1 }) {
                log_message("Error creating directory $parent: $@");
                return 0;
            }
        }
    }

    # Check to see if there are any existing incomplete or in-progress
    # backups; if so, move the newest one to the current backup path and
    # use it as the basis.  Incomplete backups take precedence.
    my $leftover = latest_leftover($destpath, '_INCOMPLETE');
    $leftover = latest_leftover($destpath, '_IN_PROGRESS')
        unless defined $leftover;
    if (defined($leftover)) {
        log_message("Recovering from $leftover\n");
        # Best effort: if this fails we simply start from scratch.
        rename($leftover, $tmpbpath)
            or log_message("Warning: could not rename $leftover to $tmpbpath: $!\n");
    }

    #############
    # Do back up.
    #############

    # Check if RSYNC environment variable is set; if so, use that as the
    # rsync executable.
    my $rsyncbin = exists($ENV{'RSYNC'}) ? $ENV{'RSYNC'} : 'rsync';

    # rsync >= 3.1.0 can give periodic progress messages.
    push @opts, "--info=progress2" if rsync_supports_progress($rsyncbin);
    push @opts, "--link-dest=${lbpath}" if defined($lbpath);

    # Signal handler in case we are interrupted by Ctrl-C: park the partial
    # backup under the _INCOMPLETE name so the next run can recover it.
    local $SIG{INT} = sub {
        my $signame = shift;
        log_message("Interrupted during rsync from " . join(", ", @sourcepaths) . " to $tmpbpath\n");
        log_message("Moving $tmpbpath to $failbpath\n");
        rename $tmpbpath, $failbpath;
        report_duration($starttime);
        die "Exiting due to SIG$signame\n";
    };

    # Run rsync!  (List-form open: no shell is involved.)
    my @cmd = ($rsyncbin, '-ax', '--partial', @opts, @sourcepaths, $tmpbpath);
    log_message("Running command: " . join(' ', @cmd), "\n");
    my $rsyncpid = open(my $rsync_out, '-|', @cmd);
    if (!defined($rsyncpid)) {
        log_message("Error running rsync from " . join(", ", @sourcepaths) . " to $tmpbpath: $!\n");
        rename $tmpbpath, $failbpath;
        return 0;
    }

    # If there are progress messages coming from rsync, grab them here and
    # output them, but only when the first digit of the percentage changes
    # so that we don't get too many messages.
    {
        # rsync separates progress updates with carriage returns.
        local $/ = "\r";
        my $curpercent = '-';
        while (my $line = <$rsync_out>) {
            $line =~ s/\r$//;
            $line =~ s/\n$//;
            $line =~ s/^\s+//;
            my (undef, $percent) = split(/\s+/, $line);
            # Ignore anything that is not a progress record.
            next unless defined($percent) && $percent =~ /^\d+%$/;
            # String comparison: the '-' sentinel must differ from '0'.
            if (substr($percent, 0, 1) ne substr($curpercent, 0, 1)) {
                log_message("rsync (${rsyncpid}): $line\n");
            }
            $curpercent = $percent;
        }
    }
    close $rsync_out;
    if ($? != 0) {
        # For a piped child the failure is described by $?, not $!.
        my $reason = ($? & 127)
            ? 'killed by signal ' . ($? & 127)
            : 'exit status ' . ($? >> 8);
        log_message("Error running rsync from " . join(", ", @sourcepaths) . " to $tmpbpath: $reason\n");
        rename $tmpbpath, $failbpath;
        return 0;
    }

    # We have been writing to XXXX_IN_PROGRESS.  Now that we're done,
    # remove the "_IN_PROGRESS".
    if (!rename($tmpbpath, $nbpath)) {
        log_message("Error renaming $tmpbpath to $nbpath: $!\n");
        return 0;
    }

    # Point the canonical symbolic link (destpath) at the current backup.
    # Create the link under a temporary name and rename it into place so
    # that destpath never disappears, even if we are killed half-way.
    my $newlink = "${destpath}.tmp_symlink.$$";
    unlink $newlink;
    if (!symlink($nbfile, $newlink)) {
        log_message("Error creating symbolic link $newlink: $!\n");
        return 0;
    }
    if (!rename($newlink, $destpath)) {
        log_message("Error renaming $newlink to $destpath: $!\n");
        unlink $newlink;
        return 0;
    }
    return 1;
}

# Script entry point.  Takes the command-line arguments and returns the
# process exit status: 0 on success or skipped backup, -1 on usage error
# or failure.
sub main {
    my @argv = @_;

    # make sure standard error doesn't get buffered
    STDERR->autoflush(1);

    my $parsed      = parse_command_line(@argv);
    my @sourcepaths = @{ $parsed->{paths} };
    my $minwait     = $parsed->{minwait};

    if (scalar(@sourcepaths) < 2) {
        print STDERR usage_text();
        return -1;
    }
    if ($minwait !~ /\A-?(?:\d+(?:\.\d*)?|\.\d+)\z/) {
        print STDERR "Invalid --minwait value '$minwait' (expected a number of hours)\n",
            usage_text();
        return -1;
    }

    my $destpath = pop @sourcepaths;
    # A trailing slash would make -l follow the link and would put the
    # time stamp inside the directory instead of next to it.
    $destpath =~ s{(?<=[^/])/+\z}{};

    my $starttime = time();
    my $ok = perform_backup($starttime, $destpath, \@sourcepaths, $minwait,
                            $parsed->{rsync_opts});
    report_duration($starttime);
    return $ok ? 0 : -1;
}

# Run only when executed as a script, so the subs above can also be loaded
# (e.g. by a test) without starting a backup.
exit(main(@ARGV)) unless caller;

1;