User Tools

Site Tools


biac:backup:scripts:run_backup

A Backup Script for Systems with Rsync

Below is the code for a sample backup script that uses rsync and the --link-dest option to create snapshots. The disclaimer is repeated here in case you missed it:

USE AT YOUR OWN RISK. THIS SOFTWARE IS NOT GUARANTEED TO WORK AS DOCUMENTED.

Usage is:

run_backup SOURCE_PATHS... DEST_PATH

SOURCE_PATHS and DEST_PATH can be any paths supported by rsync. DEST_PATH must not exist the first time you run this script. It will be used as the base path for snapshots, with a timestamp appended. DEST_PATH will be created as a symbolic link to the latest backup. If you run it multiple times, the most recent previous backup will be used in the --link-dest option to rsync. If your backup media is expected to always be connected and available (or at least at the scheduled backup times), this can be put in a cron job to run automatically. For example the following line in your crontab:

15 2 * * * /path/to/run_backup /source/path1 /source/path2 /dest/path

will run the backup script to create backups of /source/path1 and /source/path2 to /dest/path at 2:15 AM every morning. Run man crontab and man 5 crontab for further details on editing crontab files and how to run these commands more or less frequently. (Note that the default editor for crontab is vi – if you get stuck, type :q! to exit vi and to discard all changes. Or run env EDITOR=nano crontab -e to edit your crontab with the nano editor.)

Perl script: run_backup

#!/usr/bin/env perl
 
# run_backup
#
# Author: Syam Gadde (gadde@biac.duke.edu)
#
# Back up files from source paths to destination directory.  Destination
# (which should not exist the first time this is called) will be created as a
# symbolic link to the actual backup, which will have a date/time stamp in
# the directory name.  Subsequent backups to the same destination will only
# transfer changed files, and will use hard links to unchanged files in the
# previous backup to save space.  Source paths can be files or directories.
#
# If you want to specify an alternative path for the rsync binary, put it in
# the RSYNC environment variable.
#
# DISCLAIMER: USE AT YOUR OWN RISK.  THIS SOFTWARE IS NOT GUARANTEED TO WORK
# AS DOCUMENTED.
#
# $Id: run_backup,v 1.3 2009/09/14 18:57:06 gadde Exp $
 
use strict;
 
use File::Path;
use File::Spec;
use Time::Local;
use IO::Handle;
 
# Write one log entry to STDERR.
#
# All arguments are concatenated without a separator and prefixed with the
# local time, the parent and current process IDs, and the script name.  No
# newline is appended; callers supply their own.
sub log_message {
  my $stamp = localtime();
  my $prefix = sprintf("%s (%s:%s): run_backup: ", $stamp, getppid(), $$);
  print STDERR $prefix . join('', @_);
}
 
# Log how long the backup has been running (called on success and on
# failure).
#
# $starttime is an epoch time in seconds.  The elapsed time is broken into
# days/hours/minutes/seconds; a unit is printed only when it or a larger
# unit is non-zero, and seconds are always printed.
sub report_duration {
  my ($starttime,) = @_;
  my $remaining = time() - $starttime;

  # Peel off seconds, then minutes, then hours; whatever is left is days.
  my @parts = ();
  for my $radix (60, 60, 24) {
    my $part = $remaining % $radix;
    push @parts, $part;
    $remaining = ($remaining - $part) / $radix;
  }
  my ($secs, $mins, $hours) = @parts;
  my $days = $remaining;

  my @fields = ();
  push @fields, " ${days}d" if $days;
  push @fields, " ${hours}h" if ($days || $hours);
  push @fields, " ${mins}m" if ($days || $hours || $mins);

  log_message("Backup duration: ", @fields, " ${secs}s\n");
}
 
# Flush every write to standard error immediately instead of buffering it.
STDERR->autoflush(1);

# Minimum number of hours required between backups.  With the default of -1
# the "too soon" check further down should effectively never trigger.
my $minwait = -1;
# Extra options passed through to rsync, in command-line order.
my @rsync_opts = ();

# Pick our own --minwait=/--rsyncopt= options out of the command line and
# leave only the positional arguments (sources and destination) in @ARGV.
my @positional = ();
foreach my $arg (@ARGV) {
  if ($arg =~ /^--minwait=(.*)$/) {
    $minwait = $1;
    next;
  }
  if ($arg =~ /^--rsyncopt=(.*)$/) {
    push @rsync_opts, $1;
    next;
  }
  push @positional, $arg;
}
@ARGV = @positional;

# At least one source path and the destination path are required.
if (@ARGV < 2) {
  print STDERR <<EOM;
Not enough arguments!
 
Usage: run_backup [--minwait=HOURS] [--rsyncopt=OPT] SOURCE_PATHS... DEST_PATH
 
DEST_PATH should not exist when calling this script the first time.
SOURCE_PATHS is a list of one or more files or directories, using rsync
conventions for source paths.
If --minwait is specified, if the last backup was less than HOURS hours ago,
this script will return successfully, but do nothing.
If you need to specify extra options to rsync, you can specify them with the
--rsyncopt option, which may be specified multiple times.  Note: if you
use options that require multiple command-line arguments, each must be
specified as a separate option.  For example the rsync option:
  -e 'ssh -l user'
must be specified as
  --rsyncopt=-e --rsyncopt='ssh -l user'
or you should use the single-argument forms:
  --rsh='ssh -l user'
which translates to:
  --rsyncopt=--rsh='ssh -l user'
EOM
  exit -1;
}
 
# Exit status for the script; the FAIL path at the bottom changes it to -1.
my $retval = 0;

# The last positional argument is the destination; whatever remains in
# @ARGV after removing it is the list of source paths.
my $destpath = pop @ARGV;
my @sourcepaths = @ARGV;
# Path of the previous backup; stays undefined when no previous backup is
# found.
my $lbpath = undef;

# Take the start time once and derive the YYYYMMDDTHHMMSS stamp (local time)
# used in directory names from it.
my $starttime = time();
my @now = localtime($starttime);
my $datetimestr = sprintf("%04d%02d%02dT%02d%02d%02d",
                          $now[5] + 1900, $now[4] + 1, @now[3, 2, 1, 0]);

# All three backup directory names share the stamped prefix.
my $nbpath = "${destpath}.${datetimestr}";   # new (final) backup path
my $tmpbpath = "${nbpath}_IN_PROGRESS";      # where rsync writes while running
my $failbpath = "${nbpath}_INCOMPLETE";      # name given to a failed backup

# Split the new backup path into volume, directories and final component so
# that a "relative" symbolic link can be created later.
my ($nbvol, $nbdirs, $nbfile) = File::Spec->splitpath($nbpath);
 
# Inspect the destination path to find the previous backup, if any.
#
# The symbolic-link test comes first and uses -l (lstat), so a link whose
# target has been removed is still recognised as "the previous backup link".
# Testing -e first would follow the link, report "does not exist", and send
# a dangling link into the directory-creation branch, where mkpath dies.
if (-l $destpath) {
  # Resolve the symbolic link and find out which actual path it points to.
  # This will have been the last (successful) backup.
  $lbpath = readlink($destpath);
  if (!defined($lbpath)) {
    log_message("Error reading symbolic link at $destpath\n");
    goto FAIL;
  }
  # Extract the date/time stamp in the last backup.
  my ($lbvol, $lbdirs, $lbfile) = File::Spec->splitpath($lbpath);
  if ($lbfile !~ /(\d\d\d\d)-?(\d\d)-?(\d\d)T(\d\d):?(\d\d):?(\d\d)/) {
    log_message("Error parsing date/time in $lbfile\n");
    goto FAIL;
  }
  my ($lbyear, $lbmon, $lbday, $lbhour, $lbmin, $lbsec) =
    ($1, $2, $3, $4, $5, $6);
  # timelocal() croaks on out-of-range fields (the pattern above accepts any
  # digits), so trap that and go through the normal failure path instead of
  # dying without a duration report.
  my $lbtime = eval {
    timelocal($lbsec, $lbmin, $lbhour, $lbday, $lbmon - 1, $lbyear - 1900);
  };
  if (!defined($lbtime)) {
    log_message("Error parsing date/time in $lbfile\n");
    goto FAIL;
  }
  # Check to make sure it has been at least "minwait" hours since the
  # last backup.
  if ((($starttime - $lbtime) / (60 * 60)) < $minwait) {
    log_message("Skipping backup -- has been less than $minwait hours since last backup to $destpath\n");
    goto EXIT;
  }
  # We are going to send the last backup path (lbpath) to rsync via the
  # --link-dest option.  If it is a relative path, then it is interpreted
  # as relative to the new backup path.  Make sure we tell rsync the right
  # thing.
  if ($nbvol eq $lbvol && $lbdirs eq '') {
    # Symbolic link is a relative path (this should be the normal case).
    if ($nbdirs eq '') {
      # New backup path is also a relative path, so make last backup path
      # (lbpath) relative to new backup path, as rsync expects.
      $lbpath = File::Spec->catpath($nbvol, '..', $lbfile);
    } else {
      # New backup path has a directory part, so reuse that directory part
      # for the last backup path.
      $lbpath = File::Spec->catpath($nbvol, $nbdirs, $lbfile);
    }
  }
} elsif (-e $destpath) {
  # Something other than a symbolic link is in the way; refuse to touch it.
  log_message("$destpath exists and is not a symbolic link!  Exiting...\n");
  goto FAIL;
} else {
  # Destination path does not exist.  Make all directories above destination
  # path (we will create symbolic link later): create the full path, then
  # remove only its last component.  mkpath croaks on failure, so trap that
  # and report it through the normal failure path.
  if (!eval { mkpath($destpath); 1 }) {
    log_message("Error creating directories for $destpath: $@");
    goto FAIL;
  }
  rmdir $destpath;
}
 
# Return the path of the newest leftover backup directory for $base, or
# nothing when there is none.
#
# A leftover is an entry in $base's directory whose name is the last
# component of $base, followed by ".", any text, and $suffix.  "Newest" is
# the name that sorts last as a string.  The directory is read directly and
# names are compared literally, so whitespace or wildcard characters in
# $base cannot change which entries match (as they would with glob).
sub _latest_leftover {
  my ($base, $suffix) = @_;
  my ($vol, $dirs, $leaf) = File::Spec->splitpath($base);
  my $parent = File::Spec->catpath($vol, $dirs, '');
  $parent = File::Spec->curdir() if $parent eq '';

  opendir(my $dh, $parent) or return;
  my @names = grep { /\A\Q$leaf\E\..*\Q$suffix\E\z/s } readdir($dh);
  closedir($dh);
  return if !@names;

  my ($newest) = sort { $b cmp $a } @names;
  return File::Spec->catpath($vol, $dirs, $newest);
}

# Check to see if there are any existing incomplete or in progress backups.
# If so, move the newest one to the current in-progress path so rsync can
# use it as its starting point.  An incomplete backup takes precedence over
# an in-progress one.  A failed move is logged but not fatal.
my $lastincomplete = _latest_leftover($destpath, '_INCOMPLETE');
if (defined($lastincomplete)) {
  log_message("Recovering from $lastincomplete\n");
  rename($lastincomplete, $tmpbpath)
    or log_message("Error renaming $lastincomplete to $tmpbpath: $!\n");
} else {
  my $lastinprogress = _latest_leftover($destpath, '_IN_PROGRESS');
  if (defined($lastinprogress)) {
    log_message("Recovering from $lastinprogress\n");
    rename($lastinprogress, $tmpbpath)
      or log_message("Error renaming $lastinprogress to $tmpbpath: $!\n");
  }
}
 
#############
# Do back up.
#############

# Check if RSYNC environment variable is set (and non-empty); if so, use
# that as the rsync executable.
my $rsyncbin = (defined($ENV{'RSYNC'}) && length($ENV{'RSYNC'}))
  ? $ENV{'RSYNC'} : 'rsync';

# Ask rsync for its version.  The command is run with a list-form pipe open,
# the same way the backup itself is run, so the executable path is never
# interpreted by a shell (spaces or shell metacharacters in it are safe).
# If rsync cannot be run here, the version is simply left unknown.
my $rsync_version = '';
if (open(my $verfh, '-|', $rsyncbin, '--version')) {
  local $/ = undef;             # read the whole output in one go
  my $output = <$verfh>;
  $rsync_version = $output if defined($output);
  close($verfh);
}

# Check if version is >= 3.1.0.  If so, add the --info=progress2 option to
# get periodic progress messages.
my ($vermajor, $verminor, $verrel) = ($rsync_version =~ /version\s+(\d+)\.(\d+)\.(\d+)/);
my $doprogress = 0;
if (defined($verrel)) {
  if ($vermajor > 3 || ($vermajor == 3 && ($verminor >= 1))) {
    $doprogress = 1;
  }
}
if ($doprogress) {
  push @rsync_opts, "--info=progress2";
}

# Add --link-dest option so unchanged files are hard-linked to the previous
# backup (only when a previous backup was found).
if (defined($lbpath)) {
  push @rsync_opts, "--link-dest=${lbpath}";
}
# Install a SIGINT (Ctrl-C) handler: log the interruption, rename the
# in-progress directory to the "incomplete" name, report the elapsed time,
# and terminate via die.
$SIG{INT} = sub {
  my ($signame) = @_;
  my $sources = join(", ", @sourcepaths);
  log_message("Interrupted during rsync from $sources to $tmpbpath\n");
  log_message("Moving $tmpbpath to $failbpath\n");
  rename $tmpbpath, $failbpath;
  report_duration($starttime);
  die "Exiting due to SIG$signame\n";
};
# Run rsync!  The command is started with a list-form pipe open (no shell),
# with its standard output connected to us so progress can be filtered.
my @cmd = ($rsyncbin, '-ax', '--partial', @rsync_opts, @sourcepaths, $tmpbpath);
log_message("Running command: " . join(' ', @cmd), "\n");
my $rsyncpid = open(my $rsyncfh, '-|', @cmd);
if (!defined($rsyncpid)) {
  log_message("Error running rsync from " . join(", ", @sourcepaths) . " to $tmpbpath: $!\n");
  # Best effort: the in-progress directory only exists here if an earlier
  # leftover backup was moved into place.
  rename $tmpbpath, $failbpath;
  goto FAIL;
}
# If there are progress messages coming from rsync, grab them here and output
# them (but filter by the first digit in the percent progress field so that
# we don't get too many messages).  Records are separated by carriage
# returns; the separator is changed only for the duration of this loop.
my $curlead = '-';              # leading percent digit of the previous record
{
  local $/ = "\r";
  while (my $line = <$rsyncfh>) {
    $line =~ s/\r$//;
    $line =~ s/\n$//;
    $line =~ s/^\s+//;
    next if $line eq '';        # nothing to report for empty records
    my (undef, $percent) = split(/\s+/, $line);
    my ($lead) = (defined($percent) ? ($percent =~ /^(\d)\d*%$/) : ());
    if (!defined($lead)) {
      # Not a progress record (second field is not "NN%"): pass it through
      # unfiltered and leave the progress state alone.
      log_message("rsync (${rsyncpid}): $line\n");
      next;
    }
    # Compare as strings: these are characters, and the initial '-' must
    # differ from '0' so that the first progress record is reported.
    if ($lead ne $curlead) {
      log_message("rsync (${rsyncpid}): $line\n");
    }
    $curlead = $lead;
  }
}
# Closing a piped handle waits for the child and stores its status in $?.
close($rsyncfh);
my $closeerr = "$!";
my $status = $?;
if ($status != 0) {
  # Describe the child's status; $! says nothing useful about a non-zero
  # exit, so it is only used when the status could not be collected.
  my $why;
  if ($status == -1) {
    $why = "could not collect exit status: $closeerr";
  } elsif ($status & 127) {
    $why = "killed by signal " . ($status & 127);
  } else {
    $why = "exit status " . ($status >> 8);
  }
  log_message("Error running rsync from " . join(", ", @sourcepaths) . " to $tmpbpath: $why\n");
  rename $tmpbpath, $failbpath;
  goto FAIL;
}
 
# We have been writing to XXXX_IN_PROGRESS.  Now that we're done,
# remove the "_IN_PROGRESS".
rename($tmpbpath, $nbpath) || do {
  log_message("Error renaming $tmpbpath to $nbpath: $!\n");
  goto FAIL;
};

# Replace canonical symbolic link (destpath) to point to the current backup.
# Both steps are checked: if the link is not updated, the next run would
# not find this backup, so that must not be reported as success.
if (defined($lbpath) || -l $destpath) {
  unlink($destpath) || do {
    log_message("Error removing old symbolic link $destpath: $!\n");
    goto FAIL;
  };
}
# The link target is just the final path component, i.e. relative to the
# directory that contains the link.
symlink($nbfile, $destpath) || do {
  log_message("Error creating symbolic link $destpath -> $nbfile: $!\n");
  goto FAIL;
};

goto EXIT;

# Common failure exit: every "goto FAIL" above lands here.
FAIL:
$retval = -1;

# Common exit (success, skip, or failure): report elapsed time and quit.
EXIT:
report_duration($starttime);
exit $retval;
biac/backup/scripts/run_backup.txt · Last modified: 2023/02/23 18:43 (external edit)