#!/usr/bin/perl -w

# Copyright (c) 2001-2009, Kungliga Tekniska Högskolan
# (Royal Institute of Technology, Stockholm Sweden)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the university nor the names of its contributors
#    may be used to endorse or promote products derived from this software
#    without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

use strict;
use Date::Manip;
use File::Temp;

my $dsmpath = "/opt/tivoli/tsm/client/ba/bin";
my $scratchpath = "/var/tsm/afsbackup";
my @saveintervals = (0, 60, 150, 240, 360, 540, 720, 32767);

my $now = ParseDate("now");
my %bupset;
my @periodlimits;
foreach my $j (0..7) {
    my $delta = " - $saveintervals[$j] days";
    $periodlimits[$j] = DateCalc("now", $delta);
}
my %bupsetsorted;
my %tosave;
my %fullbackups;
my %vols_left;
my %vols_deleted;

(my $outfh, my $outfname) = mkstemp("/tmp/tsm-dump-archive-clean-XXXXXX");

sub bupcmp {
    my $date1 = ParseDate($a->[1]);
    my $date2 = ParseDate($b->[1]);
    return Date_Cmp($date2, $date1);
}

sub mark_for_deletion {
    my $s = shift;
    if (defined $vols_left{$s}) {
	print $outfh "$scratchpath/$s\n";
	undef $vols_left{$s};
	$vols_deleted{$s} = 1;
    } else {
	if (defined $vols_deleted{$s}) {
	    print LOGFD "Volume dump $s already marked for deletion.\n";
	} else {
	    die "Internal error: Deleting $s, but it didn't exist.";
	}
    }
}

sub print_buplist {
    my $str = shift;
    my $listp = shift;
    my $vol = shift;
    my $i = 0;
    while (exists $listp->[$i]) {
	print LOGFD "$str $vol.$listp->[$i]->[0].$listp->[$i]->[1] \n";
	$i++
    }
}

sub divide_in_timeperiods {
    my $listp = shift;
    my $vol = shift;
    my @period;
    my $i = 0;
    my $j = 0;
    while (exists $listp->[$i]) {
	for $j (1..8) {
	    if (Date_Cmp($periodlimits[$j - 1], $listp->[$i]->[1]) >= 0 and
		Date_Cmp($listp->[$i]->[1], $periodlimits[$j])>0) {
		push @{$period[$j]}, $listp->[$i];
		$i++;
		last;
	    }
	}
    }
    return \@period;
}

sub delete_full_bups {
    my $periodsp = shift;
    my $vol = shift;
    my $i = 1;
    for (1..7) {
	if (exists($periodsp->[$_])) {
	    while (length(@{$periodsp->[$_]} > 1)) {
		my $notdate = $periodsp->[$_]->[0]->[0];
		die "Internal error: Incremental backup treated as a full backup." if $notdate ne "00000000";
		my $basedate = $periodsp->[$_]->[0]->[1];
		my $s = "$vol.backup.$notdate.$basedate";

		my $has_dep_inc = 0;
		my $inc;
		for my $bup (@{$bupset{$vol}}) {
		    my $incbasedate = $bup->[0];
		    my $incdate = $bup->[1];
		    if ($incbasedate eq $basedate) {
			my $inc = "$vol.backup.$incbasedate.$incdate";
			if (defined $vols_left{$inc}) {
			    $has_dep_inc = 1;
			    print LOGFD "Found an old incremental backup: $inc\n";
			}
		    }
		}
		
		if ($has_dep_inc == 0) {
		    print LOGFD "Deleting backup $s not last in period $_.\n";
		    mark_for_deletion($s);
		} else {
		    print LOGFD "Keeping backup $s because of dependant incremental backups.\n";
		}
		
		shift @{$periodsp->[$_]};
	    }
	    my $notdate = $periodsp->[$_]->[0]->[0];
	    die "Internal error: Incremental backup treated as a full backup." if $notdate ne "00000000";
	    my $basedate = $periodsp->[$_]->[0]->[1];
	    my $s = "$vol.backup.$notdate.$basedate";
	    print LOGFD "Keeping $s last in its period $_.\n";
	}
    }
}

Date_Init();

my $nowtime = UnixDate("now", "%q");

open LOGFD, ">>", "/var/log/afs-tsm/cleanlog.$nowtime" or die "Can't open logfile";
open STDOUT, ">&LOGFD" or die "Can't dup LOGFILE: $!";
open STDERR, ">&LOGFD" or die "Can't dup LOGFILE: $!";

LOGFD->autoflush(1);

print LOGFD "Scanning volume dumps...\n";

open(ARCHIVE, "${dsmpath}/dsmc query archive \"$scratchpath/*\"| "); #
while (<ARCHIVE>) {
    chomp $_;
    if (m; $scratchpath/(.+)\.backup\.(\d\d\d\d\d\d\d\d)\.(\d\d\d\d\d\d\d\d) ;) {
	my $vol = $1;
	my @date = ($2, $3);
	push @{$bupset{$vol}}, \@date;
	
	my $s = $vol . ".backup." . $2 . "." . $3;
	print LOGFD "Found volume dump: $s\n";
	$vols_left{$s} = 1;
    } else {
	    print LOGFD "Ignored dsmc query output: $_\n";
	}
}
close ARCHIVE;

print LOGFD "Sorting volume dumps...\n";

foreach my $key  (sort keys %bupset ) {
    my $j = 0;
    my $i = 0;
    
    @{$bupsetsorted{$key}} =  sort bupcmp @{$bupset{$key}};

    print LOGFD "Processing volume $key\n";

# Keep all volume dumps that are less than 30 days old. Both full and
# incremental backups are kept.

    while (exists( $bupsetsorted{$key}->[$i])) {
	my $datebup = ParseDate($bupsetsorted{$key}->[$i]->[1]);
	my $delta = DateCalc($datebup, $now); # computes $now - $datebup
	my $deltadays = Delta_Format($delta, 1, "%dt");
	$j = $i; # save the loopvariables value
	# Stop when we find the newest backup that is older than 30 days and is not the last backup.
	last if ($deltadays > 30 and $i > 0);
	$i++
	}

    @{$tosave{$key}} = splice @{$bupsetsorted{$key}}, 0, $j;

    my $lastinc = -1;
    for $i (0..$#{$bupsetsorted{$key}}) {
	if ($bupsetsorted{$key}->[$i]->[0] ne "00000000") {
	    $lastinc = $i;
	    last;
	}
    }

    $i=0;
    while (exists($bupsetsorted{$key}->[$i])) {
	my $archdates = $bupsetsorted{$key}->[$i];
	if ($archdates->[0] ne "00000000") {
	    unless ($i == $lastinc) {
		my $s = "$key.backup.$archdates->[0].$archdates->[1]";
		print LOGFD "Deleting incr $s too old.\n";
		mark_for_deletion($s);
	    }
	}
	else {
	    push @{$fullbackups{$key}}, $archdates;
	}
	$i++;
    }

    print_buplist("Keeping volume", \@{$tosave{$key}}, $key);

# Save one extra fullbackup so as to make all incremental backups useful.
    my $extravol = shift @{$fullbackups{$key}};
    print LOGFD "Keeping volume dump: $key.backup.$extravol->[0].$extravol->[1]\n";
    my $periodsp = divide_in_timeperiods(\@{$fullbackups{$key}}, $key);

    delete_full_bups($periodsp, $key);

    print $outfh "\n";
    print LOGFD "\n";
}

close($outfh) or die;

## Sanity check
my %fulldumps_left;
my %latest_left;
for my $s (keys %vols_left) {
    if (defined $vols_left{$s}) {
	if ($s =~ m;(.+)\.backup\.(\d+)\.(\d+);) {
	    print LOGFD "Sanity check for $s...\n";
	    my $vol = $1;
	    my $date1 = $2;
	    my $date2 = $3;
	    if (not defined $latest_left{$vol} or $latest_left{$vol} < $date2) {
		$latest_left{$vol} = $date2;
	    }
	    if ($date1 != "00000000") {
		print LOGFD "   ...is incremental.\n";
		my $base_s = $vol . ".backup.00000000." . $date1;
		if (not defined $vols_left{$base_s}) {
		    if (defined $vols_deleted{$base_s}) {
			die "Sanity check failed: $base_s was to be deleted even though $s still exists.\n";
		    } else {
			print LOGFD "Sanity check warning: $base_s is gone even though $s still exists.\n";
		    }
		} else {
		    print LOGFD "   ...has a base volume dump.\n";
		}
	    } else {
		print LOGFD "   ...is a base volume dump.\n";
		if (not defined $fulldumps_left{$vol} or $fulldumps_left{$vol}->{'date'} < $date2) {
		    $fulldumps_left{$vol} = {'name' => $s, 'date' => $date2};
		}
	    }
	} else {
	    die "Internal error: Invalid volume id: $s\n";
	}
    } else {
	# perl's keys function is strange
	#die "Internal error: There is a key $s in \%vols_left but it is not defined. Huh?\n"
	print LOGFD "Volume $s is to be deleted.\n";
    }
}
for my $s (keys %vols_deleted) {
    if ($s =~ m;(.+)\.backup\.(\d+)\.(\d+);) {
	print LOGFD "Volume dump $s was to be deleted...\n";
	my $vol = $1;
	my $date1 = $2;
	my $date2 = $3;
	if ($date2 > $latest_left{$vol}) {
	    die "Sanity check failed: $s was to be deleted, but it is newer than the latest dump at $latest_left{$vol} which was to be kept.";
	}
	if (defined $fulldumps_left{$vol}) {
	    my $kept = $fulldumps_left{$vol}->{'name'};
	    print LOGFD "   ...but there is another full dump in $kept.\n";
	} else {
	    die "Sanity check failed: $s was to be deleted but there is no other full dump of this volume.\n";
	}
    } else {
	die "Internal error: Invalid volume id: $s\n";
    }
}

my $ret = system("false DID NOT ${dsmpath}/dsmc delete archive -NOPROMPT -VERBOSE -FILELIST=$outfname");
if($ret == 0) {
    unlink($outfname) or die;
}

close(LOGFD) or die;
