#!/usr/bin/perl -w
#
# clean-deploys: Check and clean-up the local repositories for deployed vhosts
#

use strict;
use File::Path qw(rmtree);
use Getopt::Long;
use Pod::Usage;
use Sys::Hostname;
my $host = hostname;
use FindBin;
use lib "$FindBin::Bin/../perllib";
use mySociety::VHosts;

# Boolean command-line switches; all are off until GetOptions says otherwise.
my ( $quiet, $delete, $force, $help ) = ( 0, 0, 0, 0 );

# How many timestamped deploys to retain (overridable with --keep).
my $keep = 5;

# Vhosts named with --vhost; when left empty the state file is consulted.
my @vhosts_to_check;

# Entries in the state file that are never examined.
my @ignore_vhosts = ( "$host", 'debian.mysociety.org' );

# Where the vhost checkouts live, and the file listing the deployed vhosts.
my $vhosts_dir            = '/data/vhost';
my $deployed_vhosts_state = '/var/lib/server-state/vhost-list';

# Options
#
# Parse the command line into the option variables declared above. An
# unknown or malformed option aborts the run with a usage message rather
# than carrying on with defaults: this script can delete directories, so a
# mistyped "--keep" must not silently fall back to the default.
GetOptions(
    'quiet'   => \$quiet,
    'delete'  => \$delete,
    'force'   => \$force,
    'help'    => \$help,
    'keep=i'  => \$keep,
    'vhost=s' => \@vhosts_to_check,
) or pod2usage(-exitval => 2, -verbose => 0);

# pod2usage() exits by itself with the given -exitval.
pod2usage(-exitval => 0, -verbose => 2) if $help;

# "keep=i" accepts negative integers; a negative value would make the
# retention test select every deploy except the live one.
pod2usage(
    -message => "--keep must not be negative (got $keep).",
    -exitval => 2,
    -verbose => 0,
) if $keep < 0;

my $vhosts = mySociety::VHosts->new();

# Unless provided with some on the commandline get a list of deployed
# vhosts from the local state file. The file is read one vhost per line;
# surrounding whitespace is stripped, blank lines are skipped and any
# vhost listed in @ignore_vhosts is left out.
unless ( @vhosts_to_check ) {
    # Three-argument open with a lexical handle; report the real reason
    # for a failure instead of always claiming the file is missing.
    open my $state_fh, '<', $deployed_vhosts_state
        or die "Cannot open $deployed_vhosts_state: $!";

    my %is_ignored = map { $_ => 1 } @ignore_vhosts;

    while ( my $v = <$state_fh> ) {
        $v =~ s/^\s+|\s+$//g;

        # A blank line is not a vhost name.
        next if $v eq '';

        if ( $is_ignored{$v} ) {
            print "${v}: skipping - hardcoded exception.\n" unless $quiet;
            next;
        }
        push @vhosts_to_check, $v;
    }

    close $state_fh;
}

# Cwd is loaded here so that this section is self-contained; it is used to
# compare deploy directories with the live symlink by their resolved paths.
require Cwd;

# Run "git status --porcelain" in $dir without going through a shell.
# Returns a two-element list: the command's standard output (possibly
# empty) and a flag that is true only if git could be started and exited
# with status zero. Callers must not treat empty output as "clean" unless
# the flag is true.
sub _git_status {
    my ($dir) = @_;

    my $pid = open( my $git, '-|', 'git', '-C', $dir, 'status', '--porcelain' );
    return ( '', 0 ) unless $pid;

    my $output = do { local $/; <$git> };
    $output = '' unless defined $output;

    # close() on a pipe returns false when the child exited non-zero.
    my $ok = close($git) ? 1 : 0;
    return ( $output, $ok );
}

# Resolve $path to a canonical path with symlinks followed. Returns the
# empty string if the path cannot be resolved.
sub _real_path {
    my ($path) = @_;
    my $real = eval { Cwd::abs_path($path) };
    return defined $real ? $real : '';
}

# Go through our list of vhosts to check.
foreach my $vhost ( @vhosts_to_check ) {

    # Get and merge vhost and site data from the JSON.
    my $vhost_conf;
    if ( $vhosts->{vhosts}{$vhost} ) {
        $vhost_conf = $vhosts->vhost($vhost);
    } else {
        print "$vhost: skipping - not in JSON.\n" unless $quiet;
        next;
    }
    my $site_conf = $vhosts->site($vhost_conf->{site});

    # Site settings first, then vhost settings, so the vhost wins on clashes.
    my $conf = { %{ $site_conf || {} }, %{ $vhost_conf || {} } };

    # Skip Redirect Sites.
    if ( $conf->{redirects_only} ) {
        print "$vhost: skipping - redirects only.\n" unless $quiet;
        next;
    }

    # Prepare to check for modified or untracked things in the working copies
    my $repo = $conf->{git_repository} || $conf->{private_git_repository};
    unless ( $repo ) {
        # Without a repository name there is no working copy to look at.
        print "$vhost: skipping - no git repository configured.\n" unless $quiet;
        next;
    }
    my $vhost_dir = "${vhosts_dir}/${vhost}";

    # Are we dealing with a timestamped deploy?
    if ( $conf->{timestamped_deploy} ) {
        # The live deploy is whatever the "<repo>" symlink resolves to.
        # Comparing resolved paths (rather than the raw link text) also
        # recognises the live deploy when the symlink target is relative.
        my $live_real = -l "${vhost_dir}/${repo}" ? _real_path("${vhost_dir}/${repo}") : '';

        my @deploys = sort glob "${vhost_dir}/${repo}-????-??-?????-??-??";
        my $num_deploys = @deploys;
        my $c = 0;
        foreach my $deploy ( @deploys ) {
            $c++;

            my $is_live = $live_real ne '' && _real_path($deploy) eq $live_real;
            print "$vhost: working on the live deploy - ${deploy}\n" if (!$quiet && $is_live);

            my ( $git_status, $git_ok ) = _git_status($deploy);
            if ( !$git_ok ) {
                # A failed "git status" says nothing about cleanliness, so
                # never take its empty output as permission to delete.
                warn "$vhost: git status failed for $deploy - treating it as not clean.\n";
            } elsif ( $git_status ne '' ) {
                print "$vhost: timestamped deploy $deploy repo isn't clean.\n" unless $quiet;
                print "$git_status" unless $quiet;
            }
            my $is_clean = $git_ok && $git_status eq '';

            # Only clean deploys are candidates, unless --force was given.
            next unless $is_clean || $force;

            # The live deploy is never removed, whatever its position.
            next if $is_live;

            # Delete older timestamped deploys, keeping around the most
            # recent $keep. This relies on the order returned by sort() on
            # the list of timestamped deploy directories, which should be
            # fine as they contain sortable timestamps.
            next if $c > $num_deploys - $keep;

            if ( $delete ) {
                rmtree( [ $deploy ] );
                if ( -e $deploy ) {
                    # rmtree() could not remove everything; say so rather
                    # than claiming success.
                    warn "$vhost: failed to delete $deploy\n";
                } else {
                    print "$vhost: deleted $deploy\n" unless $quiet;
                }
            } else {
                print "$vhost: would delete $deploy\n\n" unless $quiet;
            }
        }
    } else {
        my ( $git_status, $git_ok ) = _git_status("${vhost_dir}/${repo}");
        if ( !$git_ok ) {
            warn "$vhost: git status failed for ${vhost_dir}/${repo}.\n";
        } elsif ( $git_status ne '' ) {
            print "$vhost: repo isn't clean.\n" unless $quiet;
            print "$git_status\n" unless $quiet;
        } else {
            print "$vhost: repo is clean.\n" unless $quiet;
        }
    }
}

__END__

=head1 NAME

clean-deploys

=head1 SYNOPSIS

clean-deploys --help | [OPTIONS]

=head1 DESCRIPTION

Checks deployed vhosts for untracked or changed files in the local working
copies of their repositories. In the case of timestamped deploys,
optionally removes all but the most recent deploys (five by default; see
B<--keep>), always preserving the current live copy.

By default B<clean-deploys> will look at all vhosts listed in the
current state file for the server in question. The B<--vhost> option
(q.v.) can be used to specify particular vhosts to check.

Note that aside from B<--help> all other operations expect a copy of
`/data/servers/vhosts.pl` to be present on the system.

=head1 OPTIONS

=over 4

=item --help

Displays this help message.

=item --quiet

This option will suppress all output except for errors, for example
to run under cron.

=item --delete

This will cause all but the most recent 5 timestamped deploys (or the
number given with B<--keep>) that do not have any untracked or changed
files to be deleted without further notice. Recentness is simply
determined using a sort on the directory names of the timestamped
deploys. The current live deploy, as determined by the target of the
symlink to the bare repository name, will not be deleted regardless of
its place in the sort order.

=item --force

Using this option with B<--delete> will also delete any deploys that
have untracked or changed files in the working copies. Using the option
without the B<--delete> flag and without the B<--quiet> flag will add
output stating which additional deploys would be deleted.

=item --keep

The number of old timestamped deploys to keep around. The default is 5.

=item --vhost

This option is used to check specific vhosts rather than checking all
deployed vhosts and should be specified once for each vhost that needs
checking and/or cleaning.

=back

=cut
