#! /usr/bin/perl -w

# cvsu - do a quick check to see what files are out of date.
# Initially written by Tom Tromey <tromey@cygnus.com>
# Rewritten by Pavel Roskin <proski@gnu.org>

# $Id: cvsu.pl,v 1.5 2000/10/15 04:45:41 proski Exp $

require 5.004;
use Getopt::Long;
use Time::Local;
use strict;

use vars qw($list_types %messages %options @batch_list $batch_cmd
	    $no_recurse $explain_type $find_mode $short_print
	    $no_cvsignore $file $curr_dir
	    @standard_ignores $ignore_rx);

Main();

sub Main
{
    # types of files to be listed
    $list_types = "^.FCL";

    # long status messages
    %messages = (
	"?" => "Unlisted file",
	"." => "Known directory",
	"F" => "Up-to-date file",
	"C" => "CVS admin directory",
	"M" => "Modified file",
	"S" => "Special file",
	"D" => "Unlisted directory",
	"L" => "Symbolic link",
	"H" => "Hard link",
	"U" => "Lost file",
	"X" => "Lost directory",
	"A" => "Newly added",
	"O" => "Older copy",
	"G" => "Result of merge",
	"R" => "Removed file"
    );

    undef @batch_list;		# List of files for batch processing
    undef $batch_cmd;		# Command to be executed on files
    $no_recurse = 0;		# If this is set, do only local files
    $explain_type = 0;		# Verbosely print status of files
    $find_mode = 0;		# Don't print status at all
    $short_print = 0;		# Print only filenames without path
    $no_cvsignore = 0;		# Ignore .cvsignore
    my $want_msg = 0;		# List possible filetypes and exit
    my $want_help = 0;		# Print help and exit
    my $want_ver = 0;		# Print version and exit

    my %options = (
	"types=s"  => \$list_types,
	"batch=s"  => \$batch_cmd,
	"local"	   => \$no_recurse,
	"explain"  => \$explain_type,
	"find"	   => \$find_mode,
	"short"	   => \$short_print,
	"ignore"   => \$no_cvsignore,
	"messages" => \$want_msg,
	"help"     => \$want_help,
	"version"  => \$want_ver
    );

    GetOptions(%options);

    adjust_types();

    list_messages() if $want_msg;
    usage() if $want_help;
    version() if $want_ver;

    unless ($no_cvsignore) {
	init_ignores();
    }

    if ($#ARGV < 0) {
	@ARGV = ("");
    }

    foreach (@ARGV) {
	process_dir ($_);
    }

    if ($#batch_list >= 0) {
	    do_batch();
    }
}

# print usage information and exit
sub usage
{
    print "Usage:\n" .
	"  --local		Disable recursion\n" .
	"  --explain		Verbosely print status of files\n" .
	"  --find		Emulate find - filenames only\n" .
	"  --short		Don't print paths\n" .
	"  --ignore		Don't read .cvsignore\n" .
	"  --messages		List known file types and long messages\n" .
	"  --types=[^]LIST	Print only file types [not] from LIST\n" .
	"  --batch=COMMAND	Execute this command on files\n" .
	"  --help		Print this usage information\n" .
	"  --version		Print version number\n" .
	"Abbreviations and short options are supported\n";
    exit 0;
}

# print version information and exit
sub version
{
    print "cvsu - CVS offline examiner, version -VERSION-\n";
    exit 0;
}

# If types begin with '^', make inversion
sub adjust_types
{
    if ($list_types =~ m{^\^(.*)$}) {
	$list_types = "";
	foreach (keys %messages) {
	    $list_types .= $_
		if (index ($1, $_) < 0);
	}
    }
}

# list known messages and exit
sub list_messages
{
    my $default_mark;
    print "Recognizable file types are:\n";
    foreach (sort keys %messages) {
	if (index($list_types, $_) >= 0) {
	    $default_mark = "*";
	} else {
	    $default_mark = " ";
	}
	print "  $default_mark $_ $messages{$_}\n";
    }
    print "* indicates file types listed by default\n";
    exit 0;
}

# Initialize @standard_ignores
# Also read $HOME/.cvsignore and append it to @standard_ignores
sub init_ignores
{
    my $HOME = $ENV{"HOME"};
    # This list comes from the CVS manual.
    @standard_ignores = ('RCS', 'SCCS', 'CVS', 'CVS.adm', 'RCSLOG',
			 'cvslog.*', 'tags', 'TAGS', '.make.state',
			 '.nse_depinfo', '*~', '#*', '.#*', ',*',
			 "_\$*", "*\$", '*.old', '*.bak', '*.BAK',
			 '*.orig', '*.rej', '.del-*', '*.a', '*.olb',
			 '*.o', '*.obj', '*.so', '*.exe', '*.Z',
			 '*.elc', '*.ln', 'core');

    unless (defined($HOME)) {
	return;
    }

    my $home_cvsignore = "${HOME}/.cvsignore";

    unless (-f "$home_cvsignore") {
	return;
    }

    unless (open (CVSIGNORE, "< $home_cvsignore")) {
	error ("couldn't open $home_cvsignore: $!");
    }

    while (<CVSIGNORE>) {
	push (@standard_ignores, split);
    }

    close (CVSIGNORE);
}

# print message and exit (like "die", but without raising an exception)
# newline is added at the end
sub error
{
    print STDERR shift(@_) . "\n";
    exit 1;
}

# execute commands from @exec_list with $exec_cmd
sub do_batch
{
	my @cmd_list = split (' ', $batch_cmd);
	system (@cmd_list,  @batch_list);
}

# print files status
# Parameter 1: status in one-letter representation
sub file_status
{
    my $type = shift (@_);
    my $item;
    my $pathfile;

    return
	if $ignore_rx ne '' && $file =~ /$ignore_rx/;

    return
	if (index($list_types, $type) < 0);

    $pathfile = $curr_dir . $file;

    if (defined($batch_cmd)) {
	push (@batch_list, $pathfile);
	# 1000 items in the command line might be too much for HP-UX
	if ($#batch_list > 1000) {
	    do_batch();
	    undef @batch_list;
	}
    }

    if ($short_print) {
	$item = $file;
    } else {
	$item = $pathfile;
    }

    if ($find_mode) {
	print "$item\n";
    } else {
	$type = $messages{$type}
	    if ($explain_type);
	print "$type $item\n";
    }
}

# process one directory
# Parameter 1: directory name
sub process_dir
{
    # 3-letter month names in POSIX locale
    my %months = (
	"Jan" => 0,
	"Feb" => 1,
	"Mar" => 2,
	"Apr" => 3,
	"May" => 4,
	"Jun" => 5,
	"Jul" => 6,
	"Aug" => 7,
	"Sep" => 8,
	"Oct" => 9,
	"Nov" => 10,
	"Dec" => 11
    );

    # $file, $curr_dir, and $ignore_rx must be seen in file_status
    $file = "";
    $curr_dir = shift (@_);
    $ignore_rx = '';

    $curr_dir .= "/"
	unless ( $curr_dir eq "" || $curr_dir =~ m{/$} );

    my $real_curr_dir = $curr_dir eq "" ? "." : $curr_dir;

    error ("$real_curr_dir is not a directory")
	unless ( -d $real_curr_dir );

    # Scan present files.
    file_status (".");
    my %found_files = ();
    opendir (DIR, $real_curr_dir) ||
	error ("couldn't open directory $real_curr_dir: $!");
    foreach (readdir (DIR)) {
	$found_files {$_} = 1;
    }
    closedir (DIR);

    # Scan CVS/Entries.
    my %entries = ();
    my %subdirs = ();
    my %removed = ();
    open (ENTRIES, "< ${curr_dir}CVS/Entries")
	|| error ("couldn't open ${curr_dir}CVS/Entries: $!");
    while (<ENTRIES>) {
	if ( m{^D/([^/]+)/} ) {
	    $subdirs{$1} = 1;
	} elsif ( m{^/([^/]+)/([^/])[^/]*/([^/]+)/} ) {
	    $entries{$1} = $3;
	    $removed{$1} = $3
		if $2 eq '-';
	} else {
	    error ("unrecognizable line in ${curr_dir}CVS/Entries")
		unless m{D}; # what does single "D" in CVS/Entries mean?
	}
    }
    close (ENTRIES);

    # CVS/Entries.Log lists actions to be done in CVS/Entries
    # Currently only adding and deleting directories is known to be safe
    if ( open (ENTRIES, "< ${curr_dir}CVS/Entries.Log") ) {
	while (<ENTRIES>) {
	    if ( m{^A D/([^/]+)/} ) {
		$subdirs{$1} = 1;
	    } elsif ( m{^R D/([^/]+)/} ) {
		delete $subdirs{$1};
	    } else {
		# Note: "cvs commit" helps even when you are offline
		error ("unrecognizable line in ${curr_dir}CVS/Entries.Log, " .
			"try \"cvs commit\"");
	    }
	}
	close (ENTRIES);
    }

    # It is intentional to list CVS before reading .cvsignore
    $file = "CVS";
    file_status ("C");

    # Scan .cvsignore if any
    unless ($no_cvsignore) {
	my (@ignore_list) = ();

	if (-f "${curr_dir}.cvsignore") {
	    open (CVSIGNORE, "< ${curr_dir}.cvsignore")
		|| error ("couldn't open ${curr_dir}.cvsignore: $!");
	    while (<CVSIGNORE>) {
		push (@ignore_list, split);
	    }
	    close (CVSIGNORE);
	}

	my ($iter);
	foreach $iter (@ignore_list, @standard_ignores) {
	    if ($ignore_rx eq '') {
		$ignore_rx = '^(';
	    } else {
		$ignore_rx .= '|';
	    }
	    $ignore_rx .= glob_to_rx ($iter);
	}
	$ignore_rx .= ')$'
	    if $ignore_rx ne '';
    }

    # File is missing
    foreach $file (sort keys %entries) {
	unless ($found_files{$file}) {
	    if ($removed{$file}) {
		file_status("R");
	    } else {
		file_status("U");
	    }
	}
    }

    foreach $file (sort keys %found_files) {
	next if ($file eq 'CVS' || $file eq '.' || $file eq '..');
	lstat ($curr_dir . $file); # Don't use stat() and -X on other files
	my $is_link = 0;
	eval {
	    if (-l _) {
		$is_link = 1;
	    }
	};
	if ($is_link) {
	    file_status ("L");
	} elsif (-d _) {
	    if ($subdirs{$file}) {
		$subdirs{$file} = 2;
	    } else {
		file_status ("D"); # Unknown directory
	    }
	} elsif (! (-f _)) {
	    file_status ("S"); # This must be something very special
	} elsif ( (stat _) [3] > 1 ) {
	    file_status ("H"); # Hard link
	} elsif (! $entries{$file}) {
	    file_status ("?");
	} elsif ($entries{$file} =~ /^Initial |^dummy /) {
	    file_status ("A");
	} elsif ($entries{$file} =~ /^Result of merge/) {
	    file_status ("G");
	} elsif ($entries{$file} !~
		/^(...) (...) (..) (..):(..):(..) (....)$/) {
	    error ("Invalid timestamp for $curr_dir$file: $entries{$file}");
	} else {
	    my $cvtime = timegm($6, $5, $4, $3, $months{$2}, $7 - 1900);
	    my $mtime = (stat _) [9];
	    if ($cvtime == $mtime) {
		file_status ("F");
	    } elsif ($cvtime < $mtime) {
		file_status ("M");
	    } else {
		file_status ("O");
	    }
	}
    }

    # Now do directories.
    unless ($no_recurse) {
	my $save_curr_dir = $curr_dir;
	foreach $file (sort keys %subdirs) {
	    if ($subdirs{$file} == 1) {
		$curr_dir = $save_curr_dir;
		file_status ("X");
	    } elsif ($subdirs{$file} == 2) {
		process_dir ($save_curr_dir . $file)
	    }
	}
    }
}

# Turn a glob into a regexp
sub glob_to_rx
{
    my ($expr) = @_;
    $expr =~ s/(\W)/\\$1/g;
    $expr =~ s/\\\*/.*/g;
    $expr =~ s/\\\?/./g;
    return $expr;
}
