#!/usr/bin/env perl
#
# Converts a CVS Repository to a Darcs Repository using cvsps.
#
use strict;
use warnings;

# This has just been changed so that it uses cvs update to work out changes
# rather than cvsps diff generation - this correctly handles binary files
# A side effect is that we can stop requiring a checked out directory as
# a parameter, because the darcs tree becomes a CVS checkout too.
# This needs changing, but hasn't been done yet.

use Cwd;
use HTTP::Date;
use Getopt::Long;

my $usage
  ="usage: in darcs repository: "
  ."$0 [--lastpatch <n>] [--branch <branch>] "
  ."<checked out directory>";

my $lastpatch;
my $wantbranch='HEAD';

GetOptions('lastpatch=i' => \$lastpatch,
	   'branch=s' => \$wantbranch)
  or die $usage;

$#ARGV == 0
  or die $usage;

if(!-d '_darcs') {
  die "Not a darcs tree! Perhaps you wanted to run 'darcs initialize' first?";
}

`cvsps 2>&1` || die "cvsps not found in path.";

# Test for "-q" flag support. It's nice to have but not necessary.
my $cvsps_cmd;
if (`cvsps -q 2>&1` =~ m/invalid argument/i) {
   $cvsps_cmd = 'cvsps';
}
else {
   $cvsps_cmd = 'cvsps -q';
}

my $base_darcs = getcwd();
chdir($ARGV[0]);
my $base_cvs = getcwd();

my $patchset_num = 0;
open ROOT,"<$base_cvs/CVS/Root" || die "Couldn't open CVS Root file";
my $root=<ROOT>;
chomp $root;
close ROOT;
open REPO,"<$base_cvs/CVS/Repository"
  || die "Couldn't open CVS Repository file";
my $repo=<REPO>;
chomp $repo;
close REPO;

sub echosystem {
  print "running: @_\n";
  system @_;
}

sub get_directory {
  my ($dir) = @_;
  if ($dir =~/(.+)\/[^\/+]/) {
    -d "$base_darcs/$1" || get_directory($1);
  }
  print "creating directory $base_darcs/$dir\n";
  -d "$base_darcs/$dir" || mkdir "$base_darcs/$dir";
  echosystem('cvs','-d',$root,'get','-D','19700102','-P','-d',"$base_darcs/$dir","$repo/$dir");
  unlink "$base_darcs/$dir/CVS/Tag";
  open TAG,">$base_darcs/$dir/CVS/Tag" || die "Couldn't write out tag name";
  print TAG "T$wantbranch\n";
  close TAG;
  -d "$base_darcs/$dir" || die "Couldn't create $base_darcs/$dir"
}

sub consider_trashing_parents {
  my ($file) = @_;
  if (! -f $file && ! -d $file && $file =~ /(.*)\/[^\/]+/) {
    my $dir = $1;
    if (`ls $dir | wc -l` == 1)  { # i.e. if there is just the CVS directory...
      print "Directory $dir is now empty...\n";
      echosystem("rm -rf $dir");
      consider_trashing_parents($dir);
    }
  }
}
if (open(PN,"$base_darcs/_darcs/prefs/patchnum")) {
  $patchset_num = <PN>;
  close(PN);
} else {
  get_directory("");
}

my $prev_date = "";
my $prev_name = "";

# loop indefinitely unless $lastpatch is defined
while (!defined $lastpatch || $patchset_num<$lastpatch) {
    $patchset_num++;

    my ($name, $log, $author);
    my $date = "";
    my $tag = "";
    my $branch = "";
    my $bkrev = "";

    my @updateitems=();

    my $maybe_blank = 0;
    my $getting_log = 0;
    my $havetag = 0;
    my $havebkrev = 0;
    my $getting_members = 0;

    # read in patch from cvsps
    chdir($base_cvs); open(LOG,"$cvsps_cmd -u -s $patchset_num |");
    while (<LOG>) {
        if ($getting_members) {
	  if(my ($thisname,$thisfrom,$thisto)
	     =/^\t([\w\.\-\_\/,\+]+):([\w\d\.]+)->([\d\.]+)/) {
	    push @updateitems,[$thisname,$thisfrom,$thisto];
	  }
        } elsif (/^Date: (.*)/) {
            # darcs wants the date to resemble output of `date`.
            $date = time2str(str2time($1));
            $date =~ s/(...), (..) (...) (....) (.*)/$1 $3 $2 $5 $4/g;
        } elsif (/^Author: (.*)/) {
            $author = $1;
        } elsif (/^Tag: \(none\)/) {
        } elsif (/^Tag: (.*)/) {
            $tag = $1;
            $havetag = 1;
	} elsif (/^Branch: (.*)/) {
            $branch = $1;
        } elsif (/^Log:/) {
            $name = <LOG>;
            chomp $name;
            $log = "";
            $maybe_blank = 1;
            $getting_log = 1;
        } elsif (/^BKrev: (.*)/) {
            $bkrev = $1;
            $havebkrev = 1;
        } elsif ($getting_log) {
            if (/^$/ && $maybe_blank) {
                # Skip leading whitespace in the log...
# BUG: something matching this regexp in the changelog will break things
            } elsif (/^Members: (.*)/) {
                $getting_log = 0;
                $getting_members = 1;
            } else {
	        $log = "$name\n" if $log eq '';
                $maybe_blank = 0;
                $log = $log . $_;
            }
        }
    }
    close(LOG);

    if (! $date) {
        # No date from cvsps means we're out of patchsets to convert.
        print "Conversion Complete\n";
        exit(0);
    }

    next unless $branch eq $wantbranch;

    print "\n";
    print "Importing Patchset $patchset_num\n";
    print "  Name:   $name\n";
    print "  Author: $author\n";
    print "  Date:   $date\n";
    print "  Tag:    $tag\n" unless $tag eq "";

    # get and apply patch
    chdir $base_cvs;
    foreach my $details (reverse @updateitems) {
      my ($file,$from,$to)=@$details;
      if ($file =~ /(.*)\/[^\/]+/) {
        my $dir = $1;
        -d $dir || get_directory $dir;
      }
      print "File $file is changed to version $to from $from...\n";
      chdir $base_cvs;
      echosystem('cvs','update','-r',$to,$file);
      if (! -f $file) {
        print "File $file is deleted...\n";
      }
      consider_trashing_parents($file);
    }

    # record patch in darcs
    if ($date eq $prev_date && $name eq $prev_name) { 
      print "IDENTICAL PATCHES, using amend-record...\n";
      chdir $base_darcs; open(DARCS,"| darcs amend-record -va --look-for-adds");
      print DARCS "y\n";
      close(DARCS);
    } else {
      chdir $base_darcs; open(DARCS,"| darcs record -va --look-for-adds --pipe");
      print DARCS "$date\n";
      print DARCS "$author\n";
      print DARCS "$name\n";
      print DARCS "$log";
      close(DARCS);
    }

    if ($havetag) {
        # recording tag in darcs
        chdir $base_darcs; open(DARCS,"| darcs tag -v --pipe");
        print DARCS "$date\n";
        print DARCS "$author\n";
        print DARCS "$tag\n";
        close(DARCS);
    }

    if ($havebkrev) {
        # recording bkrev as tag in darcs
        chdir $base_darcs; open(DARCS,"| darcs tag -v --pipe");
        print DARCS "$date\n";
        print DARCS "$author\n";
        print DARCS "$bkrev\n";
        close(DARCS);
    }

    # save new patcheset number
    chdir $base_darcs; open(PN,">_darcs/prefs/patchnum");
    print PN "$patchset_num";
    close(PN);

    if ($patchset_num % 1000 == 0) {
        echosystem("darcs optimize");
    }

    $prev_date = $date;
    $prev_name = $name;
}
