#!/usr/local/bin/perl -w

=head2 NAME

gentrevml - Generate a .revml file used by the t/ scripts

=head2 SYNOPSIS

   perl bin/gentrevml --(revml|p4|cvs|vss) [--bootstrap] [--no-a-big-file] [--batch=1]

=head2 DESCRIPTION

The test suite uses a basic RevML file to check to see vcp it can copy
in to and out of a repository correctly.  This is done for each
repository class.

Note that going through a repository may lose some information, so the
test suite can't always compare the input RevML to the output RevML.

Only the revml->revml case is known to be idempotent.

I chose to do this over using some base repository because not every
user is going to happen to have that repository, and (2) not every
repository will pass through all information correctly.

=head2 COPYRIGHT

Copyright 2000, Perforce Software, Inc.  All Rights Reserved.

This will be licensed under a suitable license at a future date.
Until then, you may only use this for evaluation purposes.  Besides
which, it's in an early alpha state, so you shouldn't depend on it
anyway.

=head2 AUTHOR

Barrie Slaymaker <barries@slaysys.com>

=cut

# NOTE: The keys this program uses are slightly different than
# VCP::Rev's uids and ->as_string output:  the branch_id is always
# surrounded by "()" here, even if it is "".  There, the "()" is omitted
# for the sake of readability if the branch_id is undef or has no length

use Text::Diff ;
use File::Basename ;
use VCP::DiffFormat ;
use Getopt::Long ;
use MIME::Base64 ;

use strict ;

my $which ;
my $debug ;

sub which {
   die "Only one mode allowed\n" if $which ;
   $which = shift ;
}

my $batch ;
my $bootstrap ;
my $no_big_files ;

BEGIN {
   ## Need to know how to name the output file before we can
   ## "use RevML::Writer".
   $batch = 0 ;
   Getopt::Long::Configure( qw( no_auto_abbrev no_getopt_compat ) ) ;
   unless (
      GetOptions(
	 'p4'           => \&which,
	 'cvs'          => \&which,
	 'revml'        => \&which,
	 'vss'          => \&which,
	 'b|bootstrap'  => \$bootstrap,
	 'batch=i'      => \$batch,
	 'd|debug'      => \$debug,
         'no-big-files' => \$no_big_files,
      )
      && $which
   ) {
      require Pod::Usage ;
      Pod::Usage::pod2usage( exitval => 2, verbose => 3 ) ;
   }
}

if ( $debug ) {
   print STDERR "for $which\n" ;
   print STDERR "bootstrap mode ", $bootstrap ? "on" : "off", "\n" ;
   print STDERR "batch $batch\n" ;
   print STDERR "\n" ;
}

##
## BUILD  THE MANIFEST
##
my $branch_mode = $which eq "p4" ? "dir" : "rev";

my @file_names ;
## Put @files in alpha order.  p4 likes to output in alpha order, and this
## makes comparing p4->revml output to revml->p4 input easier.

for my $main (
   qw( a/deeply/buried/file add binary branched del readd ),
   ! $no_big_files ? "a_big_file" : (),
#   "spacey file name"
) {
   if ( $main eq "add" || $main eq "del" ) {
      for my $file    ( qw( f1 f2 f3 f4 ) ) {
	 my $fn = "$main/$file" ;
	 next if $fn eq "del/f1" ; # Can't delete in change 1
	 push @file_names, $fn ;
      }
   }
   else {
      push @file_names, $main ;
   }
}

my $prefix = $branch_mode eq "dir" ? "main/" : "";
@file_names = map "$prefix$_", @file_names;

# rev_root is so far ignored in the test suite since most
# commands specify a destination directory.  This value is chosen
# to make that obvious.
my $rev_root = "/ignored";
#$rev_root = "//ignored" if $which eq "p4";

##
## BUILD REVISIONS IN MEMORY
##
my @revs;
my @changes;
my %versions;
my %branch_ids;

{
   # We use $counter below to drive the insertion of a range of characters
   # and labels.  This is based on the file name, the branch_id, and
   # the change number in order to yield consistent numbering between
   # the various .revml files.  the file name portion is a number representing
   # the position of the file in @file_names.  This hash contains that
   # mapping.
   my %file_numbers = do {
      my $i = 0;
      map { ( $_ => $i++ ) } @file_names;
   };

   my $user_id = "test_user" ;
#   $user_id .= '@p4_t_client' if $which eq 'p4' ;

   my %rev_num ;

   ## We never get around to changes 7..9
   my %deleted_change_num = (
      ## Delete the 'del/f<x>' in change <x>
      ## except you can't delete anything in change 1, eh?
      map( ( "${prefix}del/f$_" => $_ ), (2..9) ),
   ) ;

   my %created_change_num = (
      ## Add the 'add/f<x>' in change <x>
      map( ( "${prefix}add/f$_" => $_ ), (1..9) ),
   ) ;

   my %head_revs;
   sub concoct_file_rev {
      ## Create a revision for a branch of a file
      my ( $branch ) = @_;
      my $abs_name   = $branch->{abs_name};
      my $name       = $branch->{name};
      my $branch_id  = $branch->{branch_id};
      my $change_num = $branch->{change_num};

      my $rev_num = ++$rev_num{"$name($branch_id)"};

      my $previous_id = do {
         my $base_rev =
            $head_revs{"$branch->{base_abs_name}($branch->{base_branch_id})"};

         defined $base_rev ? $base_rev->{id} : undef;
      };

      print STDERR "   $name#$rev_num:" if $debug ;

      # This is arranged so that the counter value for a branch is
      # before the counter value for the unbranched file in the
      # same change number.  This is so that the branched files
      # get committed after the previous change's files, but before
      # the next change's unbranched files.
      my $counter =
         sprintf "%03d",
            ( $change_num * keys( %file_numbers ) + $branch->{file_number} ) * 2
            + ( $branch->{is_branched} ? 0 : 1 );

      die "counter too big" if $counter > 254 ;

      my $char_num = $counter + 1;

      ## CVS on Win32 gets bummed out about ^Z in text files.
      $char_num++ if $^O =~ /Win32/ && $char_num == 26;

      my $is_binary = 0 <= index $name, "binary";

      my $content = $is_binary
        ? chr( $change_num & 0x07 ) x 100
        : sprintf(
            qq{%s#%d(%s) \\0x%02x="%s"},
            do {
               ## We don't want content differing based on
               ## branch_mode because the test suite compares
               ## output from one repository to the t/test-*.revml files
               ## this script builds for another repository.
               ( my $n = $name ) =~ s{^main[^/]*/}{}g;
               $n;
            },
            $rev_num,
            $branch->{branch_label},
            $char_num,
            chr( $char_num ),
         ) ;

      $content = "$content\n" x 200 if 0 <= index $name, "a_big_file" ;

      $content .= "\n" unless $is_binary;

      my $secs = $counter;
      my $mins = int( $secs / 60 );
      $secs -= $mins * 60;

      my $r = {
         name         => $name,
         type         => $is_binary ? "binary" : "text",
         encoding     => $is_binary ? "base64" : "none",
         user_id      => $user_id,
         branch_id    => $branch_id,
         previous_id  => $previous_id,
         content      => $content,
         time         => sprintf( "2000-01-02 12:%02d:%02dZ", $mins, $secs ),
         ## In p4, all files in a change number have an identical comment.
         ## We impose this on the other solutions to test foo->p4 change
         ## number aggregation.
         comment      => "comment $change_num\n",
      } ;

      $branch_ids{$branch_id} = undef;  ## for later emission.

      ## p4 doesn't handle modtime until very recently, and then it
      ## doesn't expose it easily.
      $r->{mod_time} = sprintf "2000-01-01 12:%02d:%02dZ", $mins, $secs
         unless $which eq 'p4';

      if ( $which eq 'p4' ) {
         $r->{p4_info}   = "Some info $which might emit about this file" ;
         $r->{rev_id}    = $rev_num ;
         ## In p4, you may have skipped some change numbers
         $r->{change_id} = ( $r->{rev_id} - 1 ) * 2 + 1 ;
         ## TODO: Delete this next line when we get VCP::Dest::p4 to sync
         ## change numbers
         $r->{change_id} = $change_num ;
      }
      elsif ( $which eq 'cvs' ) {
         $r->{cvs_info}  = "Some info $which might emit about this file" ;
         $r->{rev_id} = "1.$rev_num" ;
         $r->{rev_id} = "1.1.2.$rev_num" if $branch->{is_branched};
         # We provide a change ID to see if the label makes it in and
         # so that the label can be used to test incremental exports from
         # cvs.
         $r->{change_id} = $change_num ;
      }
      elsif ( $which eq 'revml' ) {
         $r->{cvs_info}  ="Some info about this file" ;
         $r->{rev_id}    = $branch->{is_branched} ? "1.$rev_num" : $rev_num ;
         $r->{change_id} = $change_num ;
      }
      elsif ( $which eq 'vss' ) {
         $r->{rev_id}    = $rev_num ;
         ## change_id is provided to drive the "is_first" detection
         ## below and so that extraction of incremental exports can
         ## use the ch_4 label.
         $r->{change_id} = $change_num ;
      }
      else {
         die "$which unhandled" ;
      }

      $r->{id} = "$abs_name#$r->{rev_id}";

      if ( $which eq "cvs" ) {
         ( my $branch_number = $r->{rev_id} ) =~ s/\.\d+\z//;
         ## Using branch number here instead of the $branch_id (which
         ## may be a CVS branch tag so that altering the CVS source by
         ## moving or changing or removing the branch's branch tag does
         ## not result in different filebranch_ids.
         $r->{source_filebranch_id} = "$abs_name<$branch_number>";
      }
      else {
         $r->{source_filebranch_id} = $abs_name;
      }

      $r->{source_repo_id} = "$which:test_repository";

      if (  defined $deleted_change_num{$name} 
         && $change_num == $deleted_change_num{$name}
      ) {
         $r->{action} = 'delete' ;
      }
      elsif ( $rev_num eq 1 ) {
         $r->{action} = 'add' ;
      }
      elsif ( 0 <= index $name, "readd" ) {
          if ( $change_num % 2 ) {
              ## Add it on the odd numbers
              $r->{action} = 'add' ;
          }
          else {
              $r->{action} = 'delete' ;
          }
      }
      else {
         $r->{action} = 'edit' ;
      }

      unless ( $r->{action} eq 'delete' || $counter % 2 ) {
         $r->{labels} = [
             "achoo$counter",
             "blessyou$counter",
         ] ;
      }

      $counter = sprintf "%02d", $counter + 1 ;

      push @{$changes[$change_num]}, $r ;
      if ( $debug ) {
         print STDERR " #$r->{rev_id}" ;
         print STDERR " \@$r->{change_id})" if defined $r->{change_id} ;
         print STDERR " ($r->{action})" ;
         print STDERR " base: $r->{previous_id}" if defined $r->{previous_id};
         print STDERR "\n" ;
      }

      $name =~ s/main-branch-[1-9]/main/
         if $which eq "p4" && 0 < index $name, "branched";

      $versions{$r->{id}} = $r;
      $head_revs{"$abs_name($branch->{branch_id})"        } = $r;

      return $r;
   }

   for my $change_num ( 1..6 ) {
      print STDERR "concocting \@$change_num:\n" if $debug ;
      ## We do the file names in sorted order because going in and out of
      ## some repositories like CVS folds all timestamps in a change to
      ## all be the same time (the cvs commit sets the timestamp), and
      ## we want the revml that comes out to be in the same order
      ## as the revml that went in.

      for my $master_name ( sort @file_names ) {

	 next
	    if (
	       defined $created_change_num{$master_name}
	       && $change_num < $created_change_num{$master_name} 
	    )
	    || (
	       defined $deleted_change_num{$master_name}
	    && $change_num > $deleted_change_num{$master_name}
	    )
         ;

         my $main_branch_id = $branch_mode eq "dir"
            ? (fileparse "$rev_root/$master_name")[1]
            : "";

         my $main_abs_name = "$rev_root/$master_name";

         my @branches = (
            {
               branch_id      => $main_branch_id,
               branch_label   => "",
               abs_name       => $main_abs_name,
               name           => $master_name,
               change_num     => $change_num,
               file_number    => $file_numbers{$master_name},
               ## All main branch revs are based on their predecessors
               base_abs_name  => $main_abs_name,
               base_branch_id => $main_branch_id,
            }
         );

         ## The branched file branches in change 2
         if ( ( 0 <= index $master_name, "branched" ) && $change_num >= 2 ) {
            my $name = $master_name;
            $name =~ s/main/main-branch-1/
               if $branch_mode eq "dir";

            my $abs_name = "$rev_root/$name";

            my $branch_id = $branch_mode eq "dir"
               ? (fileparse "$rev_root/$name")[1]
               : "main-branch-1";

            ## Do branched revs first because we want to make sure that in
            ## change 2, say, the branched rev is based on change 1's version
            ## not on change 2's non-branched version.  This is important
            ## because the emission loop assumes that the last mainbranch
            ## version is the base for the first branched version.
            unshift @branches, {
               %{$branches[0]},  ## copy most fields
               abs_name       => $abs_name,
               name           => $name,
               branch_id      => $branch_id,
               branch_label   => "main-branch-1",
               is_branched    => 1,
               ## The first branched rev is based on the parent branch,
               ## All later branched revs are based on their predecessors
               $change_num == 2
                  ? (
                     base_abs_name  => $main_abs_name,
                     base_branch_id => $main_branch_id,
                  )
                  : (
                     base_abs_name  => $abs_name,
                     base_branch_id => $branch_id,
                  ),
            };
         }

         push @revs, map concoct_file_rev( $_ ), @branches;
      }
      print STDERR "\n" if $debug ;
   }
}

## Emit the document

use Digest::MD5 qw( md5_base64 ) ;
use File::Basename ;
use RevML::Doctype 'DEFAULT' ;
use RevML::Writer qw( :all :dtd_tags ) ;

sub _emit_characters {
   my ( $buf ) = @_ ;

   setDataMode( 0 ) ;
   ## note that we don't let XML munge \r to be \n!
   while ( $buf =~ m{\G(?:
      (  [ \x00-\x08\x0b-\x1f\x7f-\xff])
      | ([^\x00-\x08\x0b-\x1f\x7f-\xff]*)
      )}gx
   ) {
      if ( defined $1 ) {
	 char( "", code => sprintf( "0x%02x", ord $1 ) ) ;
      }
      else {
	 characters( $2 ) ;
      }
   }

}


my $prog = basename $0 ;
my $f0 = "$prog.0" ;
my $f1 = "$prog.1" ;

binmode STDOUT ;

setDataMode 1 ;

xmlDecl ;
time '2000-01-01 00:00:00Z' ;
rep_type $which ;
rep_desc 'random text, for now' ;
if ( $which eq "revml" ) {
   ## These are ignored by other interfaces, so don't test with them.
   comment "A comment" ;
}

rev_root $rev_root;

# The name "main-branch-1" is used to make sure that these revs
# occur after the main/ branch revs.  This is because we sort the
# revml file by name and (currently) VCP::Source::revml assumes that
# the previous rev is always there before the current rev.

start_branches;
  for ( sort grep length, keys %branch_ids ) {
      start_branch;
         branch_id $_;
         dest_branch_id;#   "main-branch-1";
    #        if ( $which eq "p4" ) {
    #          p4_branch_spec <<END_P4_BRANCH_SPEC;
    #Branch:	main-branch-1
    #
    #Update:	2002/08/27 12:00:00
    #
    #Owner:	foo
    #
    #Description:
    #	Created by gentrevml
    #
    #Options:	unlocked
    #
    #View:
    #	//ignored/main/... //ignored/main-branch-1/...
    #
    #END_P4_BRANCH_SPEC
    #        }
      end_branch;
    }
end_branches;

## Note the overlapping range here.  Batch 1 (0 or 1) needs to have a digest
## of the rev _before_ the start of the batch unless it's in bootstrap mode.
my @change_nums = (
   ( ! $batch )
      ? (1..3)
   : $bootstrap
      ? (4..6)
      : (3..6)
) ;

## Build @files from @changes.  An older version built revml in change number
## order, but we now built in filename, change number order to make sorting
## of the output of vcp tests in to a predictable order possible.  This is
## because cvs->revml does not result in predictable revml order due to
## cvs oddities, so all the tests generate revml in name,rev order.
my %revs_by_name ;

for my $r ( sort {
      $a->{name}   cmp $b->{name}
                   ||
      $a->{rev_id} cmp $b->{rev_id}
   } grep { my $r = $_; grep( $_ eq $r->{change_id}, @change_nums ) } @revs
) {
   push @{$revs_by_name{$r->{name}}}, $r;
}

my @sorted_rev_names = sort {
   my @a = split "/", $a ;
   my @b = split "/", $b ;
   while ( @a && @b ) {
      my $result = shift( @a ) cmp shift( @b ) ;
      return $result if $result ;
   }
   return @a <=> @b ;
} keys %revs_by_name ;


for my $rev_name ( @sorted_rev_names ) {
   print STDERR "emitting $rev_name:\n" if $debug ;

   for my $r ( @{$revs_by_name{$rev_name}} ) {
      my $change_num = $r->{change_id} ;
      my $is_first = $change_num eq $change_nums[0] ;
      my $digest_mode  = $is_first && $batch && ! $bootstrap ;
      my $is_bootstrap = $is_first && ( ! $batch || $bootstrap );

      next if ( $is_first
	 && ( ! $batch || $digest_mode )
	 && $r->{action} eq 'delete'
      ) ;

      next if ( $which eq "vss"
	 && $r->{action} eq "delete"
         && $r->{name} eq "readd"
         && $change_num != $change_nums[-1]
      );

      print STDERR "   $r->{name}#$r->{rev_id}:" if $debug ;

      my $pr;

      if ( $r->{previous_id} ) {
         $pr = $versions{$r->{previous_id}};

         die "No previous rev found for $r->{name}($r->{branch_id})#$r->{rev_id}",
            ": key '$r->{previous_id}', does not exist in ",
            join " ", map "'$_'", sort keys %versions
            unless $pr;
      }

      start_rev id => $r->{id};
      name                 $r->{name} ;
      source_name          $r->{name} ;

      source_filebranch_id $r->{source_filebranch_id};
      source_repo_id       $r->{source_repo_id};

      type                 $r->{type} unless $which eq "vss" && $r->{action} eq "delete";

      if ( ! $digest_mode ) {
	 p4_info           $r->{p4_info}  if defined $r->{p4_info} ;
	 cvs_info          $r->{cvs_info} if defined $r->{cvs_info} ;
      }

      if( length $r->{branch_id} ) {
         branch_id        $r->{branch_id};
         source_branch_id $r->{branch_id};
      }
      rev_id              $r->{rev_id};
      source_rev_id       $r->{rev_id};
      if( defined $r->{change_id} ) { # && $which ne "vss" ;
         change_id        $r->{change_id};
         source_change_id $r->{change_id};
      }

      my $digestion = 1 ;

      if ( $digest_mode ) {
	 print STDERR " digest" if $debug ;
      }
      else {
	 time      $r->{time} unless $which eq "vss" && $r->{action} eq "delete";
	 mod_time  $r->{mod_time}
            if defined $r->{mod_time}
               && ! ( $which eq "vss" && $r->{action} eq "delete" );

	 user_id   $r->{user_id} unless $which eq "vss" && $r->{action} eq "delete";

         if ( $r->{labels} ) {
	    label $_ for @{$r->{labels}} ;
	 }

	 ## In p4, all files in a change number have an identical comment.
	 comment $r->{comment} unless $which eq "vss" && $r->{action} eq "delete";
         previous_id $r->{previous_id}
            if defined $r->{previous_id} && ! $is_bootstrap;

	 if ( $r->{action} eq 'delete' ) {
	    print STDERR " delete" if $debug ;
	    defaultWriter->delete() ;
	    $digestion = 0 ;
	 }
	 else {
	    if ( ! $pr
               || $pr->{action} eq "delete"
               || $is_bootstrap
               || $r->{encoding} ne "none"
            ) {
	       print STDERR " content" if $debug ;
	       start_content encoding => $r->{encoding} ;
	       if ( $r->{encoding} eq "none" ) {
		  _emit_characters $r->{content} ;
	       }
	       else {
	          setDataMode( 0 ) ;
	          characters encode_base64 $r->{content} ;
	       }
	       end_content ;
	       setDataMode( 1 ) ;
	    }
	    else {
               die "No previous rev for $r->{name}#$r->{rev_id}"
                  unless defined $pr;
               die "delta in change_id $change_num!"
                  unless $change_num > 1;
               my $bv_r = $pr;
               $bv_r = $versions{$bv_r->{previous_id}}
                  while $bv_r->{action} eq "delete";

	       print STDERR " delta" if $debug ;
	       start_delta type => 'diff-u', encoding => 'none' ;
	       _emit_characters(
                  diff \$bv_r->{content}, \$r->{content}, {
                     STYLE => "VCP::DiffFormat",
                  }
               );
	       end_delta ;
	       setDataMode( 1 ) ;
	    }
	 }
      }
      digest md5_base64( $r->{content} ), type => 'MD5', encoding => 'base64'
         if $digestion ;

      if ( $debug ) {
	 print STDERR " #$r->{rev_id}" ;
	 print STDERR " \@$r->{change_id})" if defined $r->{change_id} ;
	 print STDERR " ($r->{action})\n" ;
      }
   }
   print STDERR "\n" if $debug ;
}

END {
   if ( -f $f0 ) {
      unlink $f0 or warn "$!: $f0" ;
   }
   if ( -f $f1 ) {
      unlink $f1 or warn "$!: $f1" ;
   }
}

endAllTags ;