gentrevml #1

  • //
  • guest/
  • sean_mccune/
  • revml/
  • bin/
  • gentrevml
  • View
  • Commits
  • Open Download .zip Download (11 KB)
#!/usr/local/bin/perl -w

=head2 NAME

gentrevml - Generate a .revml file used by the t/ scripts

=head2 SYNOPSIS

   perl bin/gentrevml --(revml|p4|cvs) [--bootstrap] [--batch=1]

=head2 DESCRIPTION

The test suite uses a bas RevML file to check to see vcp it can copy in to
and out of a repository correctly.  This is done for each repository class.

Note that going through a repository may lose some information, so the
test suite can't always compare the input RevML to the output RevML.

Only the revml->revml case is known to be idempotent.

I chose to do this over using some base repository because not every user
is going to happen to have that repository, and (2) not every repository
will pass through all information correctly.

=head2 COPYRIGHT

Copyright 2000, Perforce Software, Inc.  All Rights Reserved.

This will be licensed under a suitable license at a future date.  Until
then, you may only use this for evaluation purposes.  Besides which, it's
in an early alpha state, so you shouldn't depend on it anyway.

=head2 AUTHOR

Barrie Slaymaker <barries@slaysys.com>

=cut

use Text::Diff ;
use VCP::DiffFormat ;
use Getopt::Long ;
use MIME::Base64 ;

use strict ;

my $which ;
my $debug ;

sub which {
   die "Only one mode allowed\n" if $which ;
   $which = shift ;
}

my $batch ;
my $bootstrap ;

BEGIN {
   ## Need to know how to name the output file before we can
   ## "use RevML::Writer".
   $batch = 0 ;
   Getopt::Long::Configure( qw( no_auto_abbrev no_getopt_compat ) ) ;
   unless (
      GetOptions(
	 'p4'           => \&which,
	 'cvs'          => \&which,
	 'revml'        => \&which,
	 'b|bootstrap'  => \$bootstrap,
	 'batch=i'      => \$batch,
	 'd|debug'      => \$debug,
      )
      && $which
   ) {
      require Pod::Usage ;
      Pod::Usage::pod2usage( exitval => 2, verbose => 3 ) ;
   }
}

if ( $debug ) {
   print STDERR "for $which\n" ;
   print STDERR "bootstrap mode ", $bootstrap ? "on" : "off", "\n" ;
   print STDERR "batch $batch\n" ;
   print STDERR "\n" ;
}

##
## BUILD  THE MANIFEST
##
my @file_names ;
## Put @files in alpha order.  p4 likes to output in alpha order, and this
## makes comparing p4->revml output to revml->p4 input easier.
for my $main (
   qw( a/deeply/buried/file a_big_file add binary del readd ),
#   "spacey file name"
) {
   if ( $main eq "add" || $main eq "del" ) {
      for my $file    ( qw( f1 f2 f3 f4 ) ) {
	 my $fn = "$main/$file" ;
	 next if $fn eq "del/f1" ; # Can't delete in change 1
	 push @file_names, $fn ;
      }
   }
   else {
      push @file_names, $main ;
   }
}


##
## BUILD REVISIONS IN MEMORY
##
my @changes ;

my $binary_counter = 0 ;

{
   my $user_id = "${which}_t_user" ;
   $user_id .= '@p4_t_client' if $which eq 'p4' ;

   my %rev_num ;

   ## We never get around to changes 7..9
   my %deleted_change_num = (
      ## Delete the 'del/f<x>' in change <x>
      ## except you can't delete anything in change 1, eh?
      map( ( "del/f$_" => $_ ), (2..9) ),
   ) ;

   my %created_change_num = (
      ## Add the 'add/f<x>' in change <x>
      map( ( "add/f$_" => $_ ), (1..9) ),
   ) ;

   my $counter = "00" ;

   for my $change_num ( 1..6 ) {
      print STDERR "concocting \@$change_num:\n" if $debug ;
      ## We do the file names in sorted order because going in and out of
      ## some repositories like CVS folds all timestamps in a change to
      ## all be the same time (the cvs commit sets the timestamp), and
      ## we want the revml that comes out to be in the same order
      ## as the revml that went in.
      for my $name ( sort @file_names ) {
	 next
	    if (
	       defined $created_change_num{$name}
	       && $change_num < $created_change_num{$name} 
	    )
	    || (
	       defined $deleted_change_num{$name}
	    && $change_num > $deleted_change_num{$name}
	    ) ;

	 ++$rev_num{$name} ;

	 print STDERR "   $name#$rev_num{$name}:" if $debug ;

	 die "counter too big" if $counter > 254 ;

	 my $content =  $name eq "binary" 
	   ? chr( $binary_counter++ & 0x07 ) x 100
	   : sprintf(
	       qq{%s, revision %d, char 0x%02x="%s"\n},
	       $name,
	       $rev_num{$name},
	       $counter + 1,
	       chr( $counter + 1 ),
	    ) ;

	 $content = $content x 200 if $name eq "a_big_file" ;

	 my $r = {
	    name     => $name,
	    type     => $name eq "binary" ? "binary" : "text",
	    encoding => $name eq "binary" ? "base64" : "none",
	    user_id  => $user_id,
	    content  => $content,
	    time     => "2000-01-01 12:00:${counter}Z",
	    ## In p4, all files in a change number have an identical comment.
	    ## We impose this on the other solutions to test foo->p4 change
	    ## number aggregation.
	    comment  => "comment $change_num\n",
	 } ;

	 ## p4 doesn't handle modtime until very recently, and then it
	 ## doesn't expose it easily.
	 $r->{mod_time} = "2000-01-01 12:01:${counter}Z" unless $which eq 'p4' ;

	 if ( $which eq 'p4' ) {
	    $r->{p4_info}   = "Some info $which might emit about this file" ;
	    $r->{rev_id}    = $rev_num{$name} ;
	    ## In p4, you may have skipped some change numbers
	    $r->{change_id} = ( $r->{rev_id} - 1 ) * 2 + 1 ;
	    ## TODO: Delete this next line when we get VCP::Dest::p4 to sync
	    ## change numbers
	    $r->{change_id} = $change_num ;
	 }
	 elsif ( $which eq 'cvs' ) {
	    $r->{cvs_info}  = "Some info $which might emit about this file" ;
	    $r->{rev_id} = "1.$rev_num{$name}" ;
	    # We provide a change ID to see if the label makes it in and
	    # so that the label can be used to test incremental exports from
	    # cvs.
	    $r->{change_id} = $change_num ;
	 }
	 elsif ( $which eq 'revml' ) {
	    $r->{cvs_info}  ="Some info about this file" ;
	    $r->{rev_id}    = $rev_num{$name} ;
	    $r->{change_id} = $change_num ;
	 }
	 else {
	    die "$which unhandled" ;
	 }

	 if (  defined $deleted_change_num{$name} 
	    && $change_num == $deleted_change_num{$name}
	 ) {
	    $r->{action} = 'delete' ;
	 }
	 elsif ( $rev_num{$name} eq 1 ) {
	    $r->{action} = 'add' ;
	 }
	 elsif ( $name eq "readd" ) {
	     if ( $change_num % 2 ) {
	         ## Add it on the odd numbers
		 $r->{action} = 'add' ;
	     }
	     else {
	         $r->{action} = 'delete' ;
	     }
	 }
	 else {
	    $r->{action} = 'edit' ;
	 }

	 unless ( $r->{action} eq 'delete' || $counter % 2 ) {
	    $r->{labels} = [
		"achoo$counter",
		"blessyou$counter",
	    ] ;
	 }

	 $counter = sprintf "%02d", $counter + 1 ;

	 push @{$changes[$change_num]}, $r ;
	 if ( $debug ) {
	    print STDERR " #$r->{rev_id}" ;
	    print STDERR " \@$r->{change_id})" if defined $r->{change_id} ;
	    print STDERR " ($r->{action})\n" ;
	 }
      }
      print STDERR "\n" if $debug ;
   }
}

## Emit the document

use Digest::MD5 qw( md5_base64 ) ;
use File::Basename ;
use RevML::Doctype 'DEFAULT' ;
use RevML::Writer qw( :all :dtd_tags ) ;

sub _emit_characters {
   my ( $buf ) = @_ ;

   setDataMode( 0 ) ;
   ## note that we don't let XML munge \r to be \n!
   while ( $buf =~ m{\G(?:
      (  [ \x00-\x08\x0b-\x1f\x7f-\xff])
      | ([^\x00-\x08\x0b-\x1f\x7f-\xff]*)
      )}gx
   ) {
      if ( defined $1 ) {
	 char( "", code => sprintf( "0x%02x", ord $1 ) ) ;
      }
      else {
	 characters( $2 ) ;
      }
   }

}


my $prog = basename $0 ;
my $f0 = "$prog.0" ;
my $f1 = "$prog.1" ;

binmode STDOUT ;

setDataMode 1 ;

xmlDecl ;
time '2000-01-01 00:00:00Z' ;
rep_type $which ;
rep_desc 'random text, for now' ;

my %prev ;

## TODO: Branching, moving, and binary files

# This is a bogus rev_root, we set it to see if it gets ignored on
# transfers with a destination rev_root specified on the command line.
# TODO: We should also see what happens when dest no rev_root is specified.
rev_root "foo/bar/bah" ;

## Note the overlapping range here.  Batch 1 (0 or 1) needs to have a digest
## of the rev _before_ the start of the batch unless it's in bootstrap mode.
my @change_nums = (
   ( ! $batch )
      ? (1..3)
   : $bootstrap
      ? (4..6)
      : (3..6)
) ;

## Build @files from @changes.  An older version built revml in change number
## order, but we now built in filename, change number order to make sorting
## of the output of vcp tests in to a predictable order possible.  This is
## because cvs->revml does not result in predictable revml order, so
## all the tests generate revml in name,rev order.
my %revs_by_name ;

for my $change_num ( @change_nums ) {
   for my $rev ( @{$changes[$change_num]} ) {
      push @{$revs_by_name{$rev->{name}}}, $rev ;
   }
}

my @sorted_rev_names = sort {
   my @a = split "/", $a ;
   my @b = split "/", $b ;
   while ( @a && @b ) {
      my $result = shift( @a ) cmp shift( @b ) ;
      return $result if $result ;
   }
   return @a <=> @b ;
} keys %revs_by_name ;

for my $rev_name ( @sorted_rev_names ) {
   print STDERR "emitting $rev_name:\n" if $debug ;

   for my $r ( @{$revs_by_name{$rev_name}} ) {
      my $change_num = $r->{change_id} ;
      my $is_first = $change_num eq $change_nums[0] ;
      my $digest_mode = $is_first && $batch && ! $bootstrap ;

      next if ( $is_first
	 && ( ! $batch || $digest_mode )
	 && $r->{action} eq 'delete'
      ) ;

      print STDERR "   $r->{name}#$r->{rev_id}:" if $debug ;

      my $pr = $prev{$r->{name}} ;

      start_rev ;
      name        $r->{name} ;
      type        $r->{type} ;

      if ( ! $digest_mode ) {
	 p4_info     $r->{p4_info}  if defined $r->{p4_info} ;
	 cvs_info    $r->{cvs_info} if defined $r->{cvs_info} ;
      }

      rev_id      $r->{rev_id} ;
      change_id   $r->{change_id}   if defined $r->{change_id} ;

      my $digestion = 1 ;

      if ( $digest_mode ) {
	 print STDERR " digest" if $debug ;
      }
      else {
	 time      $r->{time} ;
	 mod_time  $r->{mod_time} if defined $r->{mod_time} ;

	 user_id   $r->{user_id} ;

         if ( $r->{labels} ) {
	    label $_ for @{$r->{labels}} ;
	 }

	 ## In p4, all files in a change number have an identical comment.
	 comment $r->{comment} ;

	 if ( $r->{action} eq 'delete' ) {
	    print STDERR " delete" if $debug ;
	    defaultWriter->delete() ;
	    $digestion = 0 ;
	 }
	 else {
	    if ( ! $pr || $r->{encoding} ne "none" ) {
	       print STDERR " content" if $debug ;
	       start_content encoding => $r->{encoding} ;
	       if ( $r->{encoding} eq "none" ) {
		  _emit_characters $r->{content} ;
	       }
	       else {
	          setDataMode( 0 ) ;
	          characters encode_base64 $r->{content} ;
	       }
	       end_content ;
	       setDataMode( 1 ) ;
	    }
	    else {
	       print STDERR " delta" if $debug ;
	       base_rev_id $pr->{rev_id} ;
	       start_delta type => 'diff-u', encoding => 'none' ;
	       _emit_characters(
                  diff \$pr->{content}, \$r->{content}, {
                     STYLE => "VCP::DiffFormat",
                  }
               );
	       end_delta ;
	       setDataMode( 1 ) ;
	    }
	 }
      }
      digest md5_base64( $r->{content} ), type => 'MD5', encoding => 'base64'
         if $digestion ;

      $prev{$r->{name}} = $r->{action} eq "delete" ? undef : $r ;
      if ( $debug ) {
	 print STDERR " #$r->{rev_id}" ;
	 print STDERR " \@$r->{change_id})" if defined $r->{change_id} ;
	 print STDERR " ($r->{action})\n" ;
      }
   }
   print STDERR "\n" if $debug ;
}

END {
   if ( -f $f0 ) {
      unlink $f0 or warn "$!: $f0" ;
   }
   if ( -f $f1 ) {
      unlink $f1 or warn "$!: $f1" ;
   }
}

endAllTags ;
# Change User Description Committed
#1 1375 Sean McCune Creating my own branch for work on vcp.
//guest/perforce_software/revml/bin/gentrevml
#11 1358 Barrie Slaymaker Win32 changes
#10 1055 Barrie Slaymaker add sorting, revamp test suite, misc cleanup.
 Dest/revml is
not portable off my system yet (need to release ...::Diff)
#9 701 Barrie Slaymaker Fixed VCP::Dest::p4 re-rooting problem, further t/* cleanup
#8 695 Barrie Slaymaker Cleaned up support for binary files in VCP::Dest::revml and
altered test suite to deal with it better.  Added some thoughts
to the TODO file.
#7 619 Barrie Slaymaker Avoid using p4 print -s, it puts linebreaks in every 4098
characters or so.
#6 609 Barrie Slaymaker Add a file to the test procedure that it alternately added and
deleted (file is named "readd"). Fixed all destinations to handle
that.
#5 608 Barrie Slaymaker Lots of changes to get vcp to install better, now up to 0.066.
Many thanks to Matthew Attaway for testing & suggestions.
#4 480 Barrie Slaymaker 0.06 Wed Dec 20 23:19:15 EST 2000
   - bin/vcp: Added --versions, which loads all modules and checks them
     for a $VERSION and print the results out.  This should help with
     diagnosing out-of-sync modules.
   - Added $VERSION vars to a few modules :-).  Forgot to increment any
     $VERSION strings.
   - VCP::Dest::cvs: The directory "deeply" was not being `cvs add`ed on
     paths like "a/deeply/nested/file", assuming "deeply" had no files
     in it.
   - VCP::Dest::revml: fixed a bug that was causing files with a lot of
     linefeeds to be emitted in base64 instead of deltaed.  This means
     most text files.
   - Various minor cleanups of diagnostics and error messages, including
     exposing "Can't locate Foo.pm" when a VCP::Source or VCP::Dest
     module depends on a module that's not installed, as reported by
     Jeff Anton.
#3 478 Barrie Slaymaker 0.05 Mon Dec 18 07:27:53 EST 2000
   - Use `p4 labels //...@label` command as per Rober Cowham's suggestion, with
     the '-s' flag recommended by Christopher Siewald and
     Amaury.FORGEOTDARC@atsm.fr.  Though it's actually something like

       vcp: running /usr/bin/p4 -u safari -c safari -p localhost:5666 -s files
       //.../NtLkly //...@compiler_a3 //.../NtLkly //...@compiler_may3

     and so //on //for 50 parameters to get the speed up.  I use the
     //.../NtLkly "file" as //a separator between the lists of files in various
     //revisions.  Hope nobody has any files named that :-).  What I should do
     is choose a random label that doesn't occur in the labels list, I guess.
   - VCP::Source::revml and VCP::Dest::revml are now binary, control code, and
     "hibit ASCII" (I know, that's an oxymoron) clean.  The <comment>, <delta>,
     and <content> elements now escape anything other than tab, line feed,
     space, or printable chars (32 <= c <= ASCII 126) using a tag like '<char
     code="0x09">'.  The test suite tests all this.  Filenames should also
     be escaped this way, but I didn't get to that.
   - The decision whether to do deltas or encode the content in base64 is now
     based on how many characters would need to be escaped.
   - We now depend on the users' diff program to have a "-a" option to force it
     to diff even if the files look binary to it.  I need to use Diff.pm and
     adapt it for use on binary data.
   - VCP::Dest::cvs now makes sure that no two consecutive revisions of the
     same file have the same mod_time.  VCP::Source::p4 got so fast at pulling
     revisions from the repositories the test suite sets up that CVS was not
     noticing that files had changed.
   - VCP::Plugin now allows you to set a list of acceptable result codes, since
     we now use p4 in ways that make it return non-zero result codes.
   - VCP::Revs now croaks if you try to add two entries of the same VCP::Rev
     (ie matching filename and rev_id).
   - The <type> tag is now limited to "text" or "binary", and is meant to
     pass that level of info between foreign repositories.
   - The <p4_info> on each file now carries the one line p4 description of
     the file so that p4->p4 transferes can pick out the more detailed
     info.  VCP::Source::p4, VCP::Dest::p4 do this.
   - VCP::{Source,Dest}::{p4,cvs} now set binaryness on added files properly,
     I think.  For p4->p4, the native p4 type is preserved.  For CVS sources,
     seeing the keyword substitution flag 'o' or 'b' implies binaryness, for
     p4, seeing a filetype like qr/u?x?binary/ or qr/x?tempobj/ or "resource"
     implies binaryness (to non-p4 destinations).  NOTE: Seeing a 'o' or 'b'
     in a CVS source only ends up setting the 'b' option on the destination.
     That should be ok for most uses, but we can make it smarter for cvs->cvs
     transfers if need be.
#2 468 Barrie Slaymaker - VCP::Dest::p4 now does change number aggregation based on the
  comment field changing or whenever a new revision of a file with
  unsubmitted changes shows up on the input stream.  Since revisions of
  files are normally sorted in time order, this should work in a number
  of cases.  I'm sure we'll need to generalize it, perhaps with a time
  thresholding function.
- t/90cvs.t now tests cvs->p4 replication.
- VCP::Dest::p4 now doesn't try to `p4 submit` when no changes are
  pending.
- VCP::Rev now prevents the same label from being applied twice to
  a revision.  This was occuring because the "r_1"-style label that
  gets added to a target revision by VCP::Dest::p4 could duplicate
  a label "r_1" that happened to already be on a revision.
- Added t/00rev.t, the beginnings of a test suite for VCP::Rev.
- Tweaked bin/gentrevml to comment revisions with their change number
  instead of using a unique comment for every revision for non-p4
  t/test-*-in-0.revml files.  This was necessary to test cvs->p4
  functionality.
#1 467 Barrie Slaymaker Version 0.01, initial checkin in perforce public depot.