revml.pm #61

  • //
  • guest/
  • perforce_software/
  • revml/
  • lib/
  • VCP/
  • Source/
  • revml.pm
  • View
  • Commits
  • Open Download .zip Download (19 KB)
package VCP::Source::revml ;

=head1 NAME

VCP::Source::revml - Reads a RevML file

=head1 SYNOPSIS

## revml input class:

   vcp foo.revml                     [dest_spec]
   vcp foo.revml --uncompress        [dest_spec]
   vcp foo.revml --dtd <revml.dtd>   [dest_spec]
   vcp foo.revml --version <version> [dest_spec]
   vcp revml:foo.revml:/foo/bar/...  [dest_spec]

Where <source> is a filename for input; or missing or '-' for STDIN.

=head1 DESCRIPTION

This source driver allows L<vcp|vcp> to read a RevML file.

For now, all revisions are fully reconstituted in the working
directory in order to make sure that all of the patches apply cleanly.
This can require a huge amount of disk space, but it works (optimizing
this is on the TODO).

=head1 OPTIONS

=over

=item --dtd

   --dtd=0.33
   --version=0.33

Use the indicated DTD version (must be compiled in to VCP) to parse
the RevML file.

=item --uncompress

Force uncompression of gzipped input.
If the input file ends in '.gz', the uncompress flag is implied.

=back

=cut

=for DEVELOPER_USE_ONLY
To use an alternate DTD:
   vcp revml[:<source>] --dtd <dtd>

=cut

use VCP::Logger qw( pr BUG );

## Package version number.  Note that this and @ISA are plain package
## globals assigned before "use strict" takes effect below.
$VERSION = 0.1 ;

## Inheritance is set up by assigning @ISA directly (the equivalent
## "use base" line further down is commented out).
@ISA = qw( VCP::Source VCP::Utils::revml );

use strict ;

use Carp ;
use Digest::MD5 ;
use Fcntl ;
use File::Path;
use File::Spec;
use MIME::Base64 ;
use RevML::Doctype ;
use Symbol ;
use UNIVERSAL qw( isa ) ;
use XML::Parser ;
use Time::Local qw( timegm ) ;
use VCP::Debug ':debug' ;
use VCP::Patch ;
use VCP::RefCountedFile;
use VCP::Rev ;
use VCP::Source;
use VCP::Utils qw( empty shell_quote start_dir_rel2abs );
use VCP::Utils::revml;
#use base qw( VCP::Source VCP::Utils::revml ) ;

#use fields (
#   'DTD_OPTION',        ## The --dtd or --version flag from the command line
#   'DOCTYPE',           ## revml doc type
#   'HEADER',            ## The $header is held here until the first <rev> is read
#   'IN_FH',             ## The handle of the input revml file
#   'WORK_NAME',         ## The name of the working file (diff or content)
#   'WORK_FH',           ## The filehandle of working file
#   'REV',               ## The VCP::Rev containing all of this rev's meta info
#   'STACK',             ## A stack of currently open elements
#   'UNDECODED_CONTENT', ## Base64 content waiting to be decoded.
#   'FILESPEC_RE',       ## A perl5 re compiled from $self->repo_filespec
#   'UNCOMPRESS',        ## un-compress gzipped input
#   'HEADER_ENDED',      ## Set when the first <rev> is encountered
#   'FILES',             ## A hash of files that were created as we read the
#                        ## RevML.  This is keyed on revision ID and contains
#                        ## VCP::RefCountedFile objects.  get_source_file()
#                        ## deletes these and returns the RefCountedFiles
#                        ## objects, so the disk space is reclaimed whenever
#                        ## the end point is finished with the file.
#) ;


#=item new
#
#Creates a new instance.  The only parameter is '-dtd', which overrides
#the default DTD found by searching for modules matching RevML::DTD:v*.pm.
#
#=cut

## Constructor.  Takes an optional repository spec and an options
## structure; the spec is parsed only when it is non-empty, the options
## are always handed to parse_options().  Returns the new object.
sub new {
   my ( $class, $spec, $options ) = @_;

   ## The base class constructor is deliberately called without arguments.
   my $self = $class->SUPER::new;

   if ( ! empty( $spec ) ) {
      $self->parse_revml_repo_spec( $spec );
   }

   $self->parse_options( $options );

   return $self;
}


## Returns the option specification list for this source: whatever the
## base class declares, followed by the --dtd/--version and --uncompress
## options, each bound to the member that receives its value.
sub options_spec {
   my ( $self ) = @_;

   ## Collect the inherited options first so they keep their position
   ## at the front of the returned list.
   my @inherited = $self->SUPER::options_spec;

   return (
      @inherited,
      'dtd|version=s' => \$self->{DTD_OPTION},
      'uncompress'    => \$self->{UNCOMPRESS},
   );
}


## Prepares the source for reading:
##   - runs the base class init,
##   - loads the RevML doctype selected by --dtd/--version,
##   - decides which file (or STDIN, spelled "-") to read,
##   - compiles the filespec into FILESPEC_RE (matches everything when no
##     filespec was given),
##   - opens IN_FH, through a gzip pipe when uncompression is requested or
##     implied by a ".gz" suffix,
##   - allocates the WORK_FH glob used for the per-revision work files.
## Dies if the input cannot be opened.  Returns $self.
sub init {
   my $self = shift ;

   $self->SUPER::init;

   $self->{DOCTYPE} = RevML::Doctype->new(
       $self->{DTD_OPTION},
       main::bundled_files()
   );

   ## This must happen before repo_filespec is read below, because
   ## decide_actual_input_filename() may move the filespec in to
   ## repo_server and clear repo_filespec.
   my $file = $self->decide_actual_input_filename;

   ## Always un-compress if the filename ends in ".gz" (case insensitively
   ## on Win32), and pick the platform's name for the gzip executable.
   my $gzip;
   if ( $^O =~ /Win32/ ) {
      $self->{UNCOMPRESS} = 1 if $file =~ /\.gz$/i ;
      $gzip = "gzip.exe";
   }
   else {
      $self->{UNCOMPRESS} = 1 if $file =~ /\.gz$/ ;
      $gzip = "gzip";
   }

   my $fs = $self->repo_filespec;
   $self->{FILESPEC_RE} = ( ! empty $fs )
      ? do {
         $fs =~ s{^/+}{};
         $self->compile_path_re( $fs )
      }
      : qr{^};

   if ( $file eq '-' ) {
      if( $self->{UNCOMPRESS} ) {
         ## Use a fresh glob so a previously stored handle (for instance
         ## \*STDIN) is never reopened as the pipe.
         $self->{IN_FH} = Symbol::gensym ;
         my $cmd = "$gzip --decompress --stdout - |";
         open( $self->{IN_FH}, $cmd )
            or die "$!: $cmd";
      }
      else {
         $self->{IN_FH}   = \*STDIN ;
      }
      ## TODO: Check IN_FH for readability when it's set to STDIN
   }
   else {
      $self->{IN_FH} = Symbol::gensym ;

      if( $self->{UNCOMPRESS} ) {
         my $in_name = shell_quote $file;

         ## The two-argument pipe form is kept on purpose: the command is
         ## a fixed string plus a shell-quoted file name.
         my $cmd = "$gzip --decompress --stdout $in_name |";
         open( $self->{IN_FH}, $cmd )
            or die "$!: $cmd";
      }
      else {
         ## Three-argument open: the file name is taken literally, so
         ## trailing whitespace or mode characters in it are harmless.
         open( $self->{IN_FH}, '<', $file ) or die "$!: $file\n";
      }
   }

   $self->{WORK_FH} = Symbol::gensym ;

   return $self ;
}


# ??? call this twice or only once if already set????
## Works out which file the RevML should be read from and returns its
## name, or "-" when standard input should be used.
##
## This supports a compatibility mode where you can "just" say
## revml:filename.revml or even just filename.revml on the command line.
## The parse routines will stick that in repo_filespec and not set the
## server; in that case the filespec is moved over to repo_server and
## repo_filespec is cleared.  If, however, the server is set, then the
## filespec is a pattern we need to use to select files and it is left
## alone.
##
## Any name other than "-" is made absolute with start_dir_rel2abs and
## stored back in repo_server.
sub decide_actual_input_filename {
   my ( $self ) = @_;

   my $input = $self->repo_server;
   if ( empty( $input ) ) {
      $self->repo_server( $self->repo_filespec );
      $self->repo_filespec( undef );
      $input = $self->repo_server;
   }

   ## Nothing given at all, or an explicit "-": read standard input.
   return "-" if empty( $input ) || $input eq "-";

   my $absolute = $self->repo_server( start_dir_rel2abs( $input ) );

   return empty( $absolute ) ? "-" : $absolute;
}



=item ui_set_revml_repo_spec

Sets the repo_spec, but dies if the resulting input file is no good.
This should be called from a UI handler that will handle the
exceptions.



=cut

## Parses the given repo spec and checks that the input file it names is
## usable.  Dies with a message starting with "Error:" when the spec
## names standard input or a directory, and with a message starting with
## "Warning:" when the file is missing, is not a plain file, or is not
## readable.  All arguments are passed through to parse_revml_repo_spec().
sub ui_set_revml_repo_spec {
   my $self = shift ;

   $self->parse_revml_repo_spec( @_ );
   my $file = $self->decide_actual_input_filename;
   die "Error: '-' signifies standard input, not a file name.\n"
      if $file eq '-';
   die "Error: '$file' is a directory.\n"
      if -d $file;
   die "Warning: '$file' not found!\n"
      unless -e $file;
   die "Warning: '$file' is not a plain file!\n"
      unless -f $file;
   die "Warning: '$file' not readable!\n"
      unless -r $file;
}



## Receives the header, clears out any previously extracted source files,
## parses the complete RevML input and finally forwards the header to the
## destination, unless it was already consumed (set to undef) while
## parsing.
sub handle_header {
   my ( $self, $header ) = @_;

   ## Save this off until we get our first rev from the input
   $self->{HEADER} = $header;

   my @stale_locations = $self->_db_store_location( 'source_files' );
   rmtree( \@stale_locations );

   ## Unlike normal repositories, we have to scan all this here
   ## so that all files get extracted from the input so that
   ## VCP::Source::metadb can access them.
   $self->parse_revml_file;

   if ( defined $self->{HEADER} ) {
      $self->dest->handle_header( $self->{HEADER} );
   }
}


## Returns the location of the file that was extracted for revision $r
## while the RevML was parsed.  Dies when $r is not a real revision.
## The location is built from the revision's source name, source branch
## id ("-" when there is none) and source rev id.
sub get_source_file {
   my ( $self, $r ) = @_;

   unless ( $r->is_real_rev ) {
      die "can't check out ", $r->as_string, "\n";
   }

   my @location = (
      'source_files',
      $r->source_name,
      $r->source_branch_id || "-",
      $r->source_rev_id,
   );

   return $self->_db_store_location( @location );
}


## Parses the RevML document available on IN_FH with XML::Parser.
##
## A stack of currently open elements is kept in STACK; each entry is a
## hash holding the element NAME, its attributes and, for elements that
## are neither containers nor handled by a "<tag>_characters" method, the
## accumulated TEXT.  For every element the optional methods
## "start_<tag>", "<tag>_characters" and "end_<tag>" are called when they
## exist.  When an element ends, its text (or, for elements with
## children, its whole hash) is stored in the parent element, or in
## HEADER when the parent is <revml>.  <label> and <earlier_id> values are
## collected in lists on the parent.
##
## <char code="..."> elements are not put on the stack; they are turned
## in to the character they encode and fed to the character handler.
##
## Dies on mismatched end tags and on header fields that show up after
## the first <rev>.
sub parse_revml_file {
   my $self = shift ;

   my @stack ;
   $self->{STACK} = \@stack ;
   $self->{HEADER_ENDED} = 0;

   ## Elements that only hold other elements; no TEXT is accumulated for
   ## them.  Looked up by exact tag name.
   my %is_container = map { ( $_ => 1 ) } qw( revml rev branch branches );

   my $char_handler = sub {
      my $expat = shift ;
      my $pelt = $stack[-1] ; ## parent element
      my $tag = $pelt->{NAME} ;
      $pelt->{TEXT} .= $_[0] if exists $pelt->{TEXT} && defined $pelt->{TEXT};
      my $sub = "${tag}_characters" ;
      $self->$sub( @_ ) if $self->can( $sub ) ;
   } ;

   my $p = XML::Parser->new(
      Handlers => {
         Start => sub {
            my $expat = shift ;
            my $tag = shift ;

            if ( $tag eq "char" ) {
               ## The remaining arguments are attribute name/value pairs.
               while ( @_ ) {
                  my ( $attr, $value ) = ( shift, shift ) ;
                  if ( $attr eq "code" ) {
                     ## The code is either hex ("0x09") or decimal.
                     if ( $value =~ s{^0x}{} ) {
                        $value = chr( hex( $value ) ) ;
                     }
                     else {
                        $value = chr( $value ) ;
                     }
                     $char_handler->( $expat, $value ) ;
                  }
               }
               return ;
            }

            ## TODO: suss out "container" elements from the doctype.
            push @stack, {
               NAME => $tag,
               @_,
               ( $self->can( "${tag}_characters" ) || $is_container{$tag} )
                  ? ()
                  : ( TEXT => "" ),
            } ;

            my $sub = "start_$tag" ;
            $self->$sub( @_ ) if $self->can( $sub ) ;
         },

         End => sub {
            my $expat = shift ;
            my $tag = shift ;
            return if $tag eq "char" ;

            ## Report the name of the open element, not the hash itself.
            die "Unexpected </$tag>, expected </$stack[-1]->{NAME}>\n"
               unless $tag eq $stack[-1]->{NAME} ;

            ## The end_ method runs while the element is still on the
            ## stack, so it can inspect and alter the element's TEXT.
            my $sub = "end_$tag" ;
            $self->$sub( @_ ) if $self->can( $sub ) ;
            my $elt = pop @stack ;

            if ( @stack ) {
               if (
                  exists $elt->{TEXT}
                  && defined $elt->{TEXT}
               ) {
                  ## Save all the meta fields for start_content() or
                  ## start_delta()
                  if ( $tag eq 'label' ) {
                     push @{$stack[-1]->{labels}}, $elt->{TEXT} ;
                  }
                  elsif ( $tag eq 'earlier_id' ) {
                     push @{$stack[-1]->{earlier_ids}}, $elt->{TEXT} ;
                  }
                  elsif ( $stack[-1]->{NAME} eq "revml" ) {
                     die "Header field $tag after first rev\n"
                        if $self->{HEADER_ENDED};
                     ## ASSume none of these occur after first rev.
                     $self->{HEADER}->{$tag} = $elt->{TEXT} ;
                     if ( $tag eq "rev_root" ) {
                        $self->rev_root( $elt->{TEXT} );
                     }
                  }
                  else {
                     $stack[-1]->{$tag} = $elt->{TEXT} ;
                  }
               }
               else {
                  ## It's a node with child nodes.
                  delete $elt->{NAME};

                  if ( $stack[-1]->{NAME} eq "revml" && $tag ne "rev" ) {
                     die "Header field $tag after first rev\n"
                        if $self->{HEADER_ENDED};
                     ## ASSume none of these occur after first rev.
                     $self->{HEADER}->{$tag} = $elt;
                  }
                  else {
                     $stack[-1]->{$tag} = $elt;
                  }
               }
            }
         },

         Char => $char_handler,
      },
   ) ;
   $p->parse( $self->{IN_FH} ) ;
}


## Called when a <rev> element opens: forgets any revision object left
## over from the previous <rev> and records that the header section of
## the document is over.
sub start_rev {
   my ( $self ) = @_;

   ## Make sure no older rev is lying around to confuse us.
   undef $self->{REV};

   return $self->{HEADER_ENDED} = 1;
}

## RevML is constrained so that the delta and content tags come after all
## of the meta info for a revision.  And we really don't want to hold
## the entire content of a file in memory, in case it's large.  So we
## intercept start_content and start_delta and initialize the REV
## member as well as opening a place to catch all of the data that gets
## extracted from the file.
## Builds the VCP::Rev for the <rev> element currently being parsed and
## stores it in REV.
##
## $depth says how far down from the top of the element stack the <rev>
## element sits; it defaults to 2 (the parent of the element on top).
## Every lower case key of the <rev> element (all RevML tag names are
## lower case, all internal member names are upper case) is passed to the
## matching "set_<key>" method of the new revision.
##
## Unless the revision is a placeholder, the path of its work file is
## recorded in FILES (keyed on the revision's id) and the parent directory
## of that path is created.
sub init_rev_meta {
   my ( $self, $depth ) = @_;

   my $stack_index = -( $depth || 2 );
   my $rev_elt     = $self->{STACK}->[$stack_index];

   my $r = VCP::Rev->new();

   ## We know that all kids *in use today* of <rev> are pure PCDATA
   ## Later, we'll need sub-attributes.
   ## TODO: Flatten the element tree by prefixing attribute names
   ## with '@'?.
   foreach my $key ( keys %$rev_elt ) {
      next unless $key =~ /^[a-z_0-9]+$/;
      my $setter = "set_$key";
      $r->$setter( $rev_elt->{$key} );
   }

   unless ( $r->is_placeholder_rev ) {
      my $work_path = $self->_db_store_location(
         'source_files',
         $r->name,
         $r->branch_id || "-",
         $r->rev_id
      );

      $self->{FILES}->{ $r->id } = $work_path;

      $self->mkpdir( $work_path );
   }

   $self->{REV} = $r;
   return;
}


## Called when a <content> element opens: builds the revision object from
## the meta data read so far, then creates (or truncates) the revision's
## work file and leaves it open on WORK_FH for content_characters().
## Dies if the work file cannot be opened.
sub start_content {
   my ( $self ) = @_;

   $self->init_rev_meta;

   my $work_name = $self->{FILES}->{ $self->{REV}->id };
   $self->{WORK_NAME}         = $work_name;
   $self->{UNDECODED_CONTENT} = "";

   debug( "writing $work_name" ) if debugging();

   my $open_mode = O_WRONLY | O_CREAT | O_TRUNC;
   sysopen( $self->{WORK_FH}, $work_name, $open_mode )
      or die "$!: $work_name";

   ## The binmode here is to make sure we don't convert \n to \r\n and
   ## to allow ^Z out the door (^Z is EOF on windows, and they take those
   ## things rather more seriously there than on Unix).
   binmode $self->{WORK_FH};
}


## Receives a chunk of character data for the element on top of the
## stack (a <content>, or a <delta> through the delta_characters alias)
## and writes it to WORK_FH.
##
## With encoding="base64" the chunk is appended to UNDECODED_CONTENT and
## every complete line in that buffer is decoded and written; an
## incomplete last line stays in the buffer for the next call.  With
## encoding="none" the chunk is written as is.  Any other encoding is
## fatal.
##
## Dies when a write fails.  A write of zero bytes is not a failure:
## syswrite() returns 0 for it, so the result is tested with defined().
sub content_characters {
   my $self = shift ;

   my $encoding = $self->{STACK}->[-1]->{encoding};
   $encoding = "" unless defined $encoding;

   if ( $encoding eq "base64" ) {
      $self->{UNDECODED_CONTENT} .= shift ;

      ## Drain all complete lines, not just the first one, so the buffer
      ## never holds more than one partial line.
      while ( $self->{UNDECODED_CONTENT} =~ s{\A(.*\n)}{} ) {
         my $decoded = decode_base64( $1 ) ;

         ## Blank lines decode to nothing; there is nothing to write.
         next unless length $decoded ;

         defined syswrite( $self->{WORK_FH}, $decoded )
            or die "$! writing $self->{WORK_NAME}" ;
      }
   }
   elsif ( $encoding eq "none" ) {
      defined syswrite( $self->{WORK_FH}, $_[0] )
         or die "$! writing $self->{WORK_NAME}" ;
   }
   else {
      die "unknown encoding '$encoding'\n";
   }
   return ;
}

## Called when a <content> element closes: decodes and writes whatever
## base64 text is still waiting in UNDECODED_CONTENT, empties that buffer
## and closes the work file.
##
## Dies when the write or the close fails.  Leftover text that decodes to
## nothing (for instance whitespace in front of the closing tag) is not
## written at all, and a zero byte write is not treated as an error.
sub end_content {
   my $self = shift ;

   my $pending = $self->{UNDECODED_CONTENT} ;

   ## Clear the buffer so stale text can never leak in to a later element
   ## that shares content_characters().
   $self->{UNDECODED_CONTENT} = "" ;

   if ( defined $pending && length $pending ) {
      my $decoded = decode_base64( $pending ) ;
      if ( length $decoded ) {
         defined syswrite( $self->{WORK_FH}, $decoded )
            or die "$! writing $self->{WORK_NAME}" ;
      }
   }

   close $self->{WORK_FH} or die "$! closing $self->{WORK_NAME}" ;
}

## Called when a <delta> element opens: builds the revision object from
## the meta data read so far, then creates (or truncates) the file that
## will receive the delta text and leaves it open on WORK_FH.  The delta
## file lives next to the revision's files under the fixed name 'delta'.
## Dies if the file cannot be opened.
sub start_delta {
   my ( $self ) = @_;

   $self->init_rev_meta;

   my $rev        = $self->{REV};
   my $delta_path = $self->_db_store_location(
      'source_files',
      $rev->name,
      $rev->branch_id || "-",
      'delta'
   );
   $self->{WORK_NAME} = $delta_path;

   sysopen( $self->{WORK_FH}, $delta_path, O_WRONLY | O_CREAT | O_TRUNC )
      or die "$!: $delta_path";

   ## Binary mode for the same reasons as the content work file: no
   ## newline translation, and ^Z must pass through on Windows.
   binmode $self->{WORK_FH};
}


## TODO: Could keep deltas in memory if they're small.
## Character data inside <delta> is handled exactly like character data
## inside <content>.  The glob is assigned twice only to keep perl from
## emitting a "name used once" warning.
*delta_characters = *delta_characters = \&content_characters ;

## Called when a <delta> element closes.  Closes the delta file and,
## when the revision matches the filespec, reconstitutes the revision's
## full text: the nearest earlier revision that has a file on record is
## located by following previous_id links, and the delta is applied to
## that file with vcp_patch().  An empty delta means the text did not
## change, in which case the earlier file is hard linked in to place
## instead.
##
## Dies when the delta file cannot be closed, when no earlier revision
## with a file can be found, or when the link fails.  The delta file is
## removed after patching unless the VCPNODELETE environment variable is
## set.
sub end_delta {
   my ( $self ) = @_;

   close $self->{WORK_FH} or die "$! closing $self->{WORK_NAME}";

   my $r        = $self->{REV};
   my $abs_name = $self->rev_root . "/" . $r->name;
   return unless $abs_name =~ $self->{FILESPEC_RE};

   ## Walk back through the ancestors until one is found that has a file.
   my $bv_r = $self->queued_rev( $r->previous_id );
   while ( $bv_r && ! exists $self->{FILES}->{ $bv_r->id } ) {
      $bv_r = $self->queued_rev( $bv_r->previous_id );
   }

   unless ( defined $bv_r ) {
      die "No original content to patch for ", $r->as_string;
   }

   my $source_fn = $self->{FILES}->{ $bv_r->id };
   my $dest_fn   = $self->{FILES}->{ $r->id };

   if ( ! -s $self->{WORK_NAME} ) {
      ## TODO: Don't assume working link()
      debug( "linking '$source_fn', '$dest_fn'" ) if debugging();

      link( $source_fn, $dest_fn )
         or die "$!: linking '$source_fn', '$dest_fn'";
   }
   else {
      vcp_patch( $source_fn, $dest_fn, $self->{WORK_NAME} );
      if ( ! $ENV{VCPNODELETE} ) {
         unlink $self->{WORK_NAME}
            or pr( "$! unlinking $self->{WORK_NAME}\n" );
      }
   }
}


## Converts the ISO8601 UTC time held in the TEXT of the element on top
## of the stack (for example "2000-01-01 00:00:00Z") to seconds since the
## epoch and stores the result back in TEXT.
##
## Confesses when the value does not start with a four digit year
## followed by five two digit fields, or when it does not end in "Z"
## (trailing whitespace is tolerated).
sub end_time {
   my $self = shift ;

   my $timestr = $self->{STACK}->[-1]->{TEXT};
   $timestr = "" unless defined $timestr;

   ## TODO: Get parser context here & give file, line, and column. filename
   ## and rev, while we're scheduling more work for the future.
   confess "Malformed time value $timestr\n"
      unless $timestr =~ /^\d\d\d\d(\D\d\d){5}/ ;

   ## Written as a pattern match on purpose: "substr $timestr, -1 eq 'Z'"
   ## parses as substr( $timestr, ( -1 eq 'Z' ) ) and is always true.
   confess "Non-UTC time value $timestr\n"
      unless $timestr =~ /Z\s*\z/ ;

   ## The trailing "Z" only produces empty trailing fields, which split
   ## drops, leaving year, month, day, hour, minute and second.
   my @f = split( /\D/, $timestr ) ;
   --$f[1] ; # Month of year needs to be 0..11
   $self->{STACK}->[-1]->{TEXT} = timegm( reverse @f ) ;
}

## <mod_time> values are converted exactly like <time> values.  The alias
## is installed twice only to keep perl from emitting a "name used once"
## warning.
*end_mod_time = \&end_time ;
*end_mod_time = \&end_time ;


## TODO: Verify that we should be using a Base64 encoded MD5 digest,
## according to <delta>'s attributes.  Oh, and same goes for <content>'s
## encoding.

## TODO: workaround backfilling if the destination is revml, since
## it can't put the original content in place.  We'll need to flag
## some kind of special pass-through mode for that.

## Called when a <digest> element closes: verifies that the file that was
## reconstituted for the current revision has the MD5 digest (Base64
## encoded) recorded in the RevML.
##
## Nothing is checked for revisions that do not match the filespec.  For
## a base revision the pending header is first sent to the destination
## and the destination is asked to backfill the file; when it returns
## false no check is made.
##
## On a mismatch a hard link to the offending file is left in the
## temporary directory (when possible) and the sub dies with a message
## that names the revision and both digests.  Also dies when the work
## file cannot be opened.
sub end_digest {
   my $self = shift ;

   $self->init_rev_meta unless defined $self->{REV} ;

   my $r = $self->{REV} ;
   my $abs_name = $self->rev_root . "/" . $r->name;
   return if $abs_name !~ $self->{FILESPEC_RE};

   my $original_digest = $self->{STACK}->[-1]->{TEXT};

   my $work_path = $self->{FILES}->{$r->id};

   if ( $r->is_base_rev ) {
      ## Don't bother checking the digest if the destination returns
      ## FALSE, meaning that a backfill is not possible with that destination.
      ## VCP::Dest::revml does this.
      if ( $self->{HEADER} ) {
         $self->dest->handle_header( $self->{HEADER} );
         $self->{HEADER} = undef;
      }

      return unless $self->dest->backfill( $r, $work_path );
   }

   my $d = Digest::MD5->new() ;

   ## A lexical handle, so no package-wide bareword handle is clobbered.
   sysopen( my $digest_fh, $work_path, O_RDONLY )
      or die "$! opening '$work_path' for digestion for ",
         $r->as_string,
         "\n";

   ## Binary mode: no newline translation, and ^Z must not end the file
   ## early on Windows.
   binmode $digest_fh;
   $d->addfile( $digest_fh ) ;
   close $digest_fh ;
   my $reconstituted_digest = $d->b64digest ;

   ## TODO: provide an option to turn this in to a warning
   ## TODO: make this abort writing anything to the dest, but continue
   ## processing, so as to deliver as many error messages as possible.
   unless ( $original_digest eq $reconstituted_digest ) {
      ## Build a tame file name: anything other than letters, digits,
      ## space, dot and hyphen becomes a hyphen.  The hyphen is last in
      ## the class so that it is not read as a range operator.
      my $reject_file_name = $r->name ;
      $reject_file_name =~ s{[^A-Za-z0-9 .-]+}{-}g ;
      $reject_file_name =~ s{^-+}{}g ;
      my $reject_file_path = File::Spec->catfile(
         File::Spec->tmpdir,
         $reject_file_name
      ) ;

      ## Report the digests whether or not the copy could be left behind.
      my $copy_note = link( $work_path, $reject_file_path )
         ? "   copy left in '$reject_file_path'\n"
         : "   failed to leave copy in '$reject_file_path': $!\n" ;

      die "digest check failed for ", $r->as_string, "\n",
         $copy_note,
         "   got      digest: $reconstituted_digest\n",
         "   expected digest: $original_digest\n";
   }
}


## Having this and no sub rev_characters causes the parser to accumulate
## content.
## Called when a <rev> element closes.  Queues the finished revision and
## flushes the queue, provided the revision matches the filespec;
## revisions that do not match are silently skipped.
sub end_rev {
   my ( $self ) = @_;

   ## No revision object has been built yet for this <rev>.  At this
   ## point the <rev> element itself is still on top of the stack, hence
   ## the depth of 1.
   $self->init_rev_meta( 1 ) if ! $self->{REV};

   if ( ! defined $self->rev_root ) {
      BUG( "rev_root not set" );
   }

   my $abs_name = $self->rev_root . "/" . $self->{REV}->name;
   return unless $abs_name =~ $self->{FILESPEC_RE};

   $self->queue_rev( $self->{REV} );

   ## Flush the queue after each rev so we don't alter the order of the
   ## revml file.  Also reduces memory consumption for fat trees.
   $self->store_cached_revs;

   $self->{REV} = undef;
}


=head1 AUTHOR

Barrie Slaymaker <barries@slaysys.com>

=head1 COPYRIGHT

Copyright (c) 2000, 2001, 2002 Perforce Software, Inc.
All rights reserved.

See L<VCP::License|VCP::License> (C<vcp help license>) for the terms of use.

=cut

1 ;
# Change User Description Committed
#61 4515 Barrie Slaymaker - VCP::*::revml supports <release_id>
#60 4507 Barrie Slaymaker - RevML:
    - added <action>, removed <delete>, <placeholder> and <move>
    - added <from_id> for clones (and eventually merge actions)
    - Simplified DTD (can't branch DTD based on which action
      any more)
- VCP::Source::cvs, VCP::Filter::changesets and VCP::Dest::p4
  support from_id in <action>clone</action> records
- VCP::Dest::perl_data added
- VCP::Rev::action() "branch" added, no more undefined action
  strings
- "placeholder" action removed
#59 4407 Barrie Slaymaker - VCP::Source::revml preserves the order of revisions it reads from the RevML file
#58 4154 Barrie Slaymaker - dist/vcp.exe passes almost all tests
#57 4021 Barrie Slaymaker - Remove all phashes and all base & fields pragmas
- Work around SWASHGET error
#56 4012 Barrie Slaymaker - Remove dependance on pseudohashes (deprecated Perl feature)
#55 3970 Barrie Slaymaker - VCP::Source handles rev queing, uses disk to reduce RAM
- Lots of other fixes
#54 3930 Barrie Slaymaker - VCP::Source::cvs and VCP::Dest::p4 handle cloning deletes
- "placeholder" actions and is_placeholder_rev() deprecated in
  favor of is_branch_rev() and is_clone_rev().
- Misc cleanups and minor bugfixes
#53 3855 Barrie Slaymaker - vcp scan, filter, transfer basically functional
    - Need more work in re: storage format, etc, but functional
#52 3850 Barrie Slaymaker - No longer stores all revs in memory
#51 3836 Barrie Slaymaker - Sources no longer cache all revs in RAM before sending
#50 3820 Barrie Slaymaker - VCP::Source::revml now uses VCP::Source's queueing methods
    - For maintainability only, does not decrease memory util.
#49 3813 Barrie Slaymaker - VCP::Rev::previous() is no more
#48 3811 Barrie Slaymaker - fetch_*() and get_rev() renamed get_source_file()
#47 3800 Barrie Slaymaker - <branches> removed from all code
#46 3774 Barrie Slaymaker - VCP::Source::revml minor cleanups
#45 3698 Barrie Slaymaker - Passes all VSS, cvs, and revml tests
#44 3690 Barrie Slaymaker - VCP::Source::revml no longer emits scads of undefined value warnings
#43 3677 Barrie Slaymaker - rev_root sanity check is now case insensitive on Win32
- Parens in source filespecs are now treated as regular
  characters, not capture groups
- ** is not treated as '...'
#42 3532 John Fetkovich changed File::Spec->rel2abs( blah, start_dir )
to      start_dir_rel2abs blah
everywhere.

which
   does the same thing
   and is defined in VCP::Utils
#41 3511 John Fetkovich $self setting tweak
#40 3499 John Fetkovich - implement recoverable and non-recoverable exceptions in arc
  handlers.  A user may accept a value that generated a
  recoverable exception.  Otherwise, the question will be
  re-asked.
- changed exceptions text in ui_set_revml_repo_spec.
#39 3493 John Fetkovich refined ui_set_revml_repo_spec
#38 3492 John Fetkovich interative ui question re-asked if exception generated when
arc handlers are run.  a single test case for source revml
input file has been tested.
#37 3490 John Fetkovich doc fix
#36 3489 Barrie Slaymaker - Document options emitted to .vcp files.
#35 3462 Barrie Slaymaker - Make sure bootstrap regexps get compiled
#34 3460 Barrie Slaymaker - Revamp Plugin/Source/Dest hierarchy to allow for
  reguritating options in to .vcp files
#33 3436 Barrie Slaymaker - A source spec of "revml:" now defaults to "revml:-"
#32 3431 Barrie Slaymaker - Source revml file name is relative to start_dir, not cwd
#31 3420 Barrie Slaymaker - Minor debugging improvement
#30 3331 John Fetkovich Small change in source revml state machine.
       split 'sub init' from 'sub new' in Source/revml.pm
       and Dest/revml.pm
#29 3156 Barrie Slaymaker Fix a misplaced shell quoting operation, simplify the code.
#28 3155 Barrie Slaymaker Convert to logging using VCP::Logger to reduce stdout/err spew.
       Simplify & speed up debugging quite a bit.
       Provide more verbose information in logs.
       Print to STDERR progress reports to keep users from wondering
       what's going on.
       Breaks test; halfway through upgrading run3() to an inline
       function for speed and for VCP specific features.
#27 3133 Barrie Slaymaker Make destinations call back to sources to check out files to
       simplify the architecture (is_metadata_only() no longer needed)
       and make it more optimizable (checkouts can be batched).
#26 3120 Barrie Slaymaker Move changeset aggregation in to its own filter.
#25 3112 Barrie Slaymaker Reduce memory footprint when handling large numbers
       of revisions.
#24 2972 Barrie Slaymaker Interim checkin
#23 2938 John Fetkovich added empty() calls
#22 2837 John Fetkovich Use parse_options rather than using Getopt::Long
       directly.
#21 2802 John Fetkovich Added a source_repo_id to each revision, and repo_id to each
Source and Dest.  The repo_ids include repository type
(cvs,p4,revml,vss,...) and the repo_server fields.  Changed the
$self->...->set() and $self->...->get() lines in VCP::Dest::* to
pass in a conglomerated key value, by passing in the key as an
ARRAY ref.  Also various restructuring in VCP::DB.pm,
VCP::DB_file.pm and VCP::DB_file::sdbm.pm related to this
change.
#20 2764 John Fetkovich add --compress switch to dest::revml
       add --uncompress switch to source::revml

       use gzip to compress/uncompress revml files
#19 2640 Barrie Slaymaker VCP::Source::revml now supports revision name wildcard matching.
#18 2453 John Fetkovich removed compilation of revml.
 will be making that a separate executable.
#17 2059 Barrie Slaymaker Support for branching in p4->p4 added
#16 2042 Barrie Slaymaker Basic source::p4 branching support
#15 2026 Barrie Slaymaker VCP::*::cvs now support branching
#14 2017 Barrie Slaymaker Interim checkin of id=/base_version_id for revml: and
       branch_diagram:
#13 2015 Barrie Slaymaker submit changes
#12 2014 Barrie Slaymaker Give helpful error messages if the vcp command can't read/write
       RevML due to a missing required module.
#11 2009 Barrie Slaymaker lots of fixes, improve core support for branches and VCP::Source::cvs
       now supports branches.
#10 1998 Barrie Slaymaker Initial, revml and core VCP support for branches
#9 1367 Barrie Slaymaker lots of docco updates
#8 1358 Barrie Slaymaker Win32 changes
#7 1175 Barrie Slaymaker Implement VCP::Patch, roll 0.26 release.
#6 1022 Barrie Slaymaker Perl "$foo\_..." => "${foo}_..." cleanup by
Peter Prymmer <PPrymmer@factset.com>.
#5 628 Barrie Slaymaker Cleaned up POD in bin/vcp, added BSD-style license.
#4 480 Barrie Slaymaker 0.06 Wed Dec 20 23:19:15 EST 2000
   - bin/vcp: Added --versions, which loads all modules and checks them
     for a $VERSION and print the results out.  This should help with
     diagnosing out-of-sync modules.
   - Added $VERSION vars to a few modules :-).  Forgot to increment any
     $VERSION strings.
   - VCP::Dest::cvs: The directory "deeply" was not being `cvs add`ed on
     paths like "a/deeply/nested/file", assuming "deeply" had no files
     in it.
   - VCP::Dest::revml: fixed a bug that was causing files with a lot of
     linefeeds to be emitted in base64 instead of deltaed.  This means
     most text files.
   - Various minor cleanups of diagnostics and error messages, including
     exposing "Can't locate Foo.pm" when a VCP::Source or VCP::Dest
     module depends on a module that's not installed, as reported by
     Jeff Anton.
#3 478 Barrie Slaymaker 0.05 Mon Dec 18 07:27:53 EST 2000
   - Use `p4 labels //...@label` command as per Rober Cowham's suggestion, with
     the '-s' flag recommended by Christopher Siewald and
     Amaury.FORGEOTDARC@atsm.fr.  Though it's actually something like

       vcp: running /usr/bin/p4 -u safari -c safari -p localhost:5666 -s files
       //.../NtLkly //...@compiler_a3 //.../NtLkly //...@compiler_may3

     and so //on //for 50 parameters to get the speed up.  I use the
     //.../NtLkly "file" as //a separator between the lists of files in various
     //revisions.  Hope nobody has any files named that :-).  What I should do
     is choose a random label that doesn't occur in the labels list, I guess.
   - VCP::Source::revml and VCP::Dest::revml are now binary, control code, and
     "hibit ASCII" (I know, that's an oxymoron) clean.  The <comment>, <delta>,
     and <content> elements now escape anything other than tab, line feed,
     space, or printable chars (32 <= c <= ASCII 126) using a tag like '<char
     code="0x09">'.  The test suite tests all this.  Filenames should also
     be escaped this way, but I didn't get to that.
   - The decision whether to do deltas or encode the content in base64 is now
     based on how many characters would need to be escaped.
   - We now depend on the users' diff program to have a "-a" option to force it
     to diff even if the files look binary to it.  I need to use Diff.pm and
     adapt it for use on binary data.
   - VCP::Dest::cvs now makes sure that no two consecutive revisions of the
     same file have the same mod_time.  VCP::Source::p4 got so fast at pulling
     revisions from the repositories the test suite sets up that CVS was not
     noticing that files had changed.
   - VCP::Plugin now allows you to set a list of acceptable result codes, since
     we now use p4 in ways that make it return non-zero result codes.
   - VCP::Revs now croaks if you try to add two entries of the same VCP::Rev
     (ie matching filename and rev_id).
   - The <type> tag is now limited to "text" or "binary", and is meant to
     pass that level of info between foreign repositories.
   - The <p4_info> on each file now carries the one line p4 description of
     the file so that p4->p4 transferes can pick out the more detailed
     info.  VCP::Source::p4, VCP::Dest::p4 do this.
   - VCP::{Source,Dest}::{p4,cvs} now set binaryness on added files properly,
     I think.  For p4->p4, the native p4 type is preserved.  For CVS sources,
     seeing the keyword substitution flag 'o' or 'b' implies binaryness, for
     p4, seeing a filetype like qr/u?x?binary/ or qr/x?tempobj/ or "resource"
     implies binaryness (to non-p4 destinations).  NOTE: Seeing a 'o' or 'b'
     in a CVS source only ends up setting the 'b' option on the destination.
     That should be ok for most uses, but we can make it smarter for cvs->cvs
     transfers if need be.
#2 468 Barrie Slaymaker - VCP::Dest::p4 now does change number aggregation based on the
  comment field changing or whenever a new revision of a file with
  unsubmitted changes shows up on the input stream.  Since revisions of
  files are normally sorted in time order, this should work in a number
  of cases.  I'm sure we'll need to generalize it, perhaps with a time
  thresholding function.
- t/90cvs.t now tests cvs->p4 replication.
- VCP::Dest::p4 now doesn't try to `p4 submit` when no changes are
  pending.
- VCP::Rev now prevents the same label from being applied twice to
  a revision.  This was occuring because the "r_1"-style label that
  gets added to a target revision by VCP::Dest::p4 could duplicate
  a label "r_1" that happened to already be on a revision.
- Added t/00rev.t, the beginnings of a test suite for VCP::Rev.
- Tweaked bin/gentrevml to comment revisions with their change number
  instead of using a unique comment for every revision for non-p4
  t/test-*-in-0.revml files.  This was necessary to test cvs->p4
  functionality.
#1 467 Barrie Slaymaker Version 0.01, initial checkin in perforce public depot.