#!/usr/local/bin/perl -w =head2 NAME gentrevml - Generate a .revml file used by the t/ scripts =head2 SYNOPSIS perl bin/gentrevml --(cvs|p4|revml|svn|vss) [--bootstrap] [--no-a-big-file] [--batch=1] =head2 DESCRIPTION The test suite uses a basic RevML file to check to see vcp it can copy in to and out of a repository correctly. This is done for each repository class. Note that going through a repository may lose some information, so the test suite can't always compare the input RevML to the output RevML. Only the revml->revml case is known to be idempotent. I chose to do this over using some base repository because not every user is going to happen to have that repository, and (2) not every repository will pass through all information correctly. =head2 COPYRIGHT Copyright 2000, 2003 Perforce Software, Inc. All Rights Reserved. This will be licensed under a suitable license at a future date. Until then, you may only use this for evaluation purposes. Besides which, it's in an early alpha state, so you shouldn't depend on it anyway. =head2 AUTHOR Barrie Slaymaker =cut # NOTE: The keys this program uses are slightly different than # VCP::Rev's uids and ->as_string output: the branch_id is always # surrounded by "()" here, even if it is "". There, the "()" is omitted # for the sake of readability if the branch_id is undef or has no length use Text::Diff ; use File::Basename ; use VCP::DiffFormat ; use Getopt::Long ; use MIME::Base64 ; use strict ; my $which ; my $debug ; sub which { die "Only one mode allowed\n" if $which ; $which = shift ; } my $batch ; my $bootstrap ; my $no_big_files ; BEGIN { ## Need to know how to name the output file before we can ## "use RevML::Writer". $batch = 0 ; Getopt::Long::Configure( qw( no_auto_abbrev no_getopt_compat ) ) ; unless ( GetOptions( "cvs" => \&which, "p4" => \&which, "revml" => \&which, "svn" => \&which, "vss" => \&which, "b|bootstrap" => \$bootstrap, "batch=i" => \$batch, "d|debug" => \$debug, "no-big-files" => \$no_big_files, ) && $which ) { require Pod::Usage ; Pod::Usage::pod2usage( exitval => 2, verbose => 3 ) ; } } $bootstrap = 0 unless $batch; ## Just in case if ( $debug ) { print STDERR "for $which\n" ; print STDERR "bootstrap mode ", $bootstrap ? "on" : "off", "\n" ; print STDERR "batch $batch\n" ; print STDERR "\n" ; } ## Note the overlapping range here. Batch 1 (0 or 1) needs to have a digest ## of the rev _before_ the start of the batch unless it's in bootstrap mode. my @change_nums = ( ( ! $batch ) ? (1..3) : $bootstrap ? (4..6) : (3..6) ) ; ## ## BUILD THE MANIFEST ## my $branch_mode = ( 0 <= index "|p4|svn|vss|", "|$which|" ) ? "dir" : "rev"; my @file_names ; ## Put @files in alpha order. p4 likes to output in alpha order, and this ## makes comparing p4->revml output to revml->p4 input easier. for my $main ( qw( a/deeply/buried/file add binary branched branched-no-change del readd ), ! $no_big_files ? "a_big_file" : (), # "spacey file name" ) { if ( $main eq "add" || $main eq "del" ) { for my $file ( qw( f1 f2 f3 f4 ) ) { my $fn = "$main/$file" ; next if $fn eq "del/f1" ; # Can't delete in change 1 push @file_names, $fn ; } } else { push @file_names, $main ; } } my $prefix = $branch_mode eq "dir" ? "main/" : ""; @file_names = map "$prefix$_", @file_names; # rev_root is so far ignored in the test suite since most # commands specify a destination directory. This value is chosen # to make that obvious. my $rev_root = "/ignored"; sub comment_for_change { my ( $change_num ) = @_; my $comment = "comment '\"$change_num\n"; if ( $change_num != 2 ) { for ( $change_num % 5 ) { $comment = "- $comment" if $_ == 1; $comment = "-- $comment" if $_ == 2; $comment = "-$comment" if $_ == 3; $comment = "--$comment" if $_ == 4; } } return $comment; } ## ## BUILD REVISIONS IN MEMORY ## my @revs; my @changes; my %versions; { # We use $counter below to drive the insertion of a range of characters # and labels. This is based on the file name, the branch_id, and # the change number in order to yield consistent numbering between # the various .revml files. the file name portion is a number representing # the position of the file in @file_names. This hash contains that # mapping. my %file_numbers = do { my $i = 0; map { ( $_ => $i++ ) } @file_names; }; my $user_id = "test_user" ; # $user_id .= "@p4_t_client" if $which eq "p4" ; my %rev_num ; ## We never get around to changes 7..9 my %deleted_change_num = ( ## Delete the "del/f" in change ## except you can't delete anything in change 1, eh? map( ( "${prefix}del/f$_" => $_ ), (2..9) ), ) ; if ( $which eq "vss" ) { $deleted_change_num{"${prefix}readd"} = 2; } my %created_change_num = ( ## Add the "add/f" in change map( ( "${prefix}add/f$_" => $_ ), (1..9) ), ) ; my %head_revs; sub concoct_file_rev { ## Create a revision for a branch of a file my ( $rev_spec ) = @_; my $abs_name = $rev_spec->{abs_name}; my $name = $rev_spec->{name}; my $branch_id = $rev_spec->{branch_id}; my $change_num = $rev_spec->{change_num}; my $comment = $rev_spec->{comment}; $comment = comment_for_change $change_num unless defined $comment; ## Key for %rev_num and %head_revs my $previous = $head_revs{"$rev_spec->{base_abs_name}($rev_spec->{base_branch_id})"}; my $rev_num = $rev_spec->{is_branch} ? $which eq "vss" ? do { ## VSS branch creation (where we use placeholders) is a ## numbered operation. The child is branched at the ## parent's rev number plus one. my $prev_id = $previous->{rev_id}; $prev_id =~ s/^[0-9]+\.//; $prev_id + 1; } : ( 0 <= index "|p4|svn|", "|$which|" ) ? 1 : 0 : ( $rev_num{"$name($branch_id)"} || 0 ) + 1; $rev_num{"$name($branch_id)"} = $rev_num; my $previous_id = defined $previous ? $previous->{id} : undef; print STDERR " $name#$rev_num:" if $debug ; # This is arranged so that the counter value for a branch is # before the counter value for the unbranched file in the # same change number. This is so that the branched files # get committed after the previous change's files, but before # the next change's unbranched files. my $counter = sprintf "%03d", ( $change_num * keys( %file_numbers ) + $rev_spec->{file_number} ) * 2 + ( $rev_spec->{is_branched} ? 0 : 1 ); die "counter too big" if $counter > 254 ; my $char_num = $counter + 1; ## CVS on Win32 gets bummed out about ^Z in text files. $char_num++ if $^O =~ /Win32/ && $char_num == 26; my $is_binary = 0 <= index $name, "binary"; my $content = $is_binary ? chr( $change_num & 0x07 ) x 100 : sprintf( qq{%s@%d(%s) \\0x%02x="%s"}, do { ## We don't want content differing based on ## branch_mode because the test suite compares ## output from one repository to the t/test-*.revml files ## this script builds for another repository. ( my $n = $name ) =~ s{^main[^/]*/}{}g; $n; }, $change_num, $rev_spec->{branch_label}, $char_num, chr( $char_num ), ) ; $content = "$content\n" x 200 if 0 <= index $name, "a_big_file" ; $content .= "\n" unless $is_binary; $content = $previous->{content} if $rev_spec->{no_change}; my $bin_char_count = $content =~ tr/\x00-\x08\x0b-\x1f\x7f-\xff// ; my $total_char_count = length $content; my $legal_xml_char_count = $total_char_count - $bin_char_count; ## use the same heuristics VCP::Dest::revml uses, a testing nono, ## but there's not necessarily a better way and this is ok for ## regression tests. my $encoding = ( $legal_xml_char_count + $bin_char_count * 20 ) <= $total_char_count * 77/57 ? "none" : "base64"; my $secs = $counter; my $mins = int( $secs / 60 ); $secs -= $mins * 60; my $r = { _change_num => $change_num, name => $name, type => $is_binary ? "binary" : "text", encoding => $encoding, user_id => $user_id, branch_id => $branch_id, previous_id => $previous_id, content => $content, time => sprintf( "2000-01-02 12:%02d:%02dZ", $mins, $secs ), ## In p4, all files in a change number have an identical comment. ## We impose this on the other solutions to test foo->p4 change ## number aggregation. comment => $comment, } ; ## p4 doesn't handle modtime until very recently, and then it ## doesn't expose it by default. $r->{mod_time} = sprintf "2000-01-01 12:%02d:%02dZ", $mins, $secs unless 0 <= index "|p4|svn|", $which; if ( $rev_spec->{is_branch} ) { $r->{action} = "branch"; if ( $which eq "cvs" ) { $r->{time} = undef; delete $r->{user_id}; ## CVS branch operations don't record the user # $r->{user_id} = "unknown_CVS_user"; } } elsif ( $rev_num eq 1 ) { $r->{action} = "add"; } elsif ( 0 <= index $name, "readd" ) { if ( $which eq "vss" ) { $r->{action} = $change_num == $deleted_change_num{$name} ? "delete" : "edit"; $rev_num{"$name($branch_id)"} = 0 if $r->{action} eq "delete"; } else { if ( $change_num % 2 ) { ## Add it on the odd numbers $r->{action} = "add" ; } else { $r->{action} = "delete" ; } } } elsif ( defined $deleted_change_num{$name} && $change_num == $deleted_change_num{$name} ) { $r->{action} = "delete" ; } else { $r->{action} = "edit" ; } if ( $which eq "p4" ) { $r->{p4_info} = "Some info $which might emit about this file" ; $r->{rev_id} = $rev_num ; ## In p4, you may have skipped some change numbers $r->{change_id} = ( $r->{rev_id} - 1 ) * 2 + 1 ; ## TODO: Delete this next line when we get VCP::Dest::p4 to sync ## change numbers $r->{change_id} = $change_num ; } elsif ( $which eq "svn" ) { $r->{svn_info} = "Some info $which might emit about this file" ; ## Add one to the revision number to allow for directory ## creation. $r->{rev_id} = $change_num + 1; $r->{change_id} = $change_num + 1; } elsif ( $which eq "cvs" ) { $r->{cvs_info} = "Some info $which might emit about this file" ; $r->{rev_id} = "1.$rev_num" ; $r->{rev_id} = "1.1.2.$rev_num" if $rev_spec->{is_branched}; $r->{action} = "edit" if $r->{action} eq "add"; } elsif ( $which eq "revml" ) { $r->{cvs_info} ="Some info about this file" ; $r->{rev_id} = $rev_spec->{is_branched} ? "1.$rev_num" : $rev_num ; $r->{change_id} = $change_num ; } elsif ( $which eq "vss" ) { ## Source::vss has to fake rev numbered deletions with a ## ".1" revision. my $is_delete = $r->{action} eq "delete"; my $rev_id = $rev_num; if ( exists $deleted_change_num{$name} && $change_num > $deleted_change_num{$name} ) { $rev_id = "1.$rev_id"; } else { $rev_id = "0.$rev_id"; } $r->{user_id} = undef if $is_delete; $r->{rev_id} = $rev_id; ## VSS does not allow comments on branch points $r->{comment} = undef if $r->{action} eq "branch"; $r->{action} = "edit" if $r->{action} eq "add"; } else { die "$which unhandled" ; } my $id_char = $which eq "svn" ? "\@" : "#"; $r->{id} = "$abs_name$id_char$r->{rev_id}"; if ( $which eq "cvs" ) { ( my $branch_number = $r->{rev_id} ) =~ s/\.\d+\z//; $branch_number = "" unless $branch_number =~ tr/.//; ## Using branch number here instead of the $branch_id (which ## may be a CVS branch tag so that altering the CVS source by ## moving or changing or removing the branch's branch tag does ## not result in different filebranch_ids. $r->{source_filebranch_id} = "$abs_name<$branch_number>"; } else { $r->{source_filebranch_id} = $abs_name; } $r->{source_repo_id} = "$which:test_repository"; ## $counter normally jumps by two, it only gets incremented ## by a 1 when there's a branch involved. We don't want to ## label *every* revision, so we label every other revision ## (on average). We don't label revisions that correspond ## to "delete" actions, most, if not all, SCMs can't label ## those. unless ( $r->{action} eq "delete" || $counter & 2 || $rev_spec->{no_labels} ) { $r->{labels} = [ "achoo$counter", "blessyou$counter", ] ; ## Label operations cause the version number to be ## incremented in VSS. $rev_num{"$name($branch_id)"} += 2 if $which eq "vss"; } $counter = sprintf "%02d", $counter + 1 ; push @{$changes[$change_num]}, $r ; if ( $debug ) { print STDERR " #$r->{rev_id}" ; print STDERR " \@$r->{change_id}" if defined $r->{change_id} ; print STDERR " ($r->{action})" ; print STDERR " base: $r->{previous_id}" if defined $r->{previous_id}; print STDERR "\n" ; } $name =~ s/main-branch-[1-9]/main/ if $branch_mode eq "dir" && 0 < index $name, "branched"; $versions{$r->{id}} = $r; $head_revs{"$abs_name($rev_spec->{branch_id})" } = $r; return $r; } for my $change_num ( 1..6 ) { print STDERR "concocting \@$change_num:\n" if $debug ; ## We do the file names in sorted order because going in and out of ## some repositories like CVS folds all timestamps in a change to ## all be the same time (the cvs commit sets the timestamp), and ## we want the revml that comes out to be in the same order ## as the revml that went in. for my $master_name ( sort @file_names ) { next if ( ( defined $created_change_num{$master_name} && $change_num < $created_change_num{$master_name} ) || ( defined $deleted_change_num{$master_name} && $change_num > $deleted_change_num{$master_name} && ! ( $which eq "vss" && 0 <= index $master_name, "readd" ) ) ); my $main_branch_id = $branch_mode eq "dir" ? (fileparse "$rev_root/$master_name")[1] : ""; my $main_abs_name = "$rev_root/$master_name"; my %main_branch_rev = ( ## The file's revision for this change on the main branch branch_id => $main_branch_id, branch_label => "", abs_name => $main_abs_name, name => $master_name, change_num => $change_num, file_number => $file_numbers{$master_name}, ## All main branch revs are based on their predecessors base_abs_name => $main_abs_name, base_branch_id => $main_branch_id, ); my @rev_specs; ## The branched file branches in change 2 ## Do branched revs first because we want to make sure that in ## change 2, say, the branched rev is based on change 1's version ## not on change 2's non-branched version. This is important ## because the emission loop assumes that the last mainbranch ## version is the base for the first branched version. if ( ( 0 <= index $master_name, "branched" ) && $change_num >= 2 ) { my $name = $master_name; $name =~ s/main/main-branch-1/ if $branch_mode eq "dir"; my $branch_abs_name = "$rev_root/$name"; my $branch_id = $branch_mode eq "dir" ? (fileparse "$rev_root/$name")[1] : "main-branch-1"; my %branched_fields = ( %main_branch_rev, ## copy most fields for the placeholder and ## any branch revisions abs_name => $branch_abs_name, name => $name, branch_id => $branch_id, branch_label => "main-branch-1", is_branched => 1, ); if ( $change_num == 2 ) { ## The placeholder rev is based on the parent branch, ## All later branched revs are based on their predecessors push @rev_specs, { ## A placeholder revision %branched_fields, is_branch => 1, no_change => 1, no_labels => 1, ## We turn on no_labels because CVS can't ## store labels on a branched file that has ## not been changed because there is no rev ## to label. I think (TODO: double check). base_abs_name => $main_abs_name, base_branch_id => $main_branch_id, $which eq "cvs" ? ( comment => "[vcp] create branch '$branch_id'", ) : (), }; } elsif ( ! ( 0 <= index $master_name, "no-change" ) ) { push @rev_specs, { %branched_fields, base_abs_name => $branch_abs_name, base_branch_id => $branch_id, }; } } push @revs, map concoct_file_rev( $_ ), @rev_specs, \%main_branch_rev; } print STDERR "\n" if $debug ; } } ## Emit the document use Digest::MD5 qw( md5_base64 ) ; use File::Basename ; use RevML::Doctype 'DEFAULT' ; use RevML::Writer qw( :all :dtd_tags ) ; sub _emit_characters { my ( $buf ) = @_ ; setDataMode( 0 ) ; ## note that we don't let XML munge \r to be \n! while ( $buf =~ m{\G(?: ( [ \x00-\x08\x0b-\x1f\x7f-\xff]) | ([^\x00-\x08\x0b-\x1f\x7f-\xff]*) )}gx ) { if ( defined $1 ) { char( "", code => sprintf( "0x%02x", ord $1 ) ) ; } else { characters( $2 ) ; } } } my $prog = basename $0 ; my $f0 = "$prog.0" ; my $f1 = "$prog.1" ; binmode STDOUT ; setDataMode 1 ; xmlDecl ; time '2000-01-01 00:00:00Z' ; rep_type $which ; rep_desc 'random text, for now' ; if ( $which eq "revml" ) { ## These are ignored by other interfaces, so don't test with them. comment "A comment" ; } rev_root $rev_root; # The name "main-branch-1" is used to make sure that these revs # occur after the main/ branch revs. This is because we sort the # revml file by name and (currently) VCP::Source::revml assumes that # the previous rev is always there before the current rev. my %revs_by_name; ## For svn, convert labels in to "/tags/..." branches if ( $which eq "svn" ) { my @label_revs; for my $r ( @revs ) { for ( @{$r->{labels}} ) { my $name = "tags/$_/$r->{name}"; my $rev_id = $r->{rev_id} + 1; my $id = "/ignored/$name\@$rev_id"; my $branch_id = "/ignored/tags/$_$r->{branch_id}"; my $source_filebranch_id = "/ignored/tags/$_$r->{source_filebranch_id}"; $id =~ s{(.)/ignored/}{$1/}; $branch_id =~ s{(.)/ignored/}{$1/}; $source_filebranch_id =~ s{(.)/ignored/}{$1/}; my $change_num = $r->{_change_num} + 1; my $lr = { _change_num => $change_num, id => $id, previous_id => $r->{id}, action => "branch", name => $name, user_id => $r->{user_id}, time => $r->{time}, source_filebranch_id => $source_filebranch_id, branch_id => $branch_id, source_repo_id => $r->{source_repo_id}, rev_id => $rev_id, change_id => $r->{change_id} + 1, comment => comment_for_change( $change_num ), }; push @label_revs, $lr; } @{$r->{labels}} = (); } push @revs, @label_revs; } ## Build @files from @changes. An older version built revml in change number ## order, but we now built in filename, change number order to make sorting ## of the output of vcp tests in to a predictable order possible. This is ## because cvs->revml does not result in predictable revml order due to ## cvs oddities, so all the tests generate revml in name,rev order. for my $r ( sort { $a->{name} cmp $b->{name} || do { my @a = split /\D+/, $a->{rev_id}; my @b = split /\D+/, $b->{rev_id}; my $result; while ( @a && @b ) { $result = shift( @a ) <=> shift( @b ) ; last if $result ; } $result || @a <=> @b; } } grep { my $r = $_; grep( $_ eq $r->{_change_num}, @change_nums ) } @revs ) { push @{$revs_by_name{$r->{name}}}, $r; } my @sorted_rev_names = sort { my @a = split "/", $a ; my @b = split "/", $b ; while ( @a && @b ) { my $result = shift( @a ) cmp shift( @b ) ; return $result if $result ; } return @a <=> @b ; } keys %revs_by_name ; for my $rev_name ( @sorted_rev_names ) { print STDERR "emitting $rev_name:\n" if $debug ; for my $r ( @{$revs_by_name{$rev_name}} ) { my $change_num = $r->{_change_num} ; my $is_first = $change_num eq $change_nums[0] ; my $digest_mode = $is_first && $batch && ! $bootstrap ; my $is_bootstrap = $is_first && ( ! $batch || $bootstrap ); next if ( $is_first && ( ! $batch || $digest_mode ) && $r->{action} eq "delete" ) ; print STDERR " $r->{name}#$r->{rev_id}:" if $debug ; my $pr; if ( $r->{previous_id} ) { $pr = $versions{$r->{previous_id}}; $pr = $versions{$pr->{previous_id}} if $pr->{action} eq "branch" && $which ne "vss"; die "No previous rev found for $r->{name}($r->{branch_id})#$r->{rev_id}", ": key '$r->{previous_id}', does not exist in ", join " ", map "'$_'", sort keys %versions unless $pr; } start_rev id => $r->{id}; name $r->{name}; source_name $r->{name}; source_filebranch_id $r->{source_filebranch_id}; source_repo_id $r->{source_repo_id}; action $digest_mode ? "digest" : $r->{action}; ## There are a number of data fields that are simply not ## tracked by VSS. if ( $r->{action} ne "branch" ) { type $r->{type} unless $which eq "vss" && $r->{action} eq "delete"; if ( ! $digest_mode ) { p4_info $r->{p4_info} if defined $r->{p4_info} ; cvs_info $r->{cvs_info} if defined $r->{cvs_info} ; } } if( length $r->{branch_id} ) { branch_id $r->{branch_id}; source_branch_id $r->{branch_id}; } rev_id $r->{rev_id}; source_rev_id $r->{rev_id}; if( defined $r->{change_id} && $which ne "vss" && $which ne "cvs" ) { ## change_id is used in the internal logic of this program ## but is not actually created by ## VCP::Source::cvs or VCP::Source::vss change_id $r->{change_id}; source_change_id $r->{change_id}; } my $digestion = 1 ; if ( $r->{action} eq "branch" ) { time $r->{time} if defined $r->{time} && ! ( $which eq "vss" && $r->{action} eq "delete" ); user_id $r->{user_id} if defined $r->{user_id}; comment $r->{comment} if defined $r->{comment} && ! ( $which eq "vss" && $r->{action} eq "delete" ); previous_id $r->{previous_id} if defined $r->{previous_id}; $digestion = 0; } elsif ( $digest_mode ) { print STDERR " digest" if $debug ; } else { time $r->{time} unless $which eq "vss" && $r->{action} eq "delete"; mod_time $r->{mod_time} if defined $r->{mod_time} && ! ( $which eq "vss" && $r->{action} eq "delete" ); user_id $r->{user_id} if defined $r->{user_id}; if ( $r->{labels} ) { label $_ for @{$r->{labels}} ; } comment $r->{comment} if defined $r->{comment} && ! ( $which eq "vss" && $r->{action} eq "delete" ); previous_id $r->{previous_id} if defined $r->{previous_id};# && ! $is_bootstrap; if ( $which eq "revml" ) { ## Stuff some text in to these tags to see if it makes ## it through reading, altering, writing unscathed. ## these should be "real" ids, but we're just ## testing passthrough for now. ## TODO: use real ids and test these in more conversions earlier_id "fake earlier id#1"; earlier_id "fake earlier id#2"; } if ( $r->{action} eq "delete" ) { print STDERR " delete" if $debug ; $digestion = 0 ; } else { if ( ! $pr || $pr->{action} eq "delete" || $is_bootstrap || $r->{encoding} ne "none" ) { print STDERR " content" if $debug ; start_content encoding => $r->{encoding} ; if ( $r->{encoding} eq "none" ) { _emit_characters $r->{content} ; } else { setDataMode( 0 ) ; characters encode_base64 $r->{content} ; } end_content ; setDataMode( 1 ) ; } else { die "No previous rev for $r->{name}#$r->{rev_id}" unless defined $pr; die "delta in change $change_num!" unless $change_num > 1; my $bv_r = $pr; $bv_r = $versions{$bv_r->{previous_id}} while $bv_r->{action} eq "delete" || $bv_r->{action} eq "branch"; print STDERR " delta" if $debug ; start_delta type => "diff-u", encoding => "none" ; _emit_characters( diff \$bv_r->{content}, \$r->{content}, { STYLE => "VCP::DiffFormat", } ); end_delta ; setDataMode( 1 ) ; } } } digest md5_base64( $r->{content} ), type => "MD5", encoding => "base64" if $digestion ; END_REV: if ( $debug ) { print STDERR " #$r->{rev_id}" ; print STDERR " \@$r->{change_id}" if defined $r->{change_id} ; print STDERR " ($r->{action})\n" ; } } print STDERR "\n" if $debug ; } END { if ( -f $f0 ) { unlink $f0 or warn "$!: $f0" ; } if ( -f $f1 ) { unlink $f1 or warn "$!: $f1" ; } } endAllTags ;