#!/usr/bin/perl -w # # PVCS to Perforce converter, phase I: extract PVCS metadata # # Copyright 1997 Perforce Software. All rights reserved. # Written by James Strickland, July 1997 # Modified by Robert Cowham, October 2003 # # # PVCS archives are stored in a non-documented, binary format. However, all # the metadata (data about the revisions in the archive) is available in plain # text using the PVCS vlog command. This script reads a stream output by # vlog and extracts all useful metadata, writing the result to files which # contain no extraneous information and which are easy to parse. # # vlog output consists of a header (specifying the archive and workfile names, # labels and so on) and a block of text for each revision in the archive. # The block of text for each revision is preceded by a line # of dashes, and is of the form # # Rev <revision number> # <possible Locked by: line> # Checked in: <date> # Last modified: <date> # Author id: <author> lines deleted/added/moved: <i>/<j>/<k> # <optional Branches: line> # <description> # # The end of output for an archive is marked by a line of equal signs. # Thus, a description can be terminated by either a row of dashes or # a row of equal signs. # # Note that ordering of lines is not important except that the header must # precede the revision info. Note also that it is possible for there to be # no revisions at all, hence no revision info. # # Of course, if Intersolv changes the output of vlog, # this script may have to change to match it! require 5.0; use strict; use integer; use Time::Local; use lib '.'; use convert; use Change; use File::Path; my $branch_count=0; my %month = ( "Jan" => 0, "Feb" => 1, "Mar" => 2, "Apr" => 3, "May" => 4, "Jun" => 5, "Jul" => 6, "Aug" => 7, "Sep" => 8, "Oct" => 9, "Nov" => 10,"Dec" => 11 ); mkpath($convert::metadata_dir); # ensure the metadata directory exists mkpath("$convert::metadata_dir/labels"); # ensure the metadata directory exists # find all files under the root (the equivalent of an ls -R >filelist) my %archivefile; print "Listing all files under $convert::root. " . scalar(localtime()) . "\n"; unlink("$convert::metadata_dir/filelist"); open(FILELIST,">$convert::metadata_dir/filelist") or die "can't open: $!\n"; if (!$convert::projects) { list_archives($convert::root, "/"); } else { my $proj; foreach $proj (split(/\|/, $convert::projects)) { list_archives($convert::root, $proj); } } close(FILELIST); # Run vlog on all files in the whole tree (vlog given the argument @filelist) # - warnings are redirected to the file vlogerrs # NOTE: YOU CAN'T USE THE -q FLAG TO VLOG BECAUSE IT ELIMINATES ALL WARNINGS! unlink("$convert::metadata_dir/pvcs_properties"); print "Extracting metadata from all PVCS archive files under $convert::root " . scalar(localtime()) . "\n"; runcmd("vlog -xevlogerrs \@$convert::metadata_dir/filelist > $convert::metadata_dir/pvcs_properties"); open(F,"vlogerrs"); if(<F>) { die "vlog produced error output - see the file 'vlogerrs'" if(!$convert::ignore_vlogerrs); my $errmsg = "vlog produced error output - see the file 'vlogerrs' - these files are being ignored"; print $errmsg; convert::log($errmsg); while(<F>) { convert::log($_); } } close(F); my $msg="can't open"; open(INPUT, "<$convert::metadata_dir/pvcs_properties") or die $msg; open(FILES, ">$convert::metadata_dir/files") or die $msg; open(LABELS_SUMMARY, ">$convert::metadata_dir/labels_summary") or die $msg; open(LABELS, ">$convert::metadata_dir/labels_details") or die $msg; open(CHANGES, ">$convert::metadata_dir/changes.ns") or die $msg; # ns for "not sorted" open(BRANCHES,">$convert::metadata_dir/branches") or die $msg; unlink("$convert::metadata_dir/changes"); # get rid of any existing change file to avoid confusion my (%label_processed, %label_warned); while(!eof(INPUT)) { read_archive_metadata(\*INPUT); } sub read_archive_metadata { my $input = shift; my ($archive,$workfile,$expand_keywords,$generate_delta,%branch_label); my $separator = "^-{30,}"; # line of (sufficient number of) dashes my $terminator = "^={30,}"; # line of (sufficient number of) equal signs # read the header HEADER_LOOP: while(<$input>) { HEADER_SWITCH: { if (/$separator/o) { last HEADER_LOOP; } if(/$terminator/o) { # ignore it if it has no revisions print "empty archive $archive ignored\n"; return; } if(/^Archive:\s*([^\s].*)/) { $archive=convert::forward_slash($1); # use slash as pathname separator, not backslash $workfile=$archivefile{lc($archive)}; $workfile = lc($workfile) if($convert::lowercase_filenames); } elsif(/^Attributes:/) { # attributes appear one per line, each one prefixed by 3 spaces while(<$input>) { redo HEADER_SWITCH if(!/^ /); if(/EXPANDKEYWORDS/) { $expand_keywords = !/NO/; }; if(/GENERATEDELTA/) { $generate_delta = !/NO/; }; } } elsif(/^Version labels:/) { # labels appear one per line, each one prefixed by 3 spaces while(<$input>) { redo HEADER_SWITCH if(!/^ /); if(/^ "([^"]*)" = ([0-9\.\*]*)/) { defined($archive) or die "Huh? Version labels before Archive:\n"; my ($no_space_label,$rev) = ($1,$2); my $raw_label = $no_space_label; if( $no_space_label =~ s%[\000-\031 @#,/\\]%_%g ) { if (!$label_warned{$no_space_label}) { $label_warned{$no_space_label} = 1; print "Label \"$raw_label\" was mapped to \"$no_space_label\"\n"; } } if( $no_space_label =~ s%[\*]%\!%g ) { if (!$label_warned{$no_space_label}) { $label_warned{$no_space_label} = 1; print "Label \"$raw_label\" was mapped to \"$no_space_label\"\n"; } } if( $no_space_label =~ /^\d+$/ ) { $no_space_label = "_$no_space_label"; if (!$label_warned{$no_space_label}) { $label_warned{$no_space_label} = 1; print "Label \"$raw_label\" was mapped to \"$no_space_label\"\n"; } } if($rev =~ /\*$/) { # "floating" label (branch label) $rev =~ s/\.\*$//; # strip off the ".*" $branch_label{$rev}=$no_space_label; $branch_label{$rev} = lc($branch_label{$rev}) if($convert::lowercase_branchnames); } else { if (!$label_processed{$no_space_label}) { $label_processed{$no_space_label} = 1; print LABELS_SUMMARY "$no_space_label\n"; unlink("$convert::metadata_dir/labels/$no_space_label"); # zap contents } my $msg="can't open"; print LABELS "$no_space_label#$archive#$rev\n"; } } } } } # HEADER_SWITCH } # HEADER_LOOP # the header has been read return if(!defined($archive) || !defined($workfile)); # determine file type based on either filename extension or the attributes # in the PVCS archive. my $file_type; for (keys(%convert::filetype_regex)) { if($workfile =~ /$convert::filetype_regex{$_}/i) { $file_type = $_; last; } } if(!defined($file_type)) { # no filename extension match if($generate_delta) { $file_type = ( $expand_keywords ? "ktext" : "text" ); } else { if($expand_keywords) { print "EXPANDKEYWORDS and NOGENERATEDELTA in $workfile; "; print " taken to be binary\n"; } $file_type = "binary"; } } print FILES "$archive#$workfile#$file_type\n"; # read the revision info my $finished=0; while(!$finished) { my ($indentation,$revision,$timestamp,$author,$change_description); # ignore lines until Rev line found # (there shouldn't be any lines before the Rev line - this is just # defensive programming) while(<$input>) { if(/^(\s*)Rev\s+([0-9\.]*)/) { $indentation=$1; $revision=$2; last; } } # look for Checked in timestamp while(<$input>) { if(/^${indentation}Checked in:/) { if(/^${indentation}Checked in:\s*(\d+) ([a-zA-Z]{3}) (\d{4}) (\d+):(\d\d):(\d\d)/) { # timelocal takes second, minute, hour, day, month, year # in the range 0..59, 0..59, 0..23, 1..31, 0..11, 0..99 # The two digit year has assumptions made about it such that # any time before 2037 (when the 32-bit seconds-since-1970 time # will run out) is handled correctly. i.e. 97 -> 1997, 1 -> 2001 # PVCS, thank Goodness, gives us 4 digit years. $timestamp=timelocal($6,$5,$4,$1,$month{$2},($3)%100); } elsif(/^${indentation}Checked in:\s*([a-zA-Z]{3}) (\d+) (\d{4}) (\d+):(\d\d):(\d\d)/) { $timestamp=timelocal($6,$5,$4,$2,$month{$1},($3)%100); } elsif(/^${indentation}Checked in:\s*([a-zA-Z]{3}) (\d+) (\d+):(\d\d):(\d\d) (\d{4})/) { $timestamp=timelocal($5,$4,$3,$2,$month{$1},($6)%100); } else { print "unrecognized date format - must be\n"; print "dd mmm yyyy hh:mm:ss\nOR\nmmm dd yyyy hh:mm:ss\n"; print "OR\nmmm dd hh:mm:ss yyyy\n"; print "where mmm is a three letter English month abbreviation.\n"; print "This is likely because you've specified a different date format in your\n"; print "PVCS configuration file. Remove that specification and try again.\n"; die; } last; } } # look for Author id line while(<$input>) { if(/^${indentation}Author id:\s*([^\s]*)/) { $author=$1; $author = lc($author) if($convert::lowercase_usernames); if( $author =~ s/[ \000-\031]/_/g ) { print "spaces or unprintable characters in author id "; print "\"$author\" were mapped to _\n"; } last; } } # ok, now we rely on order - after the Author id line there is an optional # Branches line, and then the rest is the change description $change_description = ""; while(<$input>) { $finished = /$terminator/o; last if( $finished || /$separator/o); if(/^${indentation}Branches:\s+/) { next if($convert::ignore_branches); chomp; my @branch_list = split(/\s+/,$'); print BRANCHES "$archive#$revision"; # print out branch point my $rev; foreach $rev (@branch_list) { if(exists($branch_label{$rev})) { print BRANCHES "#$branch_label{$rev}"; } else { print BRANCHES "#$convert::branch_prefix" . ++$branch_count; } print BRANCHES "#$rev"; } print BRANCHES "\n"; next; # not part of the change description } if(/^${indentation}(.*)/) { $change_description .= substr($_,length($indentation)); } } last if(!defined($revision) || !defined($timestamp) || !defined($author)); my $change = new Change( { 'timestamp' => $timestamp, 'author' => $author, 'change_description' => $change_description, 'changelist' => [ join('#',$archive, $revision) ] } ); $change->put(\*CHANGES); } } sub list_archives { my $rt=shift; my $proj=shift; my $archive; my $workfile; unlink("$convert::metadata_dir/archive_list"); runcmd("pcli list -pr\"$rt\" -id$convert::pvcs_user:$convert::pvcs_passwd -z -l -aArchivePath \"$proj\" > $convert::metadata_dir/archive_list"); open(ARCHIVE_LIST,"<$convert::metadata_dir/archive_list") or die "can't open: $!\n"; while(<ARCHIVE_LIST>) { if(/^(\/.*)$/) { $workfile=$1; my $raw_workfile = $workfile; if( $workfile =~ s%[\000-\031@#\\]%_%g ) { print "Workfile \"$raw_workfile\" was mapped to \"$workfile\"\n"; } $workfile = lc($workfile) if($convert::lowercase_filenames); } elsif(/^ArchivePath=(.*)$/) { $archive=$1; if ($archive !~ /null/) { if (exclude_file($workfile)) { my $msg = "Ignoring file: $workfile\n"; print $msg; convert::log($msg); } else { if ($archive =~ / /) { # Deal with spaces in filenames my $temp = $archive; $temp =~ s@\\\\@\\\\\\\\@; # Double up leading \\ as it is read differently when quoted it seems print FILELIST "\"$temp\"\n"; } else { print FILELIST "$archive\n"; } } $archivefile{lc(convert::forward_slash($archive))} = $workfile; } } } close(ARCHIVE_LIST); } sub runcmd { my $cmd = shift; convert::log($cmd); my $result = system($cmd); if ($result) { die "***** Failed to run command: $cmd -> $result\n"; } } sub exclude_file # True if file should be excluded { my $file = shift; if (!$convert::exclude) { return 0; } # note use of Quote Meta encapsulation and file matching at start of string if ($convert::exclude =~ /\|/) { my $pat; foreach $pat (split(/\|/, $convert::exclude)) { if ($file =~ /^\Q$pat\E/) { return 1; } } } elsif ($file =~ /^\Q$convert::exclude\E/) { return 1; } return 0; }
# | Change | User | Description | Committed | |
---|---|---|---|---|---|
#4 | 7349 | Robert Cowham |
- Tweak logging - Add option to ignore vlogerrs in case of corrupt PVCS |
||
#3 | 7295 | Robert Cowham | Handle leading \\ when filenames are quoted due to space - pvcs oddity. | ||
#2 | 7115 | Robert Cowham |
Incorporate Sven's changes: Uses the officially support version P4Perl 2008.2 mkdepot.pl now only uses one connection to the Perforce server in tagged mode. Branching enabled. This was never properly implemented and partly disabled. Branching should work now as expected. |
||
#1 | 4664 | Robert Cowham | Branch into permanent location. | ||
//guest/robert_cowham/perforce/utils/pvcstop4/main/readvlog.pl | |||||
#1 | 4647 | Robert Cowham |
Rename //guest/robert_cowham/perforce/utils/pvcstop4/... To //guest/robert_cowham/perforce/utils/pvcstop4/main/... |
||
//guest/robert_cowham/perforce/utils/pvcstop4/readvlog.pl | |||||
#7 | 3791 | Robert Cowham | Added exclude option (but not fully tested!) | ||
#6 | 3789 | Robert Cowham | Fixed problem with dir separators. | ||
#5 | 3728 | Robert Cowham | Merged in old changes. | ||
#4 | 3727 | Robert Cowham | Hayden's changes | ||
#3 | 3721 | Robert Cowham |
Various changes (tested at a client): - Use PCLI to read file location info (e.g. if moved) - Use P4Perl for speed - Removed DB_File as it has a bug - Do labels differently for vastly improved speed |
||
#2 | 3638 | Robert Cowham |
Renamed some files with windows extensions. Imported changes from Vsstop4 scripts: - Use P4Perl if installed - Use DB_File if installed - Rework labelling algorithm for speed Net result should be much improved performance. Next step is to use pcli. |
||
#1 | 2289 | Robert Cowham | Initial version from Perforce web page |