#!/usr/bin/perl -w # # PVCS to Perforce converter, phase I: extract PVCS metadata # # Copyright 1997 Perforce Software. All rights reserved. # Written by James Strickland, July 1997 # Modified by Robert Cowham, October 2003 # # # PVCS archives are stored in a non-documented, binary format. However, all # the metadata (data about the revisions in the archive) is available in plain # text using the PVCS vlog command. This script reads a stream output by # vlog and extracts all useful metadata, writing the result to files which # contain no extraneous information and which are easy to parse. # # vlog output consists of a header (specifying the archive and workfile names, # labels and so on) and a block of text for each revision in the archive. # The block of text for each revision is preceded by a line # of dashes, and is of the form # # Rev # # Checked in: # Last modified: # Author id: lines deleted/added/moved: // # # # # The end of output for an archive is marked by a line of equal signs. # Thus, a description can be terminated by either a row of dashes or # a row of equal signs. # # Note that ordering of lines is not important except that the header must # precede the revision info. Note also that it is possible for there to be # no revisions at all, hence no revision info. # # Of course, if Intersolv changes the output of vlog, # this script may have to change to match it! require 5.0; use strict; use integer; use Time::Local; use lib '.'; use convert; use Change; use File::Path; my $branch_count=0; my %month = ( "Jan" => 0, "Feb" => 1, "Mar" => 2, "Apr" => 3, "May" => 4, "Jun" => 5, "Jul" => 6, "Aug" => 7, "Sep" => 8, "Oct" => 9, "Nov" => 10,"Dec" => 11 ); mkpath($convert::metadata_dir); # ensure the metadata directory exists mkpath("$convert::metadata_dir/labels"); # ensure the metadata directory exists # find all files under the root (the equivalent of an ls -R >filelist) my %archivefile; print "Listing all files under $convert::root. " . scalar(localtime()) . "\n"; unlink("$convert::metadata_dir/filelist"); open(FILELIST,">$convert::metadata_dir/filelist") or die "can't open: $!\n"; if (!$convert::projects) { list_archives($convert::root, "/"); } else { my $proj; foreach $proj (split(/\|/, $convert::projects)) { list_archives($convert::root, $proj); } } close(FILELIST); # Run vlog on all files in the whole tree (vlog given the argument @filelist) # - warnings are redirected to the file vlogerrs # NOTE: YOU CAN'T USE THE -q FLAG TO VLOG BECAUSE IT ELIMINATES ALL WARNINGS! unlink("$convert::metadata_dir/pvcs_properties"); print "Extracting metadata from all PVCS archive files under $convert::root " . scalar(localtime()) . "\n"; runcmd("vlog -xevlogerrs \@$convert::metadata_dir/filelist > $convert::metadata_dir/pvcs_properties"); open(F,"vlogerrs"); die "vlog produced error output - see the file 'vlogerrs'" if(); close(F); unlink("vlogerrs"); my $msg="can't open"; open(INPUT, "<$convert::metadata_dir/pvcs_properties") or die $msg; open(FILES, ">$convert::metadata_dir/files") or die $msg; open(LABELS_SUMMARY, ">$convert::metadata_dir/labels_summary") or die $msg; open(LABELS, ">$convert::metadata_dir/labels_details") or die $msg; open(CHANGES, ">$convert::metadata_dir/changes.ns") or die $msg; # ns for "not sorted" open(BRANCHES,">$convert::metadata_dir/branches") or die $msg; unlink("$convert::metadata_dir/changes"); # get rid of any existing change file to avoid confusion my (%label_processed, %label_warned); while(!eof(INPUT)) { read_archive_metadata(\*INPUT); } sub read_archive_metadata { my $input = shift; my ($archive,$workfile,$expand_keywords,$generate_delta,%branch_label); my $separator = "^-{30,}"; # line of (sufficient number of) dashes my $terminator = "^={30,}"; # line of (sufficient number of) equal signs # read the header HEADER_LOOP: while(<$input>) { HEADER_SWITCH: { if (/$separator/o) { last HEADER_LOOP; } if(/$terminator/o) { # ignore it if it has no revisions print "empty archive $archive ignored\n"; return; } if(/^Archive:\s*([^\s].*)/) { $archive=convert::forward_slash($1); # use slash as pathname separator, not backslash $workfile=$archivefile{lc($archive)}; $workfile = lc($workfile) if($convert::lowercase_filenames); } elsif(/^Attributes:/) { # attributes appear one per line, each one prefixed by 3 spaces while(<$input>) { redo HEADER_SWITCH if(!/^ /); if(/EXPANDKEYWORDS/) { $expand_keywords = !/NO/; }; if(/GENERATEDELTA/) { $generate_delta = !/NO/; }; } } elsif(/^Version labels:/) { # labels appear one per line, each one prefixed by 3 spaces while(<$input>) { redo HEADER_SWITCH if(!/^ /); if(/^ "([^"]*)" = ([0-9\.\*]*)/) { defined($archive) or die "Huh? Version labels before Archive:\n"; my ($no_space_label,$rev) = ($1,$2); my $raw_label = $no_space_label; if( $no_space_label =~ s%[\000-\031 @#,/\\]%_%g ) { if (!$label_warned{$no_space_label}) { $label_warned{$no_space_label} = 1; print "Label \"$raw_label\" was mapped to \"$no_space_label\"\n"; } } if( $no_space_label =~ s%[\*]%\!%g ) { if (!$label_warned{$no_space_label}) { $label_warned{$no_space_label} = 1; print "Label \"$raw_label\" was mapped to \"$no_space_label\"\n"; } } if( $no_space_label =~ /^\d+$/ ) { $no_space_label = "_$no_space_label"; if (!$label_warned{$no_space_label}) { $label_warned{$no_space_label} = 1; print "Label \"$raw_label\" was mapped to \"$no_space_label\"\n"; } } if($rev =~ /\*$/) { # "floating" label (branch label) $rev =~ s/\.\*$//; # strip off the ".*" $branch_label{$rev}=$no_space_label; $branch_label{$rev} = lc($branch_label{$rev}) if($convert::lowercase_branchnames); } else { if (!$label_processed{$no_space_label}) { $label_processed{$no_space_label} = 1; print LABELS_SUMMARY "$no_space_label\n"; unlink("$convert::metadata_dir/labels/$no_space_label"); # zap contents } my $msg="can't open"; print LABELS "$no_space_label#$archive#$rev\n"; } } } } } # HEADER_SWITCH } # HEADER_LOOP # the header has been read return if(!defined($archive) || !defined($workfile)); # determine file type based on either filename extension or the attributes # in the PVCS archive. my $file_type; for (keys(%convert::filetype_regex)) { if($workfile =~ /$convert::filetype_regex{$_}/i) { $file_type = $_; last; } } if(!defined($file_type)) { # no filename extension match if($generate_delta) { $file_type = ( $expand_keywords ? "ktext" : "text" ); } else { if($expand_keywords) { print "EXPANDKEYWORDS and NOGENERATEDELTA in $workfile; "; print " taken to be binary\n"; } $file_type = "binary"; } } print FILES "$archive#$workfile#$file_type\n"; # read the revision info my $finished=0; while(!$finished) { my ($indentation,$revision,$timestamp,$author,$change_description); # ignore lines until Rev line found # (there shouldn't be any lines before the Rev line - this is just # defensive programming) while(<$input>) { if(/^(\s*)Rev\s+([0-9\.]*)/) { $indentation=$1; $revision=$2; last; } } # look for Checked in timestamp while(<$input>) { if(/^${indentation}Checked in:/) { if(/^${indentation}Checked in:\s*(\d+) ([a-zA-Z]{3}) (\d{4}) (\d+):(\d\d):(\d\d)/) { # timelocal takes second, minute, hour, day, month, year # in the range 0..59, 0..59, 0..23, 1..31, 0..11, 0..99 # The two digit year has assumptions made about it such that # any time before 2037 (when the 32-bit seconds-since-1970 time # will run out) is handled correctly. i.e. 97 -> 1997, 1 -> 2001 # PVCS, thank Goodness, gives us 4 digit years. $timestamp=timelocal($6,$5,$4,$1,$month{$2},($3)%100); } elsif(/^${indentation}Checked in:\s*([a-zA-Z]{3}) (\d+) (\d{4}) (\d+):(\d\d):(\d\d)/) { $timestamp=timelocal($6,$5,$4,$2,$month{$1},($3)%100); } elsif(/^${indentation}Checked in:\s*([a-zA-Z]{3}) (\d+) (\d+):(\d\d):(\d\d) (\d{4})/) { $timestamp=timelocal($5,$4,$3,$2,$month{$1},($6)%100); } else { print "unrecognized date format - must be\n"; print "dd mmm yyyy hh:mm:ss\nOR\nmmm dd yyyy hh:mm:ss\n"; print "OR\nmmm dd hh:mm:ss yyyy\n"; print "where mmm is a three letter English month abbreviation.\n"; print "This is likely because you've specified a different date format in your\n"; print "PVCS configuration file. Remove that specification and try again.\n"; die; } last; } } # look for Author id line while(<$input>) { if(/^${indentation}Author id:\s*([^\s]*)/) { $author=$1; $author = lc($author) if($convert::lowercase_usernames); if( $author =~ s/[ \000-\031]/_/g ) { print "spaces or unprintable characters in author id "; print "\"$author\" were mapped to _\n"; } last; } } # ok, now we rely on order - after the Author id line there is an optional # Branches line, and then the rest is the change description $change_description = ""; while(<$input>) { $finished = /$terminator/o; last if( $finished || /$separator/o); if(/^${indentation}Branches:\s+/) { next if($convert::ignore_branches); chomp; my @branch_list = split(/\s+/,$'); print BRANCHES "$archive#$revision"; # print out branch point my $rev; foreach $rev (@branch_list) { if(exists($branch_label{$rev})) { print BRANCHES "#$branch_label{$rev}"; } else { print BRANCHES "#$convert::branch_prefix" . ++$branch_count; } print BRANCHES "#$rev"; } print BRANCHES "\n"; next; # not part of the change description } if(/^${indentation}(.*)/) { $change_description .= substr($_,length($indentation)); } } last if(!defined($revision) || !defined($timestamp) || !defined($author)); my $change = new Change( { 'timestamp' => $timestamp, 'author' => $author, 'change_description' => $change_description, 'changelist' => [ join('#',$archive, $revision) ] } ); $change->put(\*CHANGES); } } sub list_archives { my $rt=shift; my $proj=shift; my $archive; my $workfile; unlink("$convert::metadata_dir/archive_list"); runcmd("pcli list -pr\"$rt\" -id$convert::pvcs_user:$convert::pvcs_passwd -z -l -aArchivePath \"$proj\" > $convert::metadata_dir/archive_list"); open(ARCHIVE_LIST,"<$convert::metadata_dir/archive_list") or die "can't open: $!\n"; while() { if(/^(\/.*)$/) { $workfile=$1; my $raw_workfile = $workfile; if( $workfile =~ s%[\000-\031@#\\]%_%g ) { print "Workfile \"$raw_workfile\" was mapped to \"$workfile\"\n"; } $workfile = lc($workfile) if($convert::lowercase_filenames); } elsif(/^ArchivePath=(.*)$/) { $archive=$1; if ($archive !~ /null/) { if (exclude_file($workfile)) { my $msg = "Ignoring file: $workfile\n"; print $msg; convert::log($msg); } else { if ($archive =~ / /) { # Deal with spaces in filenames print FILELIST "\"$archive\"\n"; } else { print FILELIST "$archive\n"; } } $archivefile{lc(convert::forward_slash($archive))} = $workfile; } } } close(ARCHIVE_LIST); } sub runcmd { my $cmd = shift; my $result = system($cmd); if ($result) { die "***** Failed to run command: $cmd -> $result\n"; } } sub exclude_file # True if file should be excluded { my $file = shift; if (!$convert::exclude) { return 0; } # note use of Quote Meta encapsulation and file matching at start of string if ($convert::exclude =~ /\|/) { my $pat; foreach $pat (split(/\|/, $convert::exclude)) { if ($file =~ /^\Q$pat\E/) { return 1; } } } elsif ($file =~ /^\Q$convert::exclude\E/) { return 1; } return 0; }