#!/usr/bin/perl -w
#
# PVCS to Perforce converter, phase I: extract PVCS metadata
#
# Copyright 1997 Perforce Software.  All rights reserved.
# Written by James Strickland, July 1997
#
#
# PVCS archives are stored in an undocumented, binary format.  However, all
# the metadata (data about the revisions in the archive) is available in plain
# text using the PVCS vlog command.  This script reads a stream output by
# vlog and extracts all useful metadata, writing the result to files which
# contain no extraneous information and which are easy to parse.
#
# vlog output consists of a header (specifying the archive and workfile names,
# labels and so on) and a block of text for each revision in the archive.
# The block of text for each revision is preceded by a line
# of dashes, and is of the form
# (placeholders in angle brackets were lost from this comment and have been
# reconstructed from what the parser below reads)
#
#   Rev <revision number>
#   Checked in:     <timestamp>
#   Last modified:  <timestamp>
#   Author id: <author>     lines deleted/added/moved: <n>/<n>/<n>
#   <optional Branches: line>
#   <change description, possibly spanning several lines>
#
# The end of output for an archive is marked by a line of equal signs.
# Thus, a description can be terminated by either a row of dashes or
# a row of equal signs.
#
# Note that the ordering of lines is not important except that the header must
# precede the revision info.  Note also that it is possible for there to be
# no revisions at all, hence no revision info.
#
# Of course, if Intersolv changes the output of vlog,
# this script may have to change to match it!
require 5.0;
use strict;
use integer;
use Time::Local;
use lib '.';
use convert;
use Change;
use File::Path;

# Running count of branches that carry no branch label; appended to
# $convert::branch_prefix to generate a unique name for each of them.
my $branch_count=0;

# Three letter English month abbreviation => month number in the 0..11
# range expected by timelocal().
my %month = (
    "Jan" => 0, "Feb" => 1, "Mar" => 2, "Apr" => 3,
    "May" => 4, "Jun" => 5, "Jul" => 6, "Aug" => 7,
    "Sep" => 8, "Oct" => 9, "Nov" => 10,"Dec" => 11
);

mkpath($convert::metadata_dir);           # ensure the metadata directory exists
mkpath("$convert::metadata_dir/labels");  # ensure the labels subdirectory exists

# Output files.  The handles are barewords because read_archive_metadata()
# writes to them by name.
my $files_path          = "$convert::metadata_dir/files";
my $labels_summary_path = "$convert::metadata_dir/labels_summary";
my $labels_details_path = "$convert::metadata_dir/labels_details";
my $changes_path        = "$convert::metadata_dir/changes.ns";  # ns for "not sorted"
my $branches_path       = "$convert::metadata_dir/branches";

open(FILES,          ">$files_path")          or die "can't open $files_path: $!\n";
open(LABELS_SUMMARY, ">$labels_summary_path") or die "can't open $labels_summary_path: $!\n";
open(LABELS,         ">$labels_details_path") or die "can't open $labels_details_path: $!\n";
open(CHANGES,        ">$changes_path")        or die "can't open $changes_path: $!\n";
open(BRANCHES,       ">$branches_path")       or die "can't open $branches_path: $!\n";

unlink("$convert::metadata_dir/changes"); # get rid of any existing change file to avoid confusion

# %label_processed: labels already written to LABELS_SUMMARY.
# %label_warned:    labels for which a "was mapped" notice was already printed.
my (%label_processed, %label_warned);

# The input stream is a concatenation of vlog reports, one per archive.
while(!eof(STDIN)) {
    read_archive_metadata(\*STDIN);
}

# Buffered write errors only surface at close time, so check every handle.
close(FILES)          or die "can't close $files_path: $!\n";
close(LABELS_SUMMARY) or die "can't close $labels_summary_path: $!\n";
close(LABELS)         or die "can't close $labels_details_path: $!\n";
close(CHANGES)        or die "can't close $changes_path: $!\n";
close(BRANCHES)       or die "can't close $branches_path: $!\n";

# Convert the fields of a "Checked in:" line to seconds since the epoch,
# interpreted as local time.
#
# Arguments: day of month, three letter month abbreviation (any case),
#            four digit year, hour, minute, second.
# Returns:   the timestamp, or undef if the month abbreviation is unknown.
# Dies:      timelocal() croaks if a field is out of range.
sub _checkin_time
{
    my ($day, $mon, $year, $hour, $min, $sec) = @_;

    $mon = ucfirst(lc($mon));   # accept "JAN" and "jan" as well as "Jan"
    return if(!exists($month{$mon}));

    # The four digit year is passed through unchanged.  Time::Local treats
    # years greater than 999 as the actual year; reducing it to two digits
    # would make Time::Local guess the century with a rolling window.
    return timelocal($sec, $min, $hour, $day, $month{$mon}, $year);
}

# Read the vlog report for one archive from a filehandle and record its
# metadata.
#
# Argument: a filehandle (glob reference) positioned at the start of a
#           vlog report.
# Output:   one line to FILES for the archive, one line to LABELS (and, the
#           first time a label is seen, to LABELS_SUMMARY) per fixed label,
#           one line to BRANCHES per Branches: line, and one Change record
#           to CHANGES per revision.
# Returns:  nothing useful.  Returns early, writing nothing to FILES, if the
#           archive has no revisions or the header lacks an Archive: or
#           Workfile: line.
# Dies:     if a label precedes the Archive: line or a check-in date is in
#           an unrecognized format.
sub read_archive_metadata
{
    my $input = shift;
    my ($archive,$workfile,$expand_keywords,$generate_delta,%branch_label);
    my $separator  = "^-{30,}";  # line of (sufficient number of) dashes
    my $terminator = "^={30,}";  # line of (sufficient number of) equal signs

    # read the header
  HEADER_LOOP:
    while(<$input>) {
      HEADER_SWITCH: {
        last HEADER_LOOP if /$separator/o;

        if(/$terminator/o) {
            # ignore it if it has no revisions
            my $shown = defined($archive) ? $archive : "";
            print "empty archive $shown ignored\n";
            return;
        }

        if(/^Archive:\s*([^\s].*)/) {
            # use slash as pathname separator, not backslash
            $archive=convert::forward_slash($1);
        }
        elsif(/^Workfile:\s*([^\s].*)/) {
            $workfile=$1;
            # \000-\037 (octal) is the full range of ASCII control characters
            if( $workfile =~ s%[\000-\037@#/\\]%_%g ) {
                print "unprintable character(s), /, \\, @ or # in workfile ";
                print "\"$workfile\" were mapped to _\n";
            }
            $workfile = lc($workfile) if($convert::lowercase_filenames);
        }
        elsif(/^Attributes:/) {
            # attributes appear one per line, each one prefixed by spaces
            while(<$input>) {
                # The first unindented line ends the attribute list; it has
                # already been read, so re-examine it as a header line
                # rather than reading another one.
                redo HEADER_SWITCH if(!/^ /);
                if(/EXPANDKEYWORDS/) { $expand_keywords = !/NO/; }
                if(/GENERATEDELTA/)  { $generate_delta  = !/NO/; }
            }
        }
        elsif(/^Version labels:/) {
            # labels appear one per line, each one prefixed by spaces
            while(<$input>) {
                redo HEADER_SWITCH if(!/^ /);   # end of label list, see above

                # Accept any amount of leading whitespace and of whitespace
                # around the "=" sign.
                if(/^\s+"([^"]*)"\s*=\s*([0-9\.\*]*)/) {
                    defined($archive) or die "Huh?  Version labels before Archive:\n";
                    my ($no_space_label,$rev) = ($1,$2);

                    if( $no_space_label =~ s%[\000-\037 @#/\\]%_%g ) {
                        if (!$label_warned{$no_space_label}) {
                            $label_warned{$no_space_label} = 1;
                            print "spaces or unprintable characters or /, \\, @ or # in label ";
                            print "\"$no_space_label\" were mapped to _\n";
                        }
                    }
                    if( $no_space_label =~ /^\d+$/ ) {
                        $no_space_label = "_$no_space_label";
                        if (!$label_warned{$no_space_label}) {
                            $label_warned{$no_space_label} = 1;
                            print "Underscore prepended to all numeric label ";
                            print "\"$no_space_label\"\n";
                        }
                    }

                    if($rev =~ /\*$/) {
                        # "floating" label (branch label)
                        $rev =~ s/\.\*$//;  # strip off the ".*"
                        $branch_label{$rev}=$no_space_label;
                        $branch_label{$rev} = lc($branch_label{$rev})
                            if($convert::lowercase_branchnames);
                    }
                    else {
                        if (!$label_processed{$no_space_label}) {
                            $label_processed{$no_space_label} = 1;
                            print LABELS_SUMMARY "$no_space_label\n";
                            unlink("$convert::metadata_dir/labels/$no_space_label"); # zap contents
                        }
                        print LABELS "$no_space_label#$archive#$rev\n";
                    }
                }
            }
        }
      } # HEADER_SWITCH
    } # HEADER_LOOP

    # the header has been read
    return if(!defined($archive) || !defined($workfile));

    # determine file type based on either filename extension or the attributes
    # in the PVCS archive.
    my $file_type;
    my $candidate;
    foreach $candidate (keys(%convert::filetype_regex)) {
        if($workfile =~ /$convert::filetype_regex{$candidate}/i) {
            $file_type = $candidate;
            last;
        }
    }
    if(!defined($file_type)) {
        # no filename extension match
        if($generate_delta) {
            $file_type = ( $expand_keywords ? "ktext" : "text" );
        }
        else {
            if($expand_keywords) {
                print "EXPANDKEYWORDS and NOGENERATEDELTA in $workfile; ";
                print " taken to be binary\n";
            }
            $file_type = "binary";
        }
    }

    print FILES "$archive#$workfile#$file_type\n";

    # read the revision info
    my $finished=0;
    while(!$finished) {
        my ($indentation,$revision,$timestamp,$author,$change_description);

        # ignore lines until Rev line found
        # (there shouldn't be any lines before the Rev line - this is just
        # defensive programming)
        while(<$input>) {
            if(/^(\s*)Rev\s+([0-9\.]*)/) {
                $indentation=$1;
                $revision=$2;
                last;
            }
        }

        # look for Checked in timestamp
        while(<$input>) {
            next if(!/^${indentation}Checked in:/);

            # Three layouts are understood.  Fields may be separated by any
            # amount of whitespace.
            my @when;   # (day, month name, year, hour, minute, second)
            if(/^${indentation}Checked in:\s*(\d+)\s+([a-zA-Z]{3})\s+(\d{4})\s+(\d+):(\d\d):(\d\d)/) {
                @when = ($1,$2,$3,$4,$5,$6);    # dd mmm yyyy hh:mm:ss
            }
            elsif(/^${indentation}Checked in:\s*([a-zA-Z]{3})\s+(\d+)\s+(\d{4})\s+(\d+):(\d\d):(\d\d)/) {
                @when = ($2,$1,$3,$4,$5,$6);    # mmm dd yyyy hh:mm:ss
            }
            elsif(/^${indentation}Checked in:\s*([a-zA-Z]{3})\s+(\d+)\s+(\d+):(\d\d):(\d\d)\s+(\d{4})/) {
                @when = ($2,$1,$6,$3,$4,$5);    # mmm dd hh:mm:ss yyyy
            }

            $timestamp = _checkin_time(@when) if(@when);
            if(!defined($timestamp)) {
                print "unrecognized date format - must be\n";
                print "dd mmm yyyy hh:mm:ss\nOR\nmmm dd yyyy hh:mm:ss\n";
                print "OR\nmmm dd hh:mm:ss yyyy\n";
                print "where mmm is a three letter English month abbreviation.\n";
                print "This is likely because you've specified a different date format in your\n";
                print "PVCS configuration file. Remove that specification and try again.\n";
                die "unrecognized date format in: $_";
            }
            last;
        }

        # look for Author id line
        while(<$input>) {
            if(/^${indentation}Author id:\s*([^\s]*)/) {
                $author=$1;
                $author = lc($author) if($convert::lowercase_usernames);
                if( $author =~ s/[ \000-\037]/_/g ) {
                    print "spaces or unprintable characters in author id ";
                    print "\"$author\" were mapped to _\n";
                }
                last;
            }
        }

        # ok, now we rely on order - after the Author id line there is an optional
        # Branches line, and then the rest is the change description
        $change_description = "";
        while(<$input>) {
            $finished = /$terminator/o;
            last if( $finished || /$separator/o);

            if(/^${indentation}Branches:\s+(.*)/) {
                next if($convert::ignore_branches);
                my @branch_list = split(/\s+/,$1);
                print BRANCHES "$archive#$revision"; # print out branch point
                my $rev;
                foreach $rev (@branch_list) {
                    if(exists($branch_label{$rev})) {
                        print BRANCHES "#$branch_label{$rev}";
                    }
                    else {
                        # unlabelled branch: generate a name for it
                        print BRANCHES "#$convert::branch_prefix" . ++$branch_count;
                    }
                    print BRANCHES "#$rev";
                }
                print BRANCHES "\n";
                next; # not part of the change description
            }

            # keep the line, minus the indentation shared by the whole block
            if(/^${indentation}(.*)/) {
                $change_description .= substr($_,length($indentation));
            }
        }

        # an incomplete block means the input ran out: stop without recording it
        last if(!defined($revision) || !defined($timestamp) || !defined($author));

        my $change = Change->new( {
            'timestamp'          => $timestamp,
            'author'             => $author,
            'change_description' => $change_description,
            'changelist'         => [ join('#',$archive, $revision) ]
        } );
        $change->put(\*CHANGES);
    }
}