#!/usr/bin/perl -w
#
# PVCS to Perforce converter, phase I: extract PVCS metadata
#
# Copyright 1997 Perforce Software. All rights reserved.
# Written by James Strickland, July 1997
#
#
# PVCS archives are stored in a non-documented, binary format. However, all
# the metadata (data about the revisions in the archive) is available in plain
# text using the PVCS vlog command. This script reads a stream output by
# vlog and extracts all useful metadata, writing the result to files which
# contain no extraneous information and which are easy to parse.
#
# vlog output consists of a header (specifying the archive and workfile names,
# labels and so on) and a block of text for each revision in the archive.
# The block of text for each revision is preceded by a line
# of dashes, and is of the form
#
# Rev <revision number>
# <possible Locked by: line>
# Checked in: <date>
# Last modified: <date>
# Author id: <author> lines deleted/added/moved: <i>/<j>/<k>
# <optional Branches: line>
# <description>
#
# The end of output for an archive is marked by a line of equal signs.
# Thus, a description can be terminated by either a row of dashes or
# a row of equal signs.
#
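# Note that the header must precede the revision info, and that the Archive:
# line must precede any version labels. Within a revision block the Rev,
# Checked in: and Author id: lines are looked for in the order shown above;
# whatever follows the Author id: line is the optional Branches: line and the
# description. Note also that it is possible for there to be no revisions at
# all, hence no revision info.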
#
# Of course, if Intersolv changes the output of vlog,
# this script may have to change to match it!
require 5.0;
use strict;
use integer;
use Time::Local;
use lib '.';
use convert;
use Change;
use File::Path;
# Running count of branches that had no branch label; used to generate
# unique branch names from $convert::branch_prefix.
my $branch_count=0;

# Three letter English month abbreviation -> month number in the range 0..11
# (the range timelocal expects).
my %month = (
    "Jan" => 0, "Feb" => 1, "Mar" => 2,
    "Apr" => 3, "May" => 4, "Jun" => 5,
    "Jul" => 6, "Aug" => 7, "Sep" => 8,
    "Oct" => 9, "Nov" => 10,"Dec" => 11
);

my $metadata_dir = $convert::metadata_dir;
mkpath($metadata_dir);          # ensure the metadata directory exists
mkpath("$metadata_dir/labels"); # ensure the labels subdirectory exists

# Open every output file up front.  On failure report which file could not
# be opened and why ($!), rather than a bare "can't open".
open(FILES, ">$metadata_dir/files")
    or die "can't open $metadata_dir/files: $!\n";
open(LABELS_SUMMARY, ">$metadata_dir/labels_summary")
    or die "can't open $metadata_dir/labels_summary: $!\n";
open(LABELS, ">$metadata_dir/labels_details")
    or die "can't open $metadata_dir/labels_details: $!\n";
open(CHANGES, ">$metadata_dir/changes.ns")   # ns for "not sorted"
    or die "can't open $metadata_dir/changes.ns: $!\n";
open(BRANCHES,">$metadata_dir/branches")
    or die "can't open $metadata_dir/branches: $!\n";

# get rid of any existing change file to avoid confusion
# (the result is deliberately not checked - the file may not exist)
unlink("$metadata_dir/changes");

# Labels already written to LABELS_SUMMARY, and labels for which a
# "mapped to _" style warning has already been printed.
my (%label_processed, %label_warned);

# vlog output for many archives arrives back to back on STDIN;
# each call consumes the output for one archive.
while(!eof(STDIN)) {
    read_archive_metadata(\*STDIN);
}
# Convert the text that follows "Checked in:" on a vlog line into a timestamp.
#
# Three layouts are recognized:
#   dd mmm yyyy hh:mm:ss
#   mmm dd yyyy hh:mm:ss
#   mmm dd hh:mm:ss yyyy
# where mmm is a three letter English month abbreviation (any letter case).
#
# Returns the value of timelocal() for that date, or nothing (undef in scalar
# context) if the text matches none of the layouts or the month is unknown.
sub _checkin_timestamp
{
    my $text = shift;
    my ($day,$mon,$year,$hour,$min,$sec);

    if($text =~ /^\s*(\d+) ([a-zA-Z]{3}) (\d{4}) (\d+):(\d\d):(\d\d)/) {
        ($day,$mon,$year,$hour,$min,$sec) = ($1,$2,$3,$4,$5,$6);
    } elsif($text =~ /^\s*([a-zA-Z]{3}) (\d+) (\d{4}) (\d+):(\d\d):(\d\d)/) {
        ($mon,$day,$year,$hour,$min,$sec) = ($1,$2,$3,$4,$5,$6);
    } elsif($text =~ /^\s*([a-zA-Z]{3}) (\d+) (\d+):(\d\d):(\d\d) (\d{4})/) {
        ($mon,$day,$hour,$min,$sec,$year) = ($1,$2,$3,$4,$5,$6);
    } else {
        return;
    }

    # %month is keyed by "Jan", "Feb", ...; normalize the case before lookup
    # so that an unknown month is rejected instead of silently becoming 0.
    my $month_index = $month{ucfirst(lc($mon))};
    return if(!defined($month_index));

    # timelocal takes second, minute, hour, day, month (0..11), year.
    # The four digit year is passed through unchanged: reducing it to two
    # digits would make Time::Local guess the century relative to today's
    # date, which is wrong for sufficiently old check-ins.
    return timelocal($sec,$min,$hour,$day,$month_index,$year);
}

# Read the vlog output for a single archive from the filehandle $input and
# write the extracted metadata to the already opened output handles:
#   FILES           archive#workfile#filetype
#   LABELS_SUMMARY  each (non-floating) label name, once
#   LABELS          label#archive#revision
#   BRANCHES        archive#revision followed by #branchname#branchrev pairs
#   CHANGES         one Change record per revision (via Change::put)
#
# Returns nothing.  Returns early if the archive has no revisions or if the
# header lacks an Archive: or Workfile: line.  Dies if version labels appear
# before the Archive: line or if a "Checked in:" date cannot be parsed.
sub read_archive_metadata
{
    my $input = shift;
    my ($archive,$workfile,$expand_keywords,$generate_delta,%branch_label);
    my $separator = "^-{30,}";  # line of (sufficient number of) dashes
    my $terminator = "^={30,}"; # line of (sufficient number of) equal signs

    # read the header
    HEADER_LOOP:
    while(<$input>) {
        # The bare block lets the nested read loops below hand a line they
        # have read but not consumed back to the top via "redo HEADER_SWITCH".
        HEADER_SWITCH: {
            last HEADER_LOOP if /$separator/o;
            if(/$terminator/o) { # ignore it if it has no revisions
                my $name = defined($archive) ? $archive : "(unknown)";
                print "empty archive $name ignored\n";
                return;
            }
            if(/^Archive:\s*([^\s].*)/) {
                $archive=convert::forward_slash($1); # use slash as pathname separator, not backslash
            } elsif(/^Workfile:\s*([^\s].*)/) {
                $workfile=$1;
                # \000-\037 (octal) covers all control characters 0..31
                if( $workfile =~ s%[\000-\037@#/\\]%_%g ) {
                    print "unprintable character(s), /, \\, @ or # in workfile ";
                    print "\"$workfile\" were mapped to _\n";
                }
                $workfile = lc($workfile) if($convert::lowercase_filenames);
            } elsif(/^Attributes:/) {
                # attributes appear one per line, each one indented by spaces
                while(<$input>) {
                    redo HEADER_SWITCH if(!/^ /);
                    # a "NO" anywhere on the line negates the attribute
                    if(/EXPANDKEYWORDS/) { $expand_keywords = !/NO/; }
                    if(/GENERATEDELTA/) { $generate_delta = !/NO/; }
                }
            } elsif(/^Version labels:/) {
                # labels appear one per line, each one indented by spaces
                while(<$input>) {
                    redo HEADER_SWITCH if(!/^ /);
                    # accept any number of leading spaces before the quoted label
                    if(/^ +"([^"]*)" = ([0-9\.\*]*)/) {
                        defined($archive) or die "Huh? Version labels before Archive:\n";
                        my ($no_space_label,$rev) = ($1,$2);
                        if( $no_space_label =~ s%[\000-\037 @#/\\]%_%g ) {
                            if (!$label_warned{$no_space_label}) {
                                $label_warned{$no_space_label} = 1;
                                print "spaces or unprintable characters or /, \\, @ or # in label ";
                                print "\"$no_space_label\" were mapped to _\n";
                            }
                        }
                        if( $no_space_label =~ /^\d+$/ ) {
                            $no_space_label = "_$no_space_label";
                            if (!$label_warned{$no_space_label}) {
                                $label_warned{$no_space_label} = 1;
                                print "Underscore prepended to all numeric label ";
                                print "\"$no_space_label\"\n";
                            }
                        }
                        if($rev =~ /\*$/) { # "floating" label (branch label)
                            $rev =~ s/\.\*$//; # strip off the ".*"
                            $branch_label{$rev}=$no_space_label;
                            $branch_label{$rev} = lc($branch_label{$rev}) if($convert::lowercase_branchnames);
                        } else {
                            if (!$label_processed{$no_space_label}) {
                                $label_processed{$no_space_label} = 1;
                                print LABELS_SUMMARY "$no_space_label\n";
                                unlink("$convert::metadata_dir/labels/$no_space_label"); # zap contents
                            }
                            print LABELS "$no_space_label#$archive#$rev\n";
                        }
                    }
                }
            }
        } # HEADER_SWITCH
    } # HEADER_LOOP

    # the header has been read
    return if(!defined($archive) || !defined($workfile));

    # determine file type based on either filename extension or the attributes
    # in the PVCS archive.
    my $file_type;
    for (keys(%convert::filetype_regex)) {
        if($workfile =~ /$convert::filetype_regex{$_}/i) {
            $file_type = $_;
            last;
        }
    }
    if(!defined($file_type)) { # no filename extension match
        if($generate_delta) {
            $file_type = ( $expand_keywords ? "ktext" : "text" );
        } else {
            if($expand_keywords) {
                print "EXPANDKEYWORDS and NOGENERATEDELTA in $workfile; ";
                print " taken to be binary\n";
            }
            $file_type = "binary";
        }
    }
    print FILES "$archive#$workfile#$file_type\n";

    # read the revision info, one revision block per iteration
    my $finished=0;
    while(!$finished) {
        my ($indentation,$revision,$timestamp,$author,$change_description);

        # ignore lines until Rev line found
        # (there shouldn't be any lines before the Rev line - this is just
        # defensive programming)
        while(<$input>) {
            if(/^(\s*)Rev\s+([0-9\.]*)/) {
                $indentation=$1;
                $revision=$2;
                last;
            }
        }
        # input ran out without another revision; nothing more to record
        last if(!defined($revision));

        # look for Checked in timestamp
        while(<$input>) {
            if(/^${indentation}Checked in:(.*)/) {
                $timestamp = _checkin_timestamp($1);
                if(!defined($timestamp)) {
                    print "unrecognized date format - must be\n";
                    print "dd mmm yyyy hh:mm:ss\nOR\nmmm dd yyyy hh:mm:ss\n";
                    print "OR\nmmm dd hh:mm:ss yyyy\n";
                    print "where mmm is a three letter English month abbreviation.\n";
                    print "This is likely because you've specified a different date format in your\n";
                    print "PVCS configuration file. Remove that specification and try again.\n";
                    die "can't parse date in line: $_";
                }
                last;
            }
        }

        # look for Author id line
        while(<$input>) {
            if(/^${indentation}Author id:\s*([^\s]*)/) {
                $author=$1;
                $author = lc($author) if($convert::lowercase_usernames);
                if( $author =~ s/[ \000-\037]/_/g ) {
                    print "spaces or unprintable characters in author id ";
                    print "\"$author\" were mapped to _\n";
                }
                last;
            }
        }

        # ok, now we rely on order - after the Author id line there is an optional
        # Branches line, and then the rest is the change description
        $change_description = "";
        while(<$input>) {
            $finished = /$terminator/o;
            last if( $finished || /$separator/o);
            if(/^${indentation}Branches:\s+(.*)/) {
                next if($convert::ignore_branches);
                my @branch_list = split(/\s+/,$1);
                print BRANCHES "$archive#$revision"; # print out branch point
                my $rev;
                foreach $rev (@branch_list) {
                    if(exists($branch_label{$rev})) {
                        print BRANCHES "#$branch_label{$rev}";
                    } else {
                        # no floating label names this branch; generate a name
                        print BRANCHES "#$convert::branch_prefix" . ++$branch_count;
                    }
                    print BRANCHES "#$rev";
                }
                print BRANCHES "\n";
                next; # not part of the change description
            }
            if(/^${indentation}(.*)/) {
                # keep the line (including its newline) minus the indentation
                $change_description .= substr($_,length($indentation));
            }
        }

        # an incomplete revision block means the input was cut short
        last if(!defined($revision) || !defined($timestamp) || !defined($author));

        my $change = Change->new( {
            'timestamp' => $timestamp,
            'author' => $author,
            'change_description' => $change_description,
            'changelist' => [ join('#',$archive, $revision) ] } );
        $change->put(\*CHANGES);
    }
}