#!/usr/bin/perl -w # # VSS to Perforce converter, phase I: extract metadata # # Copyright 1998 Perforce Software. All rights reserved. # Written by James Strickland, April 1998 # # This script parses the output of "ss properties -R root_project -O@tempfile" # to generate a list of files and their file types (including "tempobj" # for files which only have the last version stored). Each file history # is then obtained with "ss history filename". # # Note: there is *no* way to obtain the history of files which are currently # deleted! The only workaround would seem to be to "ss recover" the files # before doing the conversion, then "p4 delete" them afterward. This converter # does not attempt to do that. # # It is necessary to do things this way because of the following VSS quirks: # - some revisions aren't really revisions, they're just label operations; # ss history -R output for revisions which are labels doesn't include # the project they're in and hence is ambiguous # - ss history -R does not output any information about version 1 of a file # - the only way to discover the type of a file is with the "ss filetype" # or "ss properties" command # - running "ss filetype -R" is much faster than running "ss properties -R" # but it's missing one crucial bit of information: Store only latest version # Without this the converter will try to include file revisions it can't # get, and there is no way of knowing whether the failure is due to # storing only the latest version or that SourceSafe has lost the version. # - without using -O@ SourceSafe's output is ambigous - it word wraps filenames # and if the wrap occurs at a space it does not output the space # # # The output of ss history consists of a header which specifies the filename, # then a series of multi-line data blocks starting with the line # ***************** Version n ***************** # # The rest of the lines in the data block are allowed to occur in any order, # except for lines starting with "Comment:" or "Label comment:". These must # always appear last, and are terminated by a blank line followed by the start # of another data block. require 5.0; use strict; use integer; use Time::Local; use lib '.'; use convert; use Change; convert::emkdir($convert::metadata_dir); # ensure the metadata directory exists my $msg="can't open"; open(FILES, ">$convert::metadata_dir/files") or die $msg; # Parse the output of "ss properties" - we use a really simple scheme here # which relies solely on the fields "File:", "Type:" and # "Store only latest version:" always appearing, and appearing in that order. # Note that filenames with spaces are no problem - they are, in effect, # quoted by :\s+ on the left and \n on the right. Possible values for # "Type" are "Binary" and "Text", which translates nicely to Perforce's # "binary" and "text". Possible values for "Store only latest version:" # are "Yes" and "No". # Modified to allow more than one VSS root dir to be specified my $cmd; unlink("$convert::metadata_dir/ssproperties"); # otherwise VSS appends to it if ($convert::root =~ /|/) { my $rt; foreach $rt (split(/\|/, $convert::root)) { # Append to the output $cmd = "ss properties -R $rt -O\@$convert::metadata_dir/ssproperties $convert::ss_options"; system($cmd); } } else { $cmd = "ss properties -R $convert::root -O\@$convert::metadata_dir/ssproperties $convert::ss_options"; system($cmd); } open(LOGFILE,">>logfile") or die "can't open logfile: $!"; print LOGFILE "\n\nReading from: $cmd\n"; open(SS,"<$convert::metadata_dir/ssproperties"); my ($file,$type); FILE_OR_PROJECT: while( $_=myread() ) { chomp; s/\r$//; my ($field,$value) = split(/:\s*/,$_,2); next unless defined($value); if( $field eq "File") { $file = $value; if( $file =~ /[\000-\031@#%\*]/ || $file =~ /\.\.\./ ) { die "..., @, #, %, * or unprintable characters not allowed in filename '$file' - sorry, rename it and start over.\n"; } } elsif( $field eq "Type") { $type=lc($value); } elsif( $field eq "Store only latest version") { die "parse error" if(!defined($file)||!defined($type)); if($value eq "Yes") { $type="tempobj"; } $file =~ s/^\s+//; print FILES "$type $file\n"; undef $file; undef $type; } } print LOGFILE "\n\nDone with: $cmd\n"; close(FILES); open(FILES, "<$convert::metadata_dir/files") or die $msg; # for input open(LABELS, ">$convert::metadata_dir/labels") or die $msg; open(CHANGES, ">$convert::metadata_dir/changes.ns") or die $msg; # ns for "not sorted" unlink("$convert::metadata_dir/changes"); # get rid of any existing change file to avoid confusion while() { my ($type,$file) = split(/ /,$_,2); chomp($file); read_history($file); } close(LOGFILE); my (%label_comments, %label_warned); my (%label_timestamp); sub read_history # expects open file descriptors CHANGES, LABELS, BRANCHES { my $file = shift; my $cmd = "ss history \"$file\" $convert::ss_options"; print LOGFILE "\n\nReading from: $cmd\n"; open(SS,"$cmd |"); read_comment(); # ignore everything up to the first data block # (previously I verified that it started with "History of $file ..." # except that MS word wraps long filenames with spaces so just hang it) my (@pending_labels); ITEM: while($_=myread()) { my ($op,$version,$user,$timestamp,$label,$comment); if(/^\*{17}( +Version (\d+) +\*{17})?/) { my ($version,$user,$timestamp,$label,$comment); $version = $2; $_=myread(); if(/^Label: "(.+)"$/) { $label = $1; $_=myread(); } ($user,$timestamp) = parse_user_and_timestamp($_); $_=myread(); if(/Labeled/) { # this revision isn't really a revision - it's just a label my $before = $label; # label transformations without a warning # labels must not contain umlauts $label =~ s/\204/ae/g; $label =~ s/\224/oe/g; $label =~ s/\201/ue/g; # delete contigous exclamation marks and append "[local] instead" if ($label =~ s/ *!!+ *//g) { $label .= "_[local]"; } # replace spaces within labels silently as they appear in almost every label $label =~ s/ /_/g; # replace more special characters silently $label =~ s@/@-@g; $label =~ tr/()/[]/; # label transformations with a warning # replace special characters if( $label =~ s%[\000-\031 @#&,<>()'!\%\*/'\\]%_%g && !$label_warned{$label}) { print "label \"$before\" changed to \"$label\"\n"; $label_warned{$label} = 1; } if( $label =~ s/\.\.\./_/g && !$label_warned{$label} ) { print "Removed ellipsis (...) in \"$label\"\n"; $label_warned{$label} = 1; } if( $label =~ /^\d+$/ ) { $label = "_$label"; print "Underscore prepended to all numeric label \"$label\"\n"; $label_warned{$label} = 1; } # Prefix label if specified $label = $convert::label_prefix . $label; push @pending_labels,$label; # assume all the comments for a particular label are the same... $comment = read_comment(); $label_comments{$label} = $comment if ! defined($label_comments{$label}); $label_timestamp{$label} = $timestamp if ! defined($label_timestamp{$label}); next ITEM; } # this is a "real" revision - either "Checked in projectname" or "Branched" $comment = read_comment(); my $change = new Change( { 'timestamp' => $timestamp, 'author' => $user, 'change_description' => $comment, 'changelist' => [ "$version $file" ] } ); $change->put(\*CHANGES); while(@pending_labels) { $label = pop(@pending_labels); print LABELS "$version $label $label_timestamp{$label} $file\n"; # use 10 *'s as end of comments marker print LABELS "$label_comments{$label}**********\n"; # only need to spit out the comment the first time... $label_comments{$label} = ""; } next ITEM; } else { die "parsing messed up on '$_'\n"; } } } sub parse_user_and_timestamp { $_=shift; # # Swiss-Format: User: Date: dd.mm.yy Time: hh:mm # if ( ! m@^User: (.*)\b\s+Date:\s+(\d+).(\d+).(\d+)\s+Time:\s+(\d+):(\d+)@ ) # US-Format: User: Date: mm/dd/yy Time: hh:mm[ap] if ( ! m@^User: (.*)\b\s+Date:\s+(\d+)/(\d+)/(\d+)\s+Time:\s+(\d+):(\d+)([ap])@ ) { # 2002-03-04/ps # A problem here most likely indicates a row of asterisks in a # comment. Probably the regexps in this file with "\*{17}" should be # changed to fix this, but I have no more time to do _and_ test # it... print "timestamp problem: $_\n"; die "can't parse timestamp" ; } my $user = $1; $user=lc($user) if($convert::lowercase_usernames); # timelocal takes second, minute, hour, day, month, year # in the range 0..59, 0..59, 0..23, 1..31, 0..11, 0..99 # The two digit year has assumptions made about it such that # any time before 2037 (when the 32-bit seconds-since-1970 time # will run out) is handled correctly. i.e. 97 -> 1997, 1 -> 2001 # # swiss german time settings # my $timestamp=timelocal(0, $6, $5, $2, $3 -1, $4); # U.S. time settings my $hour = $5 % 12; if($7 eq 'p') { $hour += 12; } # my $hour = $5; # Default for American mm/dd/yy my $timestamp=timelocal(0, $6, $hour, $3, $2 -1, $4); # my $timestamp=timelocal(0, $6, $hour, $2 -1, $3, $4); # convert user names because they must not contain umlauts $user =~ s/\204/ae/g; $user =~ s/\224/oe/g; $user =~ s/\201/ue/g; # # nifty table for more complicated username conversion (if desired) # my %usertable = ( "user_j" => "joe.user", # "developer_d" => "donald.developer", # ); # if ( !exists $usertable{$user} ) { # print "unknown user $user\n"; # } else { # $user = $usertable{$user}; # } return ($user,$timestamp); } sub read_comment { my $comment=""; while($_=myread()) { # convert a few umlauts from DOS-Codepage to Latin-1 # ä ö ü Ä Ö Ü ß $_ =~ tr/\204\224\201\216\231\232\341/äöüÄÖÜß/; # SAB 980503 - Comment can be terminated either by a new version # banner or by a Label separator... ## ***************** Version 28 ***************** ## User: User1 Date: 3/19/98 Time: 2:16p ## Checked in $/Projects/Common/AppUtils ## Comment: setup CoffFormat build configuration ## ## ********************** ## Label: "demo #1" ## User: User2 Date: 3/16/98 Time: 4:23a ## Labeled ## Label comment: Proposed demo. ## ## ********************** ## Label: "demo." ## User: User2 Date: 3/13/98 Time: 2:35a ## Labeled ## Label comment: Project ready for demo. ## ## ***************** Version 27 ***************** ## User: User2 Date: 2/17/98 Time: 10:27p ## Checked in $/Projects/Common/AppUtils ## Comment: Fixed location of output .lib/.bsc files. ## # if(/^\*{17} +Version (\d+) +\*{17}/) { if(/^\*{17}/) { $comment =~ s/^(Label comment|Comment): //; $comment =~ s/\n\s+$/\n/; # strip trailing blank lines pushback($_); return $comment; } $comment .= $_; } $comment =~ s/^Comment: //; return $comment; } # functions to read from SS with pushback my @linebuffer; sub myread { my $line; $line = pop @linebuffer; if (! defined($line)) { $line = ; print LOGFILE "${line}" if defined($line) && $convert::debug_level > 1; } return $line; } sub pushback { push @linebuffer,shift; }