#!/usr/bin/perl -w # # VSS to Perforce converter, phase I: extract metadata # # Copyright 1998 Perforce Software. All rights reserved. # Written by James Strickland, April 1998 # # This script parses the output of "ss properties -R root_project -O@tempfile" # to generate a list of files and their file types (including "tempobj" # for files which only have the last version stored). Each file history # is then obtained with "ss history filename". # # Note: there is *no* way to obtain the history of files which are currently # deleted! The only workaround would seem to be to "ss recover" the files # before doing the conversion, then "p4 delete" them afterward. This converter # does not attempt to do that. # # It is necessary to do things this way because of the following VSS quirks: # - some revisions aren't really revisions, they're just label operations; # ss history -R output for revisions which are labels doesn't include # the project they're in and hence is ambiguous # - ss history -R does not output any information about version 1 of a file # - the only way to discover the type of a file is with the "ss filetype" # or "ss properties" command # - running "ss filetype -R" is much faster than running "ss properties -R" # but it's missing one crucial bit of information: Store only latest version # Without this the converter will try to include file revisions it can't # get, and there is no way of knowing whether the failure is due to # storing only the latest version or that SourceSafe has lost the version. # - without using -O@ SourceSafe's output is ambigous - it word wraps filenames # and if the wrap occurs at a space it does not output the space # # # The output of ss history consists of a header which specifies the filename, # then a series of multi-line data blocks starting with the line # ***************** Version n ***************** # # The rest of the lines in the data block are allowed to occur in any order, # except for lines starting with "Comment:" or "Label comment:". These must # always appear last, and are terminated by a blank line followed by the start # of another data block. require 5.0; use strict; use integer; use Time::Local; use lib '.'; use convert; use Change; convert::emkdir($convert::metadata_dir); # ensure the metadata directory exists my $msg="can't open"; open(FILES, ">$convert::metadata_dir/files") or die $msg; # Parse the output of "ss properties" - we use a really simple scheme here # which relies solely on the fields "File:", "Type:" and # "Store only latest version:" always appearing, and appearing in that order. # Note that filenames with spaces are no problem - they are, in effect, # quoted by :\s+ on the left and \n on the right. Possible values for # "Type" are "Binary" and "Text", which translates nicely to Perforce's # "binary" and "text". Possible values for "Store only latest version:" # are "Yes" and "No". # Modified to allow more than one VSS root dir to be specified my $cmd; unlink("$convert::metadata_dir/ssproperties"); # otherwise VSS appends to it if ($convert::root =~ /|/) { my $rt; foreach $rt (split(/\|/, $convert::root)) { # Append to the output $cmd = "ss properties -R $rt -O\@$convert::metadata_dir/ssproperties $convert::ss_options"; system($cmd); } } else { $cmd = "ss properties -R $convert::root -O\@$convert::metadata_dir/ssproperties $convert::ss_options"; system($cmd); } open(LOGFILE,">>logfile") or die "can't open logfile: $!"; print LOGFILE "\n\nReading from: $cmd\n"; open(SS,"<$convert::metadata_dir/ssproperties"); my ($file,$type); FILE_OR_PROJECT: while( $_=myread() ) { chomp; s/\r$//; my ($field,$value) = split(/:\s*/,$_,2); next unless defined($value); if( $field eq "File") { $file = $value; if( $file =~ /[\000-\031@#%\*]/ || $file =~ /\.\.\./ ) { die "..., @, #, %, * or unprintable characters not allowed in filename '$file' - sorry, rename it and start over.\n"; } } elsif( $field eq "Type") { $type=lc($value); } elsif( $field eq "Store only latest version") { die "parse error" if(!defined($file)||!defined($type)); if($value eq "Yes") { $type="tempobj"; } $file =~ s/^\s+//; print FILES "$type $file\n"; undef $file; undef $type; } } print LOGFILE "\n\nDone with: $cmd\n"; close(FILES); open(FILES, "<$convert::metadata_dir/files") or die $msg; # for input open(LABELS, ">$convert::metadata_dir/labels") or die $msg; open(CHANGES, ">$convert::metadata_dir/changes.ns") or die $msg; # ns for "not sorted" unlink("$convert::metadata_dir/changes"); # get rid of any existing change file to avoid confusion while(<FILES>) { my ($type,$file) = split(/ /,$_,2); chomp($file); read_history($file); } close(LOGFILE); my (%label_comments, %label_warned); my (%label_timestamp); sub read_history # expects open file descriptors CHANGES, LABELS, BRANCHES { my $file = shift; my $cmd = "ss history \"$file\" $convert::ss_options"; print LOGFILE "\n\nReading from: $cmd\n"; open(SS,"$cmd |"); read_comment(); # ignore everything up to the first data block # (previously I verified that it started with "History of $file ..." # except that MS word wraps long filenames with spaces so just hang it) my (@pending_labels); ITEM: while($_=myread()) { my ($op,$version,$user,$timestamp,$label,$comment); if(/^\*{17}( +Version (\d+) +\*{17})?/) { my ($version,$user,$timestamp,$label,$comment); $version = $2; $_=myread(); if(/^Label: "(.+)"$/) { $label = $1; $_=myread(); } ($user,$timestamp) = parse_user_and_timestamp($_); $_=myread(); if(/Labeled/) { # this revision isn't really a revision - it's just a label my $before = $label; # label transformations without a warning # labels must not contain umlauts $label =~ s/\204/ae/g; $label =~ s/\224/oe/g; $label =~ s/\201/ue/g; # delete contigous exclamation marks and append "[local] instead" if ($label =~ s/ *!!+ *//g) { $label .= "_[local]"; } # replace spaces within labels silently as they appear in almost every label $label =~ s/ /_/g; # replace more special characters silently $label =~ s@/@-@g; $label =~ tr/()/[]/; # label transformations with a warning # replace special characters if( $label =~ s%[\000-\031 @#&,<>()'!\%\*/'\\]%_%g && !$label_warned{$label}) { print "label \"$before\" changed to \"$label\"\n"; $label_warned{$label} = 1; } if( $label =~ s/\.\.\./_/g && !$label_warned{$label} ) { print "Removed ellipsis (...) in \"$label\"\n"; $label_warned{$label} = 1; } if( $label =~ /^\d+$/ ) { $label = "_$label"; print "Underscore prepended to all numeric label \"$label\"\n"; $label_warned{$label} = 1; } # Prefix label if specified $label = $convert::label_prefix . $label; push @pending_labels,$label; # assume all the comments for a particular label are the same... $comment = read_comment(); $label_comments{$label} = $comment if ! defined($label_comments{$label}); $label_timestamp{$label} = $timestamp if ! defined($label_timestamp{$label}); next ITEM; } # this is a "real" revision - either "Checked in projectname" or "Branched" $comment = read_comment(); my $change = new Change( { 'timestamp' => $timestamp, 'author' => $user, 'change_description' => $comment, 'changelist' => [ "$version $file" ] } ); $change->put(\*CHANGES); while(@pending_labels) { $label = pop(@pending_labels); print LABELS "$version $label $label_timestamp{$label} $file\n"; # use 10 *'s as end of comments marker print LABELS "$label_comments{$label}**********\n"; # only need to spit out the comment the first time... $label_comments{$label} = ""; } next ITEM; } else { die "parsing messed up on '$_'\n"; } } } sub parse_user_and_timestamp { $_=shift; # # Swiss-Format: User: <username> Date: dd.mm.yy Time: hh:mm # if ( ! m@^User: (.*)\b\s+Date:\s+(\d+).(\d+).(\d+)\s+Time:\s+(\d+):(\d+)@ ) # US-Format: User: <username> Date: mm/dd/yy Time: hh:mm[ap] if ( ! m@^User: (.*)\b\s+Date:\s+(\d+)/(\d+)/(\d+)\s+Time:\s+(\d+):(\d+)([ap])@ ) { # 2002-03-04/ps # A problem here most likely indicates a row of asterisks in a # comment. Probably the regexps in this file with "\*{17}" should be # changed to fix this, but I have no more time to do _and_ test # it... print "timestamp problem: $_\n"; die "can't parse timestamp" ; } my $user = $1; $user=lc($user) if($convert::lowercase_usernames); # timelocal takes second, minute, hour, day, month, year # in the range 0..59, 0..59, 0..23, 1..31, 0..11, 0..99 # The two digit year has assumptions made about it such that # any time before 2037 (when the 32-bit seconds-since-1970 time # will run out) is handled correctly. i.e. 97 -> 1997, 1 -> 2001 # # swiss german time settings # my $timestamp=timelocal(0, $6, $5, $2, $3 -1, $4); # U.S. time settings my $hour = $5 % 12; if($7 eq 'p') { $hour += 12; } my $timestamp=timelocal(0, $6, $hour, $3, $2 -1, $4); # convert user names because they must not contain umlauts $user =~ s/\204/ae/g; $user =~ s/\224/oe/g; $user =~ s/\201/ue/g; # # nifty table for more complicated username conversion (if desired) # my %usertable = ( "user_j" => "joe.user", # "developer_d" => "donald.developer", # ); # if ( !exists $usertable{$user} ) { # print "unknown user $user\n"; # } else { # $user = $usertable{$user}; # } return ($user,$timestamp); } sub read_comment { my $comment=""; while($_=myread()) { # convert a few umlauts from DOS-Codepage to Latin-1 # $_ =~ tr/\204\224\201\216\231\232\341//; # SAB 980503 - Comment can be terminated either by a new version # banner or by a Label separator... ## ***************** Version 28 ***************** ## User: User1 Date: 3/19/98 Time: 2:16p ## Checked in $/Projects/Common/AppUtils ## Comment: setup CoffFormat build configuration ## ## ********************** ## Label: "demo #1" ## User: User2 Date: 3/16/98 Time: 4:23a ## Labeled ## Label comment: Proposed demo. ## ## ********************** ## Label: "demo." ## User: User2 Date: 3/13/98 Time: 2:35a ## Labeled ## Label comment: Project ready for demo. ## ## ***************** Version 27 ***************** ## User: User2 Date: 2/17/98 Time: 10:27p ## Checked in $/Projects/Common/AppUtils ## Comment: Fixed location of output .lib/.bsc files. ## # if(/^\*{17} +Version (\d+) +\*{17}/) { if(/^\*{17}/) { $comment =~ s/^(Label comment|Comment): //; $comment =~ s/\n\s+$/\n/; # strip trailing blank lines pushback($_); return $comment; } $comment .= $_; } $comment =~ s/^Comment: //; return $comment; } # functions to read from SS with pushback my @linebuffer; sub myread { my $line; $line = pop @linebuffer; if (! defined($line)) { $line = <SS>; print LOGFILE "${line}" if defined($line) && $convert::debug_level > 1; } return $line; } sub pushback { push @linebuffer,shift; }
# | Change | User | Description | Committed | |
---|---|---|---|---|---|
#3 | 1513 | Peter Steiner |
- writing to the logfile in verify.pl didn't work - convert.pm: get_vss_file failed sometimes (when there was a file in the working directory with the same name as one in the VSS archive) - the timestamp of a label is now set from the VSS label timestamp - files can be added using perforce typemaps for determining the file type (though this is disabled because it is likely to break verify.pl when the revisions are compared using different keyword expansion methods...). See convert::typemap_regexp - new option lowercase_extensions (similar to lowercase_filenames) - option convert::perform_verify (no more need to press a key during nightly conversion tests) - label name transformations to cope better with special character restrictions. You probably want to adjust for your own purposes; see readhist.pl line 169ff. - output of convert.pl adorned with timestamps - changes for Swiss time format are prepared (in the comments of readhist.pl), though this probably should be done with specifying a regexp in the config file - various fixes when using german umlauts, especially in Windows login names propagated to VSS, in labels and comments |
||
#2 | 1512 | Peter Steiner |
merged the changes from the "official" perforce version from 2001-12-21, available from ftp://ftp.perforce.com/pub/perforce/r01.1/tools/convert/vss/vsstop4.zip (this should have been in changelist 1511 too) |
||
#1 | 1510 | Peter Steiner | integrated from Roberts vsstop4 branch | ||
//guest/robert_cowham/perforce/utils/vsstop4/readhist.pl | |||||
#1 | 237 | Robert Cowham |
Improved version of vsstop4. Makes life easier if importing into a depot which already contains stuff you want to keep. Also handles other people updating the depot at the same time. See changes labelled RHGC, and new items in config file. |