#! /usr/bin/perl -w use strict; =head1 Notices Originally developed for Perforce by VIZIM (www.vizim.com) Copyright (c) 2014 Perforce Software, Inc. and VIZIM Worldwide, Inc. All rights reserved. Please see LICENSE.txt in top-level folder of this distribution for license information =cut use v5.14.0; # Earliest version testing was performed against use File::Copy; use File::Path; use File::Basename; use File::Spec::Functions qw(rel2abs catfile); use IO::Handle; use Time::Local; use Getopt::Long; my $APPNAME = 'TFSHistory.pl'; my $versionMajor = 7; my $versionMinor = '03'; my $APPWHAT = "TFS repository history extract; Version $versionMajor.$versionMinor"; my $APPNOTICES = "Copyright (c) 2014 Perforce Software, Inc. and VIZIM Worldwide, Inc. All rights reserved. See LICENSE.txt for license information."; =head1 TFS extract tool This tool extracts history information from a TFS repository. =cut =head2 Operational Context The extraction runs from a TFS workspace that has the collection to be extracted as its operational context. =cut #################### # # Program constants and globals # #################### $| = 1; # force STDOUT to keep logs up to date in case of failure/ abort =head2 Stop file A clean, coordinated stop of an extraction is accomplished by creating a stop file in the directory of execution. Existence of the stop file is the important characteristic. Content of the stop file, if any, is ignored. Extraction will not be initiated if the stop file exists. The full path to the stop file is identified in the first few lines of output from this tool. =cut my $pathStopFile = rel2abs( catfile( '.', 'History.stop' ) ); =head2 Report Access mode Normally, access errors terminate extraction processing to allow restart of the extraction process. This is appropriate as TFS servers have been observed to fail when presented with a long series of historic requests. There is also a known 2012 TFS access error invovlving user GUIDs. The Report Access mode provides a mechanism for the extraction to identify corrupt user GUIDs that would impact the import. Extractions with access errors are not useable for import. To assure that extractions with access errors are not used by generation processing, an ACCESS tag is added to the extraction. As an invalid tag, ACCESS will cause generation processing to fail. =cut my $modeAccessReport = 0; my $reportAccessCounter = 0; =head1 Progress Tracking Extraction often requires hours of elapsed time to complete. Progress tracking provides both a heart-beat and a completion estimate. The primary factor influencing extraction time is the number of TFS requests required to extract information for a changeset. Each changeset is one request. The number of files involved is not a significant factor to extraction. =cut my $progressEvery = 50; my $historyFilename = 'history.raw'; my $hHistory = undef; #################### # # Messaging support # #################### my $pathLog = ''; my $hLog = undef; my $ifERRORReported = 0; my $errorContext = undef; sub Msg($) { print "$_[0]\n"; print $hLog "$_[0]\n" if defined $hLog; } sub MsgERROR($) { print STDERR "$errorContext\n" if defined $errorContext; print STDERR "***" . $_[0] . "***\n"; print $hLog "$errorContext\n" if defined $errorContext && defined $hLog; print $hLog "***" . $_[0] . "***\n" if defined $hLog; $errorContext = undef; $ifERRORReported = 1; return 1; } sub MsgERRORResponse($@) { my ($error, @results) = @_; MsgERROR( $error ); Msg( "Response details:" ); foreach my $line (@results) { chomp $line; Msg( ".. $line" ); } } sub CoordinateExit(;$) { my $exitCode = $_[0]; $exitCode = $ifERRORReported if ! defined $exitCode; $errorContext = undef; Msg( "\n" ); MsgERROR( "Processing terminated by errors" ) if $exitCode != 0; Msg( ">>> Processing completed without errors" ) if $exitCode == 0; close $hLog if defined $hLog; close $hHistory if defined $hHistory; exit( $exitCode ); } sub OptionUsage(;$) { my $errorMessage = $_[0]; MsgERROR( $errorMessage ) if defined $errorMessage; print "$APPWHAT $APPNOTICES Usage: $APPNAME -V $APPNAME [-h | -?] $APPNAME [options] FIRST [ LAST ] Options: -log LOG - Create a copy of all message output in the log file LOG. Default is no output to STDOUT only. -progress COUNT - Issue progress message every COUNT changeset extractions. COUNT defaults to $progressEvery. 0 is no progress tracking. -rawhistory RAWHISTORY - Generate raw history information into the file RAWHISTORY. Error if RAWHISTORY exists (to avoid loss due to overwrite). If not specified, RAWHISTORY is $historyFilename. -accessreport - Access problems are reported but not as errors. Normally access problems are reported as errors. Arguments: FIRST - First changeset to start extraction from. Must be greater than 0. LAST - Last changeset to extract. If not specified, the last changeset known. Must be larger than or equal to FIRST. Adjusted to be the last known changeset if specified value is greater than the last known changeset. "; exit 0; } sub OptionVersion() { print "$APPWHAT\n"; exit 0; } #################### # # Progress tracking # #################### my $progressEventTotal = 0; # total progress events my $progressTimeStart = undef; # time() at start of progress tracking my $progressTimeLast = undef; # time() as of last progress report my @progressTimes = (); # seconds per unit during last reporting interval my $progressFullHistory = 0; # number of progress events to fill history buffer my $PROGRESS_IDX_FIRST = 0; my $PROGRESS_IDX_LAST = 9; my $PROGRESS_IDX_COUNT = ($PROGRESS_IDX_LAST - $PROGRESS_IDX_FIRST) + 1; my $progressIDX = $PROGRESS_IDX_FIRST; sub TimeHHMMSS($) { return sprintf "%2d:%02d:%02d", $_[0]/3600, ($_[0]/60) % 60, $_[0] % 60; } sub ProgressInit() { my $idx; for( $idx = $PROGRESS_IDX_FIRST; $idx <= $PROGRESS_IDX_LAST; ++$idx ) { $progressTimes[$idx] = 0; } $progressFullHistory = $PROGRESS_IDX_COUNT * $progressEvery; $progressEventTotal = 0; $progressTimeStart = time(); $progressTimeLast = $progressTimeStart; if( $progressEvery <= 0 ) { Msg( "No progress tracking" ); } else { Msg( "Progress tracking every $progressEvery changesets" ); } } sub Progress($$) { my ($progressThis, $progressLast) = @_; return if $progressEvery <= 0; ++$progressEventTotal; if( ($progressThis == $progressLast) || ($progressEventTotal % $progressEvery == 0) ) { my $now = time(); $progressTimes[$progressIDX] = $now - $progressTimeLast; ++$progressIDX; $progressIDX = $PROGRESS_IDX_FIRST if $progressIDX > $PROGRESS_IDX_LAST; my $historicTime = 0; for( my $idx = $PROGRESS_IDX_FIRST; $idx <= $PROGRESS_IDX_LAST; ++$idx ) { $historicTime += $progressTimes[$idx]; } my $events = $progressEventTotal >= $progressFullHistory ? $progressFullHistory : $progressEventTotal; my $remaining = $progressLast - $progressThis; $remaining = int( $remaining * ($historicTime / $events) ); =head2 Progress Output Progress output uses the format: NN of LL Last HHMMSS Elapsed HHMMSS Remaining HHMMSS NN is the most recent changeset processed and LL is the last changeset to process. HHMMSS is a time value. Last is the time required to complete the most recent processing, Elapsed is the time since start of processing, and Remaining is an estimate of the time required to complete extraction. =cut Msg( sprintf "%6d of %6d Last %s Elapsed %s Remaining %s", $progressThis, $progressLast, TimeHHMMSS( $now - $progressTimeLast ), TimeHHMMSS( $now - $progressTimeStart ), TimeHHMMSS( $remaining ) ); $progressTimeLast = $now; } } sub ProgressLast() { } #################### # # General utility functions. # #################### sub utilAssurePathFile($) { my $FilePath = $_[0]; my ($name, $Dir, $suffix) = fileparse( $FilePath, (qr(\.[^\.]+),qr(\.))); unless( -e $Dir ) { mkpath( $Dir, 0, 0777 ); } } sub XMLTextEncode($) { my $Text = $_[0]; return '' unless defined $Text; # safety $Text =~ s/\&/\&/g; # must be first encode $Text =~ s/\>/\>/g; $Text =~ s/\\&1`; my $exitCode = $? >> 8; MsgERRORResponse( "tf exit code $exitCode was not expected $exitGood", @results ) if $exitCode != 0 && $exitCode != $exitGood; return ($exitCode, @results); } sub EstablishFirstLast() { my ($first, $last) = (undef, undef); my ($exitCode, @results) = RunTFCommand( "changeset /latest /noprompt" ); return (undef, undef) if $exitCode != 0; my $knownLast = 0; foreach my $line (@results) { if( $line =~ m!^Changeset\: (\d+)! ) { $knownLast = $1; last; } } if( $knownLast == 0 ) { MsgERRORResponse( "Can't establish last known changeset", @results ); return (undef, undef); } Msg( "Last known changeset: $knownLast" ); if( scalar @ARGV == 1 ) { $first = $ARGV[0]; $last = $knownLast; } else { $first = $ARGV[0]; $last = $ARGV[1]; if( $last > $knownLast ) { Msg( "LAST limited to $knownLast" ); $last = $knownLast; } } MsgERROR( "FIRST must be >= 1" ) if $first < 1; MsgERROR( "LAST must be >= 1" ) if $last < 1; MsgERROR( "FIRST ($first) can not be greater than LAST ($last)" ) if $first > $last; return $ifERRORReported ? (undef, undef) : ($first, $last); } my %actions = (); my %changesetKeywords = (); sub ExtractionSummary() { my $now = time(); print $hHistory "\n" if $modeAccessReport && $reportAccessCounter > 0; print $hHistory "\n"; } =head1 Extraction Information Extraction information is encoded using XML style tags and constructs. However, the extraction information is not intended to be a valid XML document. Extracted information is fronted by a TFSEXTRACT tag. The tag has a version attribute indicating the version of this script that created the extract. Unless otherwise stated, all values are XML encodings of values from the TFS output. Generation provides manipulation of these values if appropriate. =cut my %mon2num = qw( jan 1 feb 2 mar 3 apr 4 may 5 jun 6 jul 7 aug 8 sep 9 oct 10 nov 11 dec 12 ); sub TFSDateAsUTC($) { my $TFSDate = $_[0]; my ($day, $month, $mday, $year, $hour, $min, $sec, $AMPM ) = ($1, $2, $3, $4, $5, $6, $7, $8) if $TFSDate =~ m!(\S+),\s+(\S+)\s+(\d+),\s+(\d+)\s+(\d+)\:(\d+)\:(\d+)\s+(\S+)!; if( ! defined $day ) { MsgERROR( "Don't recognized date: '$TFSDate'" ); return "? $TFSDate"; } $month = $mon2num{ lc substr($month, 0, 3) }; $hour = 0 if $AMPM eq 'AM' && $hour == 12; $hour += 12 if $AMPM eq 'PM' && $hour != 12; my $timeUTC = timelocal( $sec, $min, $hour, $mday, $month - 1 , $year - 1900 ); ($sec, $min, $hour, $mday, $month, $year) = gmtime($timeUTC); =head2 Action Time Action time is represented by the UTC of the extracted date-time followed by a YEAR.MON.DAY:HOUR:MIN:SEC of the UTC date-time. The YEAR.MON information is provided for convenience and to help users avoid context relative conversion errors. The UTC conversion assumes that the date-time from TFS is the local version of an internal UTC value. The import needs a UTC value. An operational timezone dependence is created between the extraction and other phases if the UTC conversion is done at other stages of the import process. The extracted TFS date-time is not included in the extracted information. =cut return sprintf "%d %04d.%02d.%02d:%02d:%02d:%02d", $timeUTC, $year+1900, $month+1, $mday, $hour, $min, $sec; } sub Extract($$) { my ($first, $last) = @_; print $hHistory "\n"; while( ! $ifERRORReported && $first <= $last ) { $errorContext = "Changeset $first description"; if( -e $pathStopFile ) { MsgERROR( "Stop file encountered" ); last; } my $context = "Changeset $first access failure"; my ($exitCode, @results) = RunTFCommand( "changeset $first /noprompt", 100 ); if( $modeAccessReport && $exitCode == 100 ) { ++$reportAccessCounter; print $hHistory "\n"; Msg( $context ); Progress( $first, $last ); ++$first; next; } elsif( $exitCode != 0 ) { MsgERROR( $context ); last; } my ($changeset, $user, $date) = ('', '', ''); my ($comment, $other, $blanks) = ('', '', ''); my @items = (); my $isComment = 0; my $isItem = 0; my $isOther = 0; foreach my $line (@results) { chomp $line; # Lines that start with a non-blank character are usually keywords. # A select set of keywords significant to the extract have # specific identification and processing. All other lines are # collectively part of what is called the other. if( $line =~ m!^Changeset\: (.+)! ) { $changeset = $1; $isComment = 0; $isItem = 0; $isOther = 0; } elsif( $line =~ m!^User\: (.+)$! ) { $user = $1; $isComment = 0; $isItem = 0; $isOther = 0; } elsif( $line =~ m!^Date\: (.+)$! ) { $date = TFSDateAsUTC( $1 ); $isComment = 0; $isItem = 0; $isOther = 0; } elsif( $line =~ m!^Comment\:$! ) { $isComment = 1; $isItem = 0; $isOther = 0; $blanks = ''; } elsif( $line =~ m!^Items\:$! ) { $isComment = 0; $isItem = 1; $isOther = 0; $blanks = ''; } elsif( $line =~ m!^(\S.+)\:$! ) { my $otherKeyword = $1; ++$changesetKeywords{$otherKeyword}; $isComment = 0; $isItem = 0; $isOther = 1; $other .= "$blanks" if $other ne ''; $other .= "$otherKeyword:\n"; $blanks = ''; # Values are established by the context of a keyword. Anything outside # a keyword context is an error that needs to be reported. } elsif( $line =~ m!^\s*$! ) { $blanks .= "\n"; } elsif( $isComment && $line ne '' ) { $comment .= "\n" if $comment ne ''; $comment .= "$blanks$line"; $blanks = ''; } elsif( $isOther && $line ne '' ) { $other .= "\n" if $other ne ''; $other .= "$blanks$line"; $blanks = ''; } elsif( $isItem && $line =~ m!^\s+([^\$]+)(\$.+)$! ) { my ($action, $reference) = ($1, $2); $action =~ s!\s+$!!; ++$actions{$action}; push @items, { 'action' => $action, 'reference' => $reference }; } else { MsgERROR( "Don't understand value line '$line'" ); last; } } =head2 Changeset Start The start of information associated with a changeset is specified by a CSS (ChangeSet Start) tag. The CSS tag has attributes that describe the changeset 'number', 'user', and 'date'. COMMENT and NOTE tags are values of the CSS tag. The COMMENT values are associated with the 'Comment:' keyword. Other keywords and their value lines are collectively the value of the NOTE tag. =cut print $hHistory "\n"; print $hHistory " " . XMLTextEncode( $comment ) . "\n"; print $hHistory " " . XMLTextEncode( $other ) . "\n" if $other ne ''; =head2 Changeset Items Each item associated with a changeset is represented by an ITEM tag within the extraction. The ITEM tag has attributes describing the 'action' and 'reference' of the TFS item. ITEMs may reference either a directory or a file. Item type does not impact generation so the extraction does not spend time establishing the type of the ITEM's reference. =cut foreach my $item (@items) { if( -e $pathStopFile ) { MsgERROR( "Stop file encountered" ); last; } print $hHistory " {action}\" reference\=\"$item->{reference}\"/>\n"; } =head2 Changeset End The end of items associated with the most previous CSS tag is indicated by a CSE (ChangeSet End) tag. =cut print $hHistory "\n"; Progress( $first, $last ); ++$first; } } #################### # # Main processing point # #################### my $optHelp = 0; my $optVersion = 0; Getopt::Long::Configure( "auto_abbrev", "no_ignore_case" ); OptionUsage( "Invalid specification" ) unless( GetOptions( "help|?" => \$optHelp, "log=s" => \$pathLog, "accessreport" => \$modeAccessReport, "progress=i" => \$progressEvery, "rawhistory=s" => \$historyFilename, "Version" => \$optVersion ) ); # Help and version are one description and we're done OptionVersion() if $optVersion; OptionUsage() if $optHelp || scalar @ARGV == 0; # Must be one or two arguments. 0 has already been covered above. OptionUsage( "Don't understand arguments" ) if( scalar @ARGV > 2 ); # Establish a log if( $pathLog ne '' ) { $pathLog = rel2abs( $pathLog ); utilAssurePathFile( $pathLog ); Msg("Activity logged in: $pathLog"); open $hLog, ">", $pathLog; } # Identify the processing. Msg( $APPWHAT ); # Verify the TFS context and establish the changeset range. my ($first, $last) = EstablishFirstLast(); CoordinateExit() unless defined $first; Msg( "History for changeset $first" ) if $first == $last; Msg( "History for changesets $first to $last" ) if $first != $last; Msg( "Extraction stop file: $pathStopFile" ); my $historyPath = rel2abs( $historyFilename ); Msg( "Extracted history information to: $historyPath" ); if( -e $historyPath ) { MsgERROR( "History file exists '$historyFilename'" ); CoordinateExit( ); } else { utilAssurePathFile( $historyPath ); open $hHistory, '>', $historyPath; $hHistory->autoflush(1); print $hHistory "\n"; } ProgressInit(); Extract( $first, $last ); ExtractionSummary(); ProgressLast(); CoordinateExit();