#! /usr/bin/perl -w
use strict;
=head1 Notices
Originally developed for Perforce by VIZIM (www.vizim.com)
Copyright (c) 2014 Perforce Software, Inc. and VIZIM Worldwide, Inc.
All rights reserved.
Please see LICENSE.txt in top-level folder of this distribution for
license information
=cut
use v5.14.0; # Earliest version testing was performed against
use File::Copy;
use File::Path;
use File::Basename;
use File::Spec::Functions qw(rel2abs catfile);
use IO::Handle;
use Time::Local;
use Getopt::Long;
my $APPNAME = 'TFSHistory.pl';
my $versionMajor = 7;
my $versionMinor = '07';
my $APPWHAT = "TFS repository history extract; Version $versionMajor.$versionMinor";
my $APPNOTICES = "Copyright (c) 2014 Perforce Software, Inc. and VIZIM Worldwide, Inc.
All rights reserved.
See LICENSE.txt for license information.";
=head1 TFS extract tool
This tool extracts history information from a TFS repository.
=cut
=head2 Operational Context
The extraction runs from a TFS workspace that has the collection
to be extracted as its operational context.
=cut
####################
#
# Program constants and globals
#
####################
$| = 1; # force STDOUT to keep logs up to date in case of failure/ abort
=head2 Stop file
A clean, coordinated stop of an extraction is accomplished by creating a stop
file in the directory of execution.
Existence of the stop file is the important characteristic.
Content of the stop file, if any, is ignored.
Extraction will not be initiated if the stop file exists.
The full path to the stop file is identified in the first few lines of output
from this tool.
=cut
my $pathStopFile = rel2abs( catfile( '.', 'History.stop' ) );
=head2 Report Access mode
Normally, access errors terminate extraction processing to allow restart
of the extraction process.
This is appropriate as TFS servers have been observed to fail when presented
with a long series of historic requests.
There is also a known 2012 TFS access error invovlving user GUIDs.
The Report Access mode provides a mechanism for the extraction to identify
corrupt user GUIDs that would impact the import.
Extractions with access errors are not useable for import.
To assure that extractions with access errors are not used by generation
processing, an ACCESS tag is added to the extraction.
As an invalid tag, ACCESS will cause generation processing to fail.
=cut
my $modeAccessReport = 0;
my $reportAccessCounter = 0;
=head1 Progress Tracking
Extraction often requires hours of elapsed time to complete.
Progress tracking provides both a heart-beat and a completion estimate.
The primary factor influencing extraction time is the number of TFS requests
required to extract information for a changeset.
Each changeset is one request.
The number of files involved is not a significant factor to extraction.
=cut
my $progressEvery = 50;
my $historyFilename = 'history.raw';
my $hHistory = undef;
####################
#
# Messaging support
#
####################
my $pathLog = '';
my $hLog = undef;
my $ifERRORReported = 0;
my $errorContext = undef;
sub Msg($)
{
print "$_[0]\n";
print $hLog "$_[0]\n" if defined $hLog;
}
sub MsgERROR($)
{
print STDERR "$errorContext\n" if defined $errorContext;
print STDERR "***" . $_[0] . "***\n";
print $hLog "$errorContext\n" if defined $errorContext && defined $hLog;
print $hLog "***" . $_[0] . "***\n" if defined $hLog;
$errorContext = undef;
$ifERRORReported = 1;
return 1;
}
sub MsgERRORResponse($@)
{
my ($error, @results) = @_;
MsgERROR( $error );
Msg( "Response details:" );
foreach my $line (@results) {
chomp $line;
Msg( ".. $line" );
}
}
sub CoordinateExit(;$)
{
my $exitCode = $_[0];
$exitCode = $ifERRORReported if ! defined $exitCode;
$errorContext = undef;
Msg( "\n" );
MsgERROR( "Processing terminated by errors" ) if $exitCode != 0;
Msg( ">>> Processing completed without errors" ) if $exitCode == 0;
close $hLog if defined $hLog;
close $hHistory if defined $hHistory;
exit( $exitCode );
}
sub OptionUsage(;$)
{
my $errorMessage = $_[0];
MsgERROR( $errorMessage ) if defined $errorMessage;
print "$APPWHAT
$APPNOTICES
Usage:
$APPNAME -V
$APPNAME [-h | -?]
$APPNAME [options] FIRST [ LAST ]
Options:
-log LOG - Create a copy of all message output in the log file LOG.
Default is output to STDOUT only.
-progress COUNT - Issue progress message every COUNT changeset
extractions. COUNT defaults to $progressEvery. 0 is no progress
tracking.
-rawhistory RAWHISTORY - Generate raw history information into the file
RAWHISTORY. Error if RAWHISTORY exists (to avoid loss due to
overwrite). If not specified, RAWHISTORY is $historyFilename.
-accessreport - Access problems are reported but not as errors. Normally
access problems are reported as errors.
Arguments:
FIRST - First changeset to start extraction from. Must be greater than 0.
LAST - Last changeset to extract. If not specified, the last changeset
known. Must be larger than or equal to FIRST. Adjusted to be the
last known changeset if specified value is greater than the last
known changeset.
";
exit 0;
}
sub OptionVersion()
{
print "$APPWHAT\n";
exit 0;
}
####################
#
# Progress tracking
#
####################
my $progressEventTotal = 0; # total progress events
my $progressTimeStart = undef; # time() at start of progress tracking
my $progressTimeLast = undef; # time() as of last progress report
my @progressTimes = (); # seconds per unit during last reporting interval
my $progressFullHistory = 0; # number of progress events to fill history buffer
my $PROGRESS_IDX_FIRST = 0;
my $PROGRESS_IDX_LAST = 9;
my $PROGRESS_IDX_COUNT = ($PROGRESS_IDX_LAST - $PROGRESS_IDX_FIRST) + 1;
my $progressIDX = $PROGRESS_IDX_FIRST;
sub TimeHHMMSS($)
{
return sprintf "%2d:%02d:%02d", $_[0]/3600, ($_[0]/60) % 60, $_[0] % 60;
}
sub ProgressInit()
{
my $idx;
for( $idx = $PROGRESS_IDX_FIRST; $idx <= $PROGRESS_IDX_LAST; ++$idx ) {
$progressTimes[$idx] = 0;
}
$progressFullHistory = $PROGRESS_IDX_COUNT * $progressEvery;
$progressEventTotal = 0;
$progressTimeStart = time();
$progressTimeLast = $progressTimeStart;
if( $progressEvery <= 0 ) {
Msg( "No progress tracking" );
} else {
Msg( "Progress tracking every $progressEvery changesets" );
}
}
sub Progress($$)
{
my ($progressThis, $progressLast) = @_;
return if $progressEvery <= 0;
++$progressEventTotal;
if( ($progressThis == $progressLast) || ($progressEventTotal % $progressEvery == 0) ) {
my $now = time();
$progressTimes[$progressIDX] = $now - $progressTimeLast;
++$progressIDX;
$progressIDX = $PROGRESS_IDX_FIRST if $progressIDX > $PROGRESS_IDX_LAST;
my $historicTime = 0;
for( my $idx = $PROGRESS_IDX_FIRST; $idx <= $PROGRESS_IDX_LAST; ++$idx ) {
$historicTime += $progressTimes[$idx];
}
my $events = $progressEventTotal >= $progressFullHistory
? $progressFullHistory
: $progressEventTotal;
my $remaining = $progressLast - $progressThis;
$remaining = int( $remaining * ($historicTime / $events) );
=head2 Progress Output
Progress output uses the format:
NN of LL Last HHMMSS Elapsed HHMMSS Remaining HHMMSS
NN is the most recent changeset processed and LL is the last changeset to
process.
HHMMSS is a time value. Last is the time required to complete the most recent
processing, Elapsed is the time since start of processing, and Remaining is an
estimate of the time required to complete extraction.
=cut
Msg( sprintf "%6d of %6d Last %s Elapsed %s Remaining %s",
$progressThis, $progressLast,
TimeHHMMSS( $now - $progressTimeLast ),
TimeHHMMSS( $now - $progressTimeStart ),
TimeHHMMSS( $remaining ) );
$progressTimeLast = $now;
}
}
sub ProgressLast()
{
}
####################
#
# General utility functions.
#
####################
sub utilAssurePathFile($)
{
my $FilePath = $_[0];
my ($name, $Dir, $suffix) = fileparse( $FilePath, (qr(\.[^\.]+),qr(\.)));
unless( -e $Dir ) {
mkpath( $Dir, 0, 0777 );
}
}
sub XMLTextEncode($)
{
my $Text = $_[0];
return '' unless defined $Text; # safety
$Text =~ s/\&/\&/g; # must be first encode
$Text =~ s/\>/\>/g;
$Text =~ s/\</\</g;
$Text =~ s/\'/\'/g;
$Text =~ s/\"/\"/g;
return $Text;
}
####################
#
# Extraction and extraction support
#
####################
sub RunTFCommand($;$)
{
my ($command, $exitGood) = @_;
$exitGood = 0 unless defined $exitGood;
return (0, ())
if $command eq '';
my @results = `tf $command 2\>\&1`;
my $exitCode = $? >> 8;
MsgERRORResponse( "tf exit code $exitCode was not expected $exitGood", @results )
if $exitCode != 0 && $exitCode != $exitGood;
return ($exitCode, @results);
}
sub EstablishFirstLast()
{
my ($first, $last) = (undef, undef);
my ($exitCode, @results) = RunTFCommand( "changeset /latest /noprompt" );
return (undef, undef) if $exitCode != 0;
my $knownLast = 0;
foreach my $line (@results) {
if( $line =~ m!^Changeset\: (\d+)! ) {
$knownLast = $1;
last;
}
}
if( $knownLast == 0 ) {
MsgERRORResponse( "Can't establish last known changeset", @results );
return (undef, undef);
}
Msg( "Last known changeset: $knownLast" );
if( scalar @ARGV == 1 ) {
$first = $ARGV[0];
$last = $knownLast;
} else {
$first = $ARGV[0];
$last = $ARGV[1];
if( $last > $knownLast ) {
Msg( "LAST limited to $knownLast" );
$last = $knownLast;
}
}
MsgERROR( "FIRST must be >= 1" ) if $first < 1;
MsgERROR( "LAST must be >= 1" ) if $last < 1;
MsgERROR( "FIRST ($first) can not be greater than LAST ($last)" )
if $first > $last;
return $ifERRORReported ? (undef, undef) : ($first, $last);
}
my %actions = ();
my %changesetKeywords = ();
sub ExtractionSummary()
{
my $now = time();
print $hHistory "<ACCESS count\=\"$reportAccessCounter\"\>\n"
if $modeAccessReport && $reportAccessCounter > 0;
print $hHistory "<!--\n";
print $hHistory "Extraction time: " . TimeHHMMSS( $now - $progressTimeStart ) . "\n";
print $hHistory " Changesets: $progressEventTotal\n";
print $hHistory "\nAction distribution:\n";
foreach my $key (sort keys %actions) {
print $hHistory sprintf " %6d %s\n", $actions{$key}, $key;
}
print $hHistory "\nChangeset other keyword distribution:\n";
foreach my $key (sort keys %changesetKeywords) {
print $hHistory sprintf " %6d %s\n", $changesetKeywords{$key}, $key;
}
print $hHistory "-->\n";
}
=head1 Extraction Information
Extraction information is encoded using XML style tags and constructs.
However, the extraction information is not intended to be a valid XML
document.
Extracted information is fronted by a TFSEXTRACT tag.
The tag has a version attribute indicating the version of this script that
created the extract.
Unless otherwise stated, all values are XML encodings of values
from the TFS output.
Generation provides manipulation of these values if appropriate.
=cut
=head2 Date Format
TFS 2010 appears to use the fixed date format:
MMM dd, yyyy hh:mm:ss AM/PM
As such, this is the lowest common denominator format.
TFS 2012 and later respond to the "date, time or number format" setting of
the local host.
The default date format for Win7 and Win8 clients is to prepend the name of
the day before the month (MMM) information.
A search was made but nothing was found relative to which source
(server or client) controls this format.
=over 6
=item NOTE
This is the one place where the TFS date/time format is significant.
From this point forward the import package uses an internal format that
is numeric.
This is the only place where alternative date/time format processing
is required during extract.
Using Perl DateTime module packages or other techniques to make date/time
recognition more generic is not desirable.
Date/time is critical to imports.
Flexibility in date/time recognition leads to highly undesirable false
positives.
Assume a fixed date/time format and reject anything else.
=back
=cut
my %mon2num = qw( jan 1 feb 2 mar 3 apr 4 may 5 jun 6 jul 7 aug 8 sep 9 oct 10 nov 11 dec 12 );
sub TFSDateAsUTC($)
{
my $TFSDate = $_[0];
my ($month, $mday, $year, $hour, $min, $sec, $AMPM) = (undef, 0, 0, 0, 0, 0, '');
($month, $mday, $year, $hour, $min, $sec, $AMPM) = ($1, $2, $3, $4, $5, $6, $7, $8)
if $TFSDate =~ m!(\S+)\s+(\d+),\s+(\d+)\s+(\d+)\:(\d+)\:(\d+)\s+(\S+)!;
if( ! defined $month ) {
MsgERROR( "Don't recognized date: '$TFSDate'" );
return "? $TFSDate";
}
$month = $mon2num{ lc substr($month, 0, 3) };
$hour = 0 if $AMPM eq 'AM' && $hour == 12;
$hour += 12 if $AMPM eq 'PM' && $hour != 12;
my $timeUTC = timelocal( $sec, $min, $hour, $mday, $month - 1 , $year - 1900 );
($sec, $min, $hour, $mday, $month, $year) = gmtime($timeUTC);
=head2 Action Time
Action time is represented by the UTC of the extracted date-time followed by a
YEAR.MON.DAY:HOUR:MIN:SEC of the UTC date-time.
The YEAR.MON information is provided for convenience and to help users avoid
context relative conversion errors.
The UTC conversion assumes that the date-time from TFS is the local
version of an internal UTC value. The import needs a UTC value.
An operational timezone dependence is created between the extraction and other
phases if the UTC conversion is done at other stages of the import process.
The extracted TFS date-time is not included in the extracted information.
=cut
return sprintf "%d %04d.%02d.%02d:%02d:%02d:%02d",
$timeUTC, $year+1900, $month+1, $mday, $hour, $min, $sec;
}
sub Extract($$)
{
my ($first, $last) = @_;
print $hHistory "<TFSEXTRACT version=\"$versionMajor\.$versionMinor\"/>\n";
while( ! $ifERRORReported && $first <= $last ) {
$errorContext = "Changeset $first description";
if( -e $pathStopFile ) {
MsgERROR( "Stop file encountered" );
last;
}
my $context = "Changeset $first access failure";
my ($exitCode, @results) = RunTFCommand( "changeset $first /noprompt", 100 );
if( $modeAccessReport && $exitCode == 100 ) {
++$reportAccessCounter;
print $hHistory "<!--- $context -->\n";
Msg( $context );
Progress( $first, $last );
++$first;
next;
} elsif( $exitCode != 0 ) {
MsgERROR( $context );
last;
}
my ($changeset, $user, $date) = ('', '', '');
my ($comment, $other, $blanks) = ('', '', '');
my @items = ();
my $isComment = 0;
my $isItem = 0;
my $isOther = 0;
foreach my $line (@results) {
chomp $line;
# Lines that start with a non-blank character are usually keywords.
# A select set of keywords significant to the extract have
# specific identification and processing. All other lines are
# collectively part of what is called the other.
#
# NOTE: The changeset, user, and date keywords always occur at the
# head of the information. Instances of these keywords in
# comment and other text have been encountered. Once they
# have been processed do not attempt to re-process them.
if( $line =~ m!^Changeset\: (.+)! && $changeset eq '' ) {
$changeset = $1;
$isComment = 0;
$isItem = 0;
$isOther = 0;
} elsif( $line =~ m!^User\: (.+)$! && $user eq '' ) {
$user = $1;
$isComment = 0;
$isItem = 0;
$isOther = 0;
} elsif( $line =~ m!^Date\: (.+)$! && $date eq '' ) {
$date = TFSDateAsUTC( $1 );
$isComment = 0;
$isItem = 0;
$isOther = 0;
} elsif( $line =~ m!^Comment\:$! ) {
$isComment = 1;
$isItem = 0;
$isOther = 0;
$blanks = '';
} elsif( $line =~ m!^Items\:$! ) {
$isComment = 0;
$isItem = 1;
$isOther = 0;
$blanks = '';
} elsif( $line =~ m!^(\S.+)\:$! ) {
my $otherKeyword = $1;
++$changesetKeywords{$otherKeyword};
$isComment = 0;
$isItem = 0;
$isOther = 1;
$other .= "$blanks"
if $other ne '';
$other .= "$otherKeyword:\n";
$blanks = '';
# Values are established by the context of a keyword. Anything outside
# a keyword context is an error that needs to be reported.
} elsif( $line =~ m!^\s*$! ) {
$blanks .= "\n";
} elsif( $isComment && $line ne '' ) {
$comment .= "\n" if $comment ne '';
$comment .= "$blanks$line";
$blanks = '';
} elsif( $isOther && $line ne '' ) {
$other .= "\n" if $other ne '';
$other .= "$blanks$line";
$blanks = '';
} elsif( $isItem && $line =~ m!^\s+([^\$]+)(\$.+)$! ) {
my ($action, $reference) = ($1, $2);
$action =~ s!\s+$!!;
++$actions{$action};
push @items, { 'action' => $action, 'reference' => $reference };
} else {
MsgERROR( "Don't understand value line '$line'" );
last;
}
}
=head2 Changeset Start
The start of information associated with a changeset is specified by a
CSS (ChangeSet Start) tag.
The CSS tag has attributes that describe the changeset 'number', 'user',
and 'date'.
COMMENT and NOTE tags are values of the CSS tag.
The COMMENT values are associated with the 'Comment:' keyword.
Other keywords and their value lines are collectively the value of the
NOTE tag.
=cut
print $hHistory "<CSS number\=\"$changeset\" user\=\"$user\" date\=\"$date\"/>\n";
print $hHistory " <COMMENT>"
. XMLTextEncode( $comment )
. "</COMMENT>\n";
print $hHistory " <NOTE>"
. XMLTextEncode( $other )
. "</NOTE>\n"
if $other ne '';
=head2 Changeset Items
Each item associated with a changeset is represented by an ITEM tag
within the extraction.
The ITEM tag has attributes describing the 'action' and 'reference'
of the TFS item.
ITEMs may reference either a directory or a file.
Item type does not impact generation so the extraction does not spend
time establishing the type of the ITEM's reference.
=cut
foreach my $item (@items) {
if( -e $pathStopFile ) {
MsgERROR( "Stop file encountered" );
last;
}
print $hHistory " <ITEM action\=\"$item->{action}\" reference\=\"$item->{reference}\"/>\n";
}
=head2 Changeset End
The end of items associated with the most previous CSS tag
is indicated by a CSE (ChangeSet End) tag.
=cut
print $hHistory "<CSE/>\n";
Progress( $first, $last );
++$first;
}
}
####################
#
# Main processing point
#
####################
my $optHelp = 0;
my $optVersion = 0;
Getopt::Long::Configure( "auto_abbrev", "no_ignore_case" );
OptionUsage( "Invalid specification" )
unless( GetOptions( "help|?" => \$optHelp,
"log=s" => \$pathLog,
"accessreport" => \$modeAccessReport,
"progress=i" => \$progressEvery,
"rawhistory=s" => \$historyFilename,
"Version" => \$optVersion
) );
# Help and version are one description and we're done
OptionVersion() if $optVersion;
OptionUsage() if $optHelp || scalar @ARGV == 0;
# Must be one or two arguments. 0 has already been covered above.
OptionUsage( "Don't understand arguments" )
if( scalar @ARGV > 2 );
# Establish a log
if( $pathLog ne '' ) {
$pathLog = rel2abs( $pathLog );
utilAssurePathFile( $pathLog );
Msg("Activity logged in: $pathLog");
open $hLog, ">", $pathLog;
}
# Identify the processing.
Msg( $APPWHAT );
# Verify the TFS context and establish the changeset range.
my ($first, $last) = EstablishFirstLast();
CoordinateExit() unless defined $first;
Msg( "History for changeset $first" ) if $first == $last;
Msg( "History for changesets $first to $last" ) if $first != $last;
Msg( "Extraction stop file: $pathStopFile" );
my $historyPath = rel2abs( $historyFilename );
Msg( "Extracted history information to: $historyPath" );
if( -e $historyPath ) {
MsgERROR( "History file exists '$historyFilename'" );
CoordinateExit( );
} else {
utilAssurePathFile( $historyPath );
open $hHistory, '>', $historyPath;
$hHistory->autoflush(1);
print $hHistory "<!---\nGenerated " . localtime() . " by $APPNAME\n -->\n";
}
ProgressInit();
Extract( $first, $last );
ExtractionSummary();
ProgressLast();
CoordinateExit();