#!/usr/bin/perl -w
#
# $Id: //guest/jeffery_g_smith/perforce/utils/sttop4/main/readhist.pl#8 $
#
# StarTeam to Perforce Converter, phase I: extract StarTeam metadata
#
# Copyright 1998 Perforce Software.  All rights reserved.
# Based on VSStoP4:
#   Written by James Strickland, April 1998
#   Maintained by Robert Cowham, since 2000
# Updated to support StarTeam conversions:
#   Jeffery G. Smith, MedPlus, Inc. 2004-2005
#
# The StarTeam respoitory is stored in a non-documented, binary format.  However, all
# the metadata (data about the revisions in the archive) is available via a COM
# interface supported by the StarTeam SDK.  This script extracts all useful metadata,
# using the COM API writing the result to files which
# contain no extraneous information and which are easy to parse.
#
# Of course, if Borland changes the COM SPI, this script may have to change to match it!

require 5.0;
use strict;
use integer;
use Time::Local;
use lib '.';
use convert;
use Change;
use File::Path;
use Win32::OLE;
use stcom;

convert::openlog('open');

StarTeamSDKinit();

mkpath($convert::metadata_dir); # ensure the metadata directory exists
mkpath("$convert::metadata_dir/labels"); # ensure the label dir exists
my $msg="can't open";
open(FILES,   ">$convert::metadata_dir/files")      or die $msg;

unlink("$convert::metadata_dir/ssproperties"); # otherwise VSS appends to it

# Modified to allow more than one VSS root dir to be specified
if ($convert::root =~ /\|/) {
  my $rt;
  foreach $rt (split(/\|/, $convert::root)) {
    extract_vss_metadata($rt);
  }
}
else {
  extract_vss_metadata($convert::root);
}

my %label_files;
# Check for DB_File module installed - much faster for larger repositories if so
my $USEDB_FILE = 0;
eval{require DB_File; import DB_File;};
if (!$@) {
  $USEDB_FILE = 1;
  print "Using module DB_File\n";
  unlink "$convert::metadata_dir/db.labels" ;
  no warnings qw(once);
  tie %label_files, "DB_File", "$convert::metadata_dir/db.labels", O_RDWR()|O_CREAT(), 0666, $DB_File::DB_HASH
        or die "Cannot open file '$convert::metadata_dir/db.labels': $!\n";
}

open(SS,"<$convert::metadata_dir/ssproperties") or die $msg;
my ($file,$type);

FILE_OR_PROJECT:
while( $_=myread() ) {
  chomp; s/\r$//;
  my ($field,$value) = split(/:\s*/,$_,2);
  next unless defined($value);

  if( $field eq "File") {
    $file = $value;
    if( $file =~ /[\000-\031@#%\*]/ || $file =~ /\.\.\./ ) {
      die "..., @, #, %, * or unprintable characters not allowed in filename '$file' - sorry, rename it and start over.\n";
    }
  } elsif( $field eq "Type") {
    $type=lc($value);
  } elsif( $field eq "Store only latest version") {
    die "parse error" if(!defined($file)||!defined($type));
    if($value eq "Yes") { $type="tempobj"; }
    $file =~ s/^\s+//;
    if (exclude_file($file)) {
      my $msg = "Ignoring file: $file\n";
      convert::log($msg);
    }
    else {
      print FILES "$type $file\n";
    }
    undef $file;
    undef $type;
  }
}
convert::log("\n\nDone with: ssproperties\n", 1);
close(FILES);
close(SS);

open(FILES,   "<$convert::metadata_dir/files")      or die $msg; # for input
open(LABELS_SUMMARY,  ">$convert::metadata_dir/labels_summary")     or die $msg;
open(CHANGES, ">$convert::metadata_dir/changes.ns") or die $msg; # ns for "not sorted"

unlink("$convert::metadata_dir/changes"); # get rid of any existing change file to avoid confusion

print "Starting processing of file versions\n";
my $file_count = 0;
while(<FILES>) {
  my ($type,$file) = split(/ /,$_,2);
  chomp($file);
  read_history($file);
  print "Processing file: $file_count\r";
  $file_count++;
}
print "\n";

if ($USEDB_FILE) {
  untie %label_files;
}
else {
  open(LF,">$convert::metadata_dir/db.labels") or die $msg;
  foreach my $k (keys %label_files) {
    print LF "$k#" . $label_files{$k} . "\n";
  }
  close(LF);
}

StarTeamSDKdone();

my (%label_comments, %label_warned, %label_ignored, %label_processed);
my (%label_timestamp);

sub read_history # expects open file descriptors CHANGES, LABELS_SUMMARY, BRANCHES
{
  my $file = shift;
  read_file_history($file);

  read_comment(); # ignore everything up to the first data block
  # (previously I verified that it started with "History of $file ..."
  # except that MS word wraps long filenames with spaces so just hang it)

  my (@pending_labels);

  ITEM:
  while($_=myread()) {

    my ($op,$version,$user,$timestamp,$label,$comment);

    if(/^\*{17}( +Version (\d+) +\*{17})?/) {

      my ($version,$user,$timestamp,$label,$comment);
      $version = $2;

      $_=myread();
      if(/^Label: "(.+)"$/) {
        $label = $1;
        $_=myread();
      }
      ($user,$timestamp) = parse_user_and_timestamp($_);

      $_=myread();
      if(/Labeled/) { # this revision isn't really a revision - it's just a label
        my $before = $label;

        if ($convert::label_ignore_regexp) { # ignore unwanted labels
            if ($label =~ /$convert::label_ignore_regexp/i) {
                if (!$label_ignored{$label}) {
                  print "Ignoring label \"$label\"\n";
                  $label_ignored{$label} = 1;
                }
                $comment = read_comment();
                next ITEM;
            }
        }

        # label transformations without a warning
        # labels must not contain umlauts
        $label =~ s/\204/ae/g;
        $label =~ s/\224/oe/g;
        $label =~ s/\201/ue/g;
        # delete contigous exclamation marks and append "[local] instead"
        if ($label =~ s/ *!!+ *//g) {
            $label .= "_[local]";
        }
        # replace spaces within labels silently as they appear in almost every label
        $label =~ s/ /_/g;
        # replace more special characters silently
        $label =~ s@/@-@g;
        $label =~ tr/()/[]/;

        # label transformations with a warning
        # replace special characters
        if( $label =~ s%[\000-\031 @#&,<>()\"'!\%\*/'\\:]%_%g && !$label_warned{$label}) {
          print "label \"$before\" changed to \"$label\"\n";
          $label_warned{$label} = 1;
        }
        if( $label =~ s/\.\.\./_/g && !$label_warned{$label} ) {
          print "Removed ellipsis (...) in \"$label\"\n";
          $label_warned{$label} = 1;
        }
        if( $label =~ /^\d+$/ && !$label_warned{$label} ) {
          $label = "_$label";
          print "Underscore prepended to all numeric label \"$label\"\n";
          $label_warned{$label} = 1;
        }

        # Prefix label if specified
        $label = $convert::label_prefix . $label;

        push @pending_labels,$label;
        # assume all the comments for a particular label are the same...
        $comment = read_comment();
        $label_comments{$label} = $comment
          if ! defined($label_comments{$label});
        $label_timestamp{$label} = $timestamp
          if ! defined($label_timestamp{$label});
        next ITEM;

      } # Labelled

      # this is a "real" revision - either "Checked in projectname" or "Branched"

      $comment = read_comment();
      my $change = new Change( {
        'timestamp' => $timestamp,
        'author' => $user,
        'change_description' => $comment,
        'changelist' => [ "$version $file" ] } );

      $change->put(\*CHANGES);
      while(@pending_labels) {
        $label = pop(@pending_labels);
        if ($label_files{"$version $file"}) {
          $label_files{"$version $file"} = $label_files{"$version $file"} . " " . $label;
        }
        else {
          $label_files{"$version $file"} = $label;
        }
        if (! defined($label_processed{$label})) {
          $label_processed{$label} = 1;
          print LABELS_SUMMARY "$label $label_timestamp{$label}\n";
          # use 10 *'s as end of comments marker
          print LABELS_SUMMARY "$label_comments{$label}**********\n";
        }
      }
      next ITEM;

    } else {
      die "parsing messed up on '$_'\n";
    }

  }
  close(SS);
}


sub parse_user_and_timestamp
{
  $_=shift;
  my $date_re = $convert::format_date;
  my $time_re = $convert::format_time;

  if ( ! m@^User: (.*)\b\s+Date:\s+(\S+)\s+Time:\s+(\S+)@ )
  {
    # 2002-03-04/ps
    # A problem here most likely indicates a row of asterisks in a
    # comment. Probably the regexps in this file with "\*{17}" should be
    # changed to fix this, but I have no more time to do _and_ test
    # it...
    # Changed by RC - seems OK.
    print "timestamp problem: $_\n";
    die "can't parse timestamp" ;
  }

  my $user = $1;
  $user=lc($user) if($convert::lowercase_usernames);

  my $date_exp = $2;
  my $time_exp = $3;

  my ($day, $mon, $year, $hour, $min);
  my ($a, $b, $c);

  if (!(($a, $b, $c) = ($date_exp =~ m@^(\d+).(\d+).(\d+)@ )))
  {
    print "date problem: $date_exp\n";
    die "can't parse date" ;
  }

  if ($date_re =~ /^m.d.y$/)
  {
    $day = $b;
    $mon = $a;
    $year = $c
  }
  elsif($date_re =~ /^d.m.y$/)
  {
    $day = $a;
    $mon = $b;
    $year = $c;
  }
  elsif($date_re =~ /^y.m.d$/)
  {
    $day = $c;
    $mon = $b;
    $year = $a;
  }
  else
  {
    print "date problem: $date_re\n";
    die "can't parse date format" ;
  }

  # Now time component
  if ( !(($a, $b, $c) = ($time_exp =~ m@^(\d+).(\d+)([ap]*)@ )))
  {
    print "time problem: $date_exp\n";
    die "can't parse time" ;
  }

  if ($time_re =~ /^H:M$/)
  {
    $hour = $a;
    $min = $b;
    if($c =~ /[ap]/) {
      print "time problem: expected $time_re got unexpected AM/PM char\n";
      die "can't parse time" ;
    }
  }
  elsif($time_re =~ /^H:Mp$/)
  {
    $hour = $a % 12;
    if($c eq 'p') { $hour += 12; }
    $min = $b;
  }
  else
  {
    print "time problem: $time_re\n";
    die "can't parse time format" ;
  }

  # timelocal takes  second, minute,  hour,   day, month, year
  # in the range      0..59,  0..59, 0..23, 1..31, 0..11, 0..99
  # The two digit year has assumptions made about it such that
  # any time before 2037 (when the 32-bit seconds-since-1970 time
  # will run out) is handled correctly.  i.e. 97 -> 1997, 1 -> 2001

  my $timestamp=timelocal(0, $min, $hour, $day, $mon -1, $year);

  # convert user names because they must not contain umlauts
  $user =~ s/\204/ae/g;
  $user =~ s/\224/oe/g;
  $user =~ s/\201/ue/g;

#   # nifty table for more complicated username conversion (if desired)
#   my %usertable = ( "user_j"          => "joe.user",
#                     "developer_d"     => "donald.developer",
#                   );
#   if ( !exists $usertable{$user} ) {
#       print "unknown user $user\n";
#   } else {
#       $user = $usertable{$user};
#   }

  return ($user,$timestamp);
}

sub read_comment
{
  my $comment="";

  while($_=myread()) {
    # convert a few umlauts from DOS-Codepage to Latin-1
    #         ?   ?   ü   ?   ?   ?   ?
    $_ =~ tr/\204\224\201\216\231\232\341/??ü????/;

    # SAB 980503 - Comment can be terminated either by a new version
    # banner or by a Label separator...
##      *****************  Version 28  *****************
##      User: User1        Date:  3/19/98   Time:  2:16p
##      Checked in $/Projects/Common/AppUtils
##      Comment: setup CoffFormat build configuration
##
##      **********************
##      Label: "demo #1"
##      User: User2        Date:  3/16/98   Time:  4:23a
##      Labeled
##      Label comment: Proposed demo.
##
##      **********************
##      Label: "demo."
##      User: User2        Date:  3/13/98   Time:  2:35a
##      Labeled
##      Label comment: Project ready for demo.
##
##      *****************  Version 27  *****************
##      User: User2        Date:  2/17/98   Time: 10:27p
##      Checked in $/Projects/Common/AppUtils
##      Comment: Fixed location of output .lib/.bsc files.
##

    if(/^\*{22}$|^\*{17} +Version (\d+) +\*{17}$/) {
      $comment =~ s/^(Label comment|Comment): //;
      $comment =~ s/\n\s+$/\n/; # strip trailing blank lines
      pushback($_);
      return $comment;
    }
    $comment .= $_;
  }
  $comment =~ s/^Comment: //;
  return $comment;
}


# functions to read from SS with pushback
my @linebuffer;

sub myread
{
  my $line;
  $line = pop @linebuffer;
  if (! defined($line)) {
    $line = <SS>;
    convert::log("${line}", 1) if defined($line) && $convert::debug_level > 1;
  }
  return $line;
}

sub pushback
{
  push @linebuffer,shift;
}

sub read_file_history
{
  my $filename = shift;

  @linebuffer || pushback("History of $filename\n\n");
  
  my $fileref = STgetfileref($filename);
  my $history = STgethistory($fileref);
  print $history;
  
  my $version = STgetfilever($file);
  my $user = STgetfilever($file);
  my $date = STgetfiledate($file);
  my $time = STgetfilever($file);
  my $comment = STgetfilever($file);

  pushback("***************** Version $version *****************\n");
  pushback("User: $user\tDate: $date\tTime: $time\n");
  pushback("Operation\n");
  pushback("Comment: $comment\n\n");
}

sub exclude_file    # True if file should be excluded
{
  my $file = shift;

  if (!$convert::exclude) {
    return 0;
  }

  # note use of Quote Meta encapsulation and file matching at start of string
  if ($convert::exclude =~ /\|/) {
    my $pat;

    foreach $pat (split(/\|/, $convert::exclude)) {
      if ($file =~ /^\Q$pat\E/) {
        return 1;
      }
    }
  }
  elsif ($file =~ /^\Q$convert::exclude\E/) {
    return 1;
  }

  return 0;
}

# Procedure which extracts using a more painful method
sub extract_vss_metadata
{
  my $rt = shift;
  my $cmd;
  
  my $st_project = STgetproject($rt);

  # Otherwise we know we have to be careful how we produce a tree listing of all files so as to avoid
  # any corrupt parts that will stop us in our tracks!
  my @files = vss_dir_tree($st_project);
  my $msg = "Number of StarTeam files found: " . @files . "\n";
  convert::log($msg);

  my ($file, $name, $prop);
  my $i = 0;
  foreach $file (@files) {
    print "Getting properties for file: $i" . ++$i . "\r";
    $name = STgetname($file);
    $prop = get_file_properties($file);
    if ($prop) {
      append_file("$convert::metadata_dir/ssproperties", $prop);
    }
    else {
      $msg = "\nFailed to get properties for:\n$name\n";
      convert::log($msg);
      die;
    }
  }
  print "\n";
}

sub append_file # Append lines to file
{
  my ($file, @lines) = @_;

  open(FILE, ">>$file") or die "Failed to open $file\n";
  print FILE join('',@lines);
  close(FILE);

}

# Recurse down the list of VSS directories and find complete directory tree
sub vss_dir_tree
{
  my $parent = shift;
  my $folder = shift;
  
  # If folder is null then this is the start of the recursion
  my $dir = '';
  if (! $folder) {
    my $view = STgetview($parent);
    $folder = STgetrootfolder($view);
  }
  else
  {
    $dir = "$parent/" . STgetname($folder);
  }

  my @files = ();
  
  if (exclude_file($dir)) {
    my $msg = "Ignoring directory: $dir\n";
    convert::log($msg);
    return @files;
  }
        
  my $files = STgetfilelist($folder);
  while (my $file = STgetnext($files))
  {
    push(@files, $file);
  }

  my $folders = STgetsubfolders($folder);
  foreach my $folder (in $folders)
  {
    push(@files, vss_dir_tree($dir, $folder));
  }

  return @files;
}

sub get_file_properties
{
    my ($file) = @_;
    my $filepathname = STgetfilepathname($file);
    $filepathname =~ s/\\/\//g;
    return "File:  " . $filepathname . "\n" .
           "Type:  " . STgetfiletype($file) . "\n" .
           "Size:  " . STgetfilesize($file) . "\n" .
           "Store only latest version:  No\n" .
           "Latest:\n" .
           "  Version:  " . STgetfilever($file) . "\n" .
           "  Date:     " . STgetfiledate($file) . "\n" .
           "Comment:\n" . STgetfilecomment($file) . "\n";
}