genmetadata #1

eval '(exit $?0)' && eval 'exec perl -S $0 ${1+"$@"}'
  & eval 'exec perl -S $0 $argv:q'
  if 0;
#  THE PRECEEDING STUFF EXECS perl via $PATH
# -*-Fundamental-*-

#  $Id: //guest/perforce_software/archived/utils/cvs2p4/bin/genmetadata#1 $
#
#  Richard Geiger
#

require 5.000;
require "timelocal.pl";

#use bytes;

my $revpat;

my $errors;

sub dirname
{
  local($dir) = @_;
  $dir =~ s%^$%.%; $dir = "$dir/";
  if ($dir =~ m%^/[^/]*//*$%) { return "/"; }
  if ($dir =~ m%^.*[^/]//*[^/][^/]*//*$%)
  { $dir =~ s%^(.*[^/])//*[^/][^/]*//*$%$1%; { return $dir; } }
  return ".";
}

use Carp; # ...or flounder. (This will fail unless 'perl' is a perl5!)
$| = 1;

($Myname = $0) =~ s%^.*/%%;
$Mydir = &dirname($0);
$Here = `/bin/pwd`; chop $Here;
if ($Mydir ne ".") { chdir "$Mydir" || die "$Myname: can't chdir \"$Mydir\": $!"; }
chdir ".." || die "$Myname: can't chdir \"..\": $!";
$Mydir = `/bin/pwd`; chop $Mydir;
chdir $Here || die "$Myname: can't chdir \"$Here\": $!";

require "$Mydir/lib/util.pl";

$Usage = <<LIT;
$Myname: usage: $Myname [-prescan]
LIT


sub usage
{
  print STDERR $Usage;
  exit 1;
}


sub help
{
  print STDERR <<LIT;
$Usage
$Myname is not done yet. Be patient.
LIT
  exit 1;
}



######
#
#  Perlstuff for parsing RCS repository files
#

#  Some globals used by these routines...
#
$Rcs_Inquote = 0;       # remembers when we're in a '@' quoted string
$Rcs_Eofatal = 1;       # die if we hit the end of the file
$Rcs_File    = "?";     # caller should set this for the error message


sub lead
{ if (defined($Myname)) { return "$Myname: "; } else { return ""; } }


sub dirname
{
  my ($dir) = @_;

  $dir =~ s%^$%.%; $dir = "$dir/";
  if ($dir =~ m%^/[^/]*//*$%) { return "/"; }
  if ($dir =~ m%^.*[^/]//*[^/][^/]*//*$%)
    { $dir =~ s%^(.*[^/])//*[^/][^/]*//*$%$1%; { return $dir; } }
  return ".";
}


sub skip_to_rcstok
{
  my ($this) = @_;
  my $tok;
  while (($tok = &rcstok()) ne $this) { };
}


sub sdump
{
  my ($s) = $1;

  my @s = split(//, $s);

  my $ret = "";
  foreach my $c (@s)
    { $ret .= sprintf(" %02x", ord($c)); }
  return $ret;
}


sub setrevs
{
  my($d_rev, $d_next, $d_branches, $d_date, $d_author, $d_state) = @_;

  my($b_rev);

  $RCS_Revs{$d_rev} = "$d_next:$d_branches";
  $d_date = "19$d_date" if length(( split( /\./, $d_date ))[0]) < 4;
  $RCS_Dates{$d_rev} = "$d_date";
  $RCS_Authors{$d_rev} = "$d_author";
  $RCS_States{$d_rev} = "$d_state";

  if ($d_rev =~ /^\d+\.\d+$/)
    {
      $RCS_Prevs{$d_rev} = $d_next;
      if ($d_next) { $RCS_Nexts{$d_next} = $d_rev; }
    }
  else
    {
      if ($d_next) { $RCS_Prevs{$d_next} = $d_rev; }
      $RCS_Nexts{$d_rev} = $d_next;
    }

  foreach $b_rev (split(/ /, $d_branches))
    { $RCS_Prevs{$b_rev} = $d_rev; }
}


sub potential_branches
{
  my ($tag) = @_;
  my @res;

  # Look for a branch this revision is present in...
  #
  my $sel_branch = $RCS_Tags{$tag};
  ($sel_branch) = ($sel_branch =~ /^(.*)\.\d+$/);

  if ($sel_branch =~ /^\d+$/)
    { push(@res, "main"); }
  else
    {
      my ($sel_brbase, $sel_brnum) = ($sel_branch =~ /^(.*)\.(\d+)$/);
      my $sel_brrev = "$sel_brbase.0.$sel_brnum";
      if ($RCS_rev_brtags{$sel_brrev}) { push(@res, $RCS_rev_brtags{$sel_brrev}); }
    }

  my $try_limit = 10;
  my $g = 0;
  my $try_tag;

  # Look for branches rooted at this revision:
  #
  for ($i = 1; $g <= $try_limit; $i++)
    {
      my $try_rev = "$RCS_Tags{$tag}.0.$i";
      if ($RCS_rev_brtags{$try_rev}) { push(@res, $RCS_rev_brtags{$try_rev}); }

      # If we see that the branch number exists, we should keep looking.
      # We want to give up when we either A: find the branch or
      # B: have found $try_limit unused branch bumbers in a row.
      # Since CVS is supposed to dole them out sequentially, this
      # -should- be sufficient to decide that there are no more.
      # This should be better for efficiency, compared to checking
      # every key in $RCS_rev_brtags.
      #
      if (defined($RCS_rev_brtags{$try_tag})) { $g = 0; next; }
      $g++;
    }

  return @res;
}


#  OK, huddle... shift in strategy!: genmetadata will simply record the
#  union of all tag->branch mapping determinations we make. Let's
#  let dolabels sort it out... :-)
#
sub set_mapping
{
  my ($tag, $sel_br) = @_;

  my @sels = split(/$S/, $Tags{$tag});
  foreach my $sel (@sels)
    {
      # Do we already have this mapping?
      if ($sel_br eq $sel) { return; }
    }

  # If we get here, it was a new mapping... add it to the list
  #
  push(@sels, $sel_br);
  $Tags{$tag} = join("$S", @sels);
}
          

sub exclude
{
  my ($repfile, $tag, $is_branch) = @_;

  my $mod = $repfile;
  $mod =~ s/^$CVS_MODULE\/?//;

  if ($mod =~ /^([^\/]+)\//)
    { $mod = $1; }
  else
    { $mod = ""; }

  if ($is_branch)
    { return (defined(${Exclude_branches{"*"}}{$tag}) || defined(${Exclude_branches{$mod}}{$tag})); }
  else
    { return (defined(${Exclude_tags{"*"}}{$tag}) || defined(${Exclude_tags{$mod}}{$tag})); }
}


#  initialize RCS_Tags, RCS_Revs, (etc.) from an RCS ,v file.
#
sub set_RCS_revs
{
  my ($path, $do_texts) = @_;
  my $repdir;
  my $repfile;
  my $tag;
  my $rev;
  my $tok;
  my ($d_havedelta, $d_branches, $d_next, $d_rev);
  my ($ext, $format);
  my $msg;
  my $admaci = 0;
  my $File;

  my ($CVS_module) = ($path =~ m/^$CVS_ROOT\/([^\/]+)\//);

  undef $RCS_File;
  undef $RCS_Valid;
  undef $RCS_expand;
  undef $RCS_exec;
  undef %RCS_Tags;              # both plain and branch (and special "head")
  undef %RCS_Branchtags;        # branch, only
  undef %RCS_rev_brtags;        # inverse of the above - keyed by branch tag value
  undef %RCS_Revs;
  undef %RCS_States;
  undef %RCS_Authors;
  undef %RCS_Dates;
  undef %RCS_Nexts;
  undef %RCS_Prevs;
  undef $RCS_Branch;
  undef $RCS_import_is_main;
  undef $RCS_import_branch;

  undef $rcsline_buf;

  ($Rcs_File = $path) =~ s%^.*/%%;
  ($File = $Rcs_File) =~ s/,v$//;

  $repdir = &dirname($path);

  if (-r "$repdir/.adamci,v") { $adamci = 1; }

  $repfile = $path;

  $RCS_File = $repfile;

  if (! $CVSNT && -x $repfile) { $RCS_exec = "*"; }

  # What the subshell needs for "'" escaping in an "'"-quoted string:
  #
  $path =~ s/'/'\\''/g;
  my $rlogcmd = "$Mydir/bin/rlog '$path'";

  if (! open(RLOG, "$rlogcmd | ")) { die "\n\nopen [$rlogcmd]"; }

  local $mode = "head";
  local $rev_num;
  local $rev_msg;
  local $rev_author;
  local $rev_state;
  local $rev_date;
  local $rev_next;
  local $rev_branches;
  local @tmprevs;

  # This function achieves 100% global abuse!
  # (Basically just to save inlining the code)...
  #
  sub put_rev
  {
    # First, trim the tailing "---..." lines from the log message...:

    while ($rev_msg =~ /----------------------------\n$/s)
      { $rev_msg =~ s/----------------------------\n$//s; }
  
    # This little stackiness adjusts the order in which
    # revisions are seen to bee that in which they occur in
    # the file; rlog inverts them.
    #
    if (($rev_num =~ tr/\./\./) == 1 || $rev_num =~ /\.1$/)
      {
        &setrevs($rev_num, $rev_next, $rev_branches, $rev_date, $rev_author, $rev_state);
        my $revstr;
        while ($revstr = pop(@revstack))
          { &setrevs(split(/\001/, $revstr)); }
      }
    else
      { push(@revstack, "$rev_num\001$rev_next\001$rev_branches\001$rev_date\001$rev_author\001$rev_state"); }
  
    $RCS_Logs{$rev_num} = $rev_msg;
    $mode = "rev";
  }

  my $have_rev = 0;

  # Entropy embodied!: this is the easy way to make sure we only attend to
  # the first 1.1.1 tag seen.
  #
  my $have_1_1_1 = 0;

  while (<RLOG>)
    {
      if ($mode eq "head")
        {
          if (/^RCS file: (.*)$/) { $RCS_File = $1; }
          if (/^head: (.*)$/)     { $RCS_Tags{"head"} = $1; }
          if (/^branch: (.*)$/)
            {
              $RCS_Branch = $1;
              if ($RCS_Branch eq "1.1.1") { $RCS_import_is_main = 1; }
            }
          if (/^symbolic names:/) { $mode = "symbols"; }
        }
      elsif ($mode eq "symbols")
        {
          # This now has to be a two-pass operation, since we need to know
          # the vendor beanch name before doing the revisions properly...

          if (/^\t([^ :]+): (.*)$/)
            {
              my ($tag, $rev) = ($1, $2);

              if ($rev eq "1.1.1")
                { 
                  if ($have_1_1_1) { next; }
                  $have_1_1_1 = 1;
                }

              push (@tmprevs, "$1$S$2");
              if ($rev eq "1.1.1") { $RCS_import_branch = $tag; }

              # [see re "At Ironport", below]...
              #
              if ($IRONPORT && $rev =~ /$revpat/o) { $RCS_import_is_main = 1; }
            }
          elsif (/^keyword substitution: (.*)/)
            { 
              $RCS_expand = $1;
              if ($RCS_expand eq "kv") { $RCS_expand = ""; }
              $mode = "mid";
            }
        }         
      elsif ($mode eq "mid")
        {
          # At IronPort, some non-cvs imported files somehow started
          # growing local revs along 1.1.m.n (with no default branch
          # set); this is part of an attempt to deal with this
          # unpleasantness. I have conditionalized it, so as to be able
          # to switch it on or off by setting $IRONPORT (or not).
          #
	  if ($IRONPORT && $RCS_import_is_main && (! $RCS_import_branch))
            {
              $RCS_import_branch = "import-spoofed";
              push(@tmprevs, "import-spoofed${S}1.1.1");
            }

          # Now we process the revision information more fully...
          #
          while ($#tmprevs >= 0)
            {
              my ($tag, $rev) = split(/$S/, shift(@tmprevs));

              if (($cnt = $rev =~ tr/\./\./) % 2 == 0)
                {
                  # Handle "RCS" branch tags:
                  #
                  my @nums = split(/\./, $rev);
    
                  splice @nums, $#nums, 0, (0);
                  $rev = join(".", @nums);
                }
        
              if ((! $PureRCS) && $rev =~ /\.0\.[0-9]+$/)
                {
                  if (! $adamci)
                    {
                      if (defined($BRANCH_FLASH)) { $tag =~ s/$BRANCH_FLASH$//; }

                      $RCS_Tags{$tag} = $rev;
                      $RCS_Branchtags{$tag} = $rev;
                      $Brtags{$tag} = 1;              
                      if ($RCS_rev_brtags{$rev})
                        { print "WARNING: file: $RCS_File: dup CVS branch tags on rev <$rev> (tag <$tag>)(ignored)\n"; }
                      else
                        { $RCS_rev_brtags{$rev} = $tag; }
                    }
                }
              elsif (($cnt = $rev =~ tr/\./\./) % 2 == 1)
                {
	          # Ignore RCS tags named "head"; (CVS users should never create such!)
                  #
   	          if ($tag eq "head") { next; }

                  $RCS_Tags{$tag} = $rev;
                  my $rcspath;
                  ($rcspath = $repfile) =~ s/,v$//;

                  if ($IMPORTTAGSPOOF && $rev eq "1.1.1.1") { $rev = "1.1"; }
        
                  if (! &exclude($repfile, $tag))
                    {
		      # Is this one of those wacky main/import shared revs...?

                      my $import_as_main = "";

                      if ($rev =~ /$revpat/o && 
                        ((! defined($RCS_Revs{"1.2"})) || ($RCS_Dates{$rev} < $RCS_Dates{"1.2"})))
                          { $import_as_main = $RCS_import_branch; }

                      print LABELS "$tag$S$rcspath$S$rev$S$import_as_main\n";

                      # don't want to wipe out previously observed ones, as it might
                      # already have found the "real" mapping!
                      #
                      if ($import_as_main)
                        { &set_mapping($tag, "main"); }
                      else
                        { &set_mapping($tag, "UNMAPPED"); }
                    }
                }
              # end of deferred rev processing
            }

          if (/^description:/) { $mode = "rev"; }
        }
      elsif (($mode eq "rev" || $mode =~ /revmsg/) && /^revision\s+([^\s]+)\s*next\s*([^\s]*)$/)
        {
          my ($t1, $t2) = ($1, $2);
          if ($have_rev) { &put_rev(); }
          $rev_num = $t1; $rev_next = $t2; 
          $rev_branches = "";
          $have_rev = 1;
          $mode = "rev";
        }
      elsif ($mode eq "rev")
        {
          if (/^date: ([^;]+);\s+author: ([^;]+);\s+state: ([^;]+);/)
            {
              ($rev_date, $rev_author, $rev_state) = ($1, $2, $3);

	      if ($INTOUCH)
                {
                  $rev_author =~ s/^ith\\//;
                  $rev_author =~ tr/A-Z/a-z/;
                }

              my @d = split(/[\/ :]/, $rev_date);
              if ($d[0] < 2000) { $d[0] -= 1900; }

              $rev_date = join(".", @d);

              $RCS_dates{$rev_num} = $rev_date;
              $RCS_authors{$rev_num} = $rev_author;
              $RCS_states{$rev_num} = $rev_state;
              $rev_msg = ""; $mode = "revmsg0";
            }
        }
      elsif ($mode =~ "^revmsg")
        {
          if ($_ eq "=============================================================================\n")
            { if ($have_rev) { &put_rev(); } }
          elsif ($mode eq "revmsg0" && /^branches:\s+(.*)/)
            {
              my $branches = $1;
              $branches =~ s/;//g;
              foreach my $branch (split(/\s+/, $branches))
                {
                  if ($PureRCS)
                    {
                      my ($l, $b, $r) = ($tok =~ /(.*)\.(\d+)\.(\d+)$/);
                      my $tag = "$l.$b";
                      $RCS_Branchtags{$tag} = $rev;
                    }
                  if ($rev_branches ne "") { $rev_branches .= " "; }
                  $rev_branches .= "$branch.1";
               }
            }          
          else
            { $rev_msg .= "$_"; }
          $mode = "revmsg1";
        }
      else
        {
          die "assert can't get here";
        }
    }

  # OK, we get here having seen every tag in the file. See whether,
  # for any of the tags we saw in this file, we can determine the
  # branch for that tag, and remember the mapping globally.

  try_tag: foreach my $tag (sort(keys(%RCS_Tags)))
    {
      # Only want to consider rev tags:
      #
      if (defined($RCS_Branchtags{$tag})) { next try_tag; }

      # Throw away stuff on the exclude list...

      if (&exclude($RCS_File, $tag)) { next try_tag; }

      my $tagrev = $RCS_Tags{$tag};
      my (@sel_brs) = &potential_branches($tag);

      #### "non-potential import" case:
      #

      # So: do any of the known branch tags in this file select the
      # tagged revision?
      #
      my $tagrev_brnum;
      ($tagrev_brnum = $tagrev) =~ s/\.\d+$//;

      if ($#sel_brs == 0)
        {
          $sel_br = $sel_brs[0];

          if ($RCS_import_is_main && $sel_br eq "main" && $RCS_Tags{$tag} =~ /$revpat/o)
            { &set_mapping($tag, "main"); }

          # so... if we get here, in theory, the heuristic was applicable;
          # exactly one branch tag selected this branch point. $sel_br has
          # the presumptive mapping.
          
          # [fall through, to &set_mapping, below]
        }
      else
        {
          # As a last resort, try the mapping function!

          $sel_br = "";
          if (defined(&brmap))
            { $sel_br = &brmap($tag, $CVS_module); }
          if (! $sel_br) { next try_tag; }

          # [fall through, to &set_mapping, below]
        }

      # Now, make sure we didn't get a different answer than from some previous
      # file:
      #
      &set_mapping($tag, $sel_br);
    }
      
  if ($Prescan) { return 1; }

  if ($adamci)
    {
      undef %metadata;
      eval `$CO -q -p $repdir/.adamci,v`; # TBD: Optimize this to only reread if new dir?

      foreach my $p (keys(%{$metadata{'context_rules'}}))
        {
          my $k;
          ($k) = keys( %{${${$metadata{'context_rules'}}{$p}}{$File}}  );

          if (defined($k) && $k ne "TBB")
            {
              my $rev = ${${${${$metadata{'context_rules'}}{$p}}{$File}}{$k}}[1];
              $rev =~ s/^(.*\.\d+)\.(\d+)$/$1.0.$2/;
              $RCS_Tags{$p} = $rev;
              $RCS_Branchtags{$p} = $rev;
            }
        }
    }

  close RCS;
  $RCS_Valid = 1;

  return 1;
}


sub rcs_tip
{
  my ($rev) = @_;
  my $next;

  #  Find the tip of the branch...
  #
  while (1)
    {
      if (! defined($RCS_Revs{$rev}))
        { return "???"; }
      ($next) = split(/:/, $RCS_Revs{$rev});
      if ($next eq "") { return $rev; }
      $rev = $next;
    }
}


#  given a "CVS line spec" (revision #, "head", or a tag)
#
sub rev_on_line
{
  my($line) = @_;

  my $added_on_branch = 0;

  if ($RCS_Tags{$line} =~ /^(.*)\.(0\.\d+)$/)
    { if ($RCS_States{&rev_on_line("head")} eq "dead") { $added_on_branch = 1; } }
       
#  Hmmm. Why is this EVER needed now? I am tentaively removing it, because
#  it causes one of David Birkhead's files to get the wrong revision, still
#  passes out own poor test/runtest, and we'll seeon the next pass of the
#  IronPort conversion!
#
#  if ((! $added_on_branch) && ($line eq $TRUNKLINE) && ($RCS_Tags{$line}))
#    { $line = "head"; }

  if (defined($RCS_Tags{$line})) { $line = $RCS_Tags{$line}; }
  elsif ($line !~ /^[0-9.]+$/) { return "none"; }

  if ($line =~ /\.0\.([0-9]+)$/)
    {
      # It's a CVS branch revision number... demunge it:
      #
      $line =~ s/\.0(\.[0-9]+)$/$1/;

      #  OK, see whether the branch actually exists:
      #  (We have an assumption here that first rev is always ".1")
      #
      $line = "$line.1";
      if (! defined($RCS_Revs{$line}))
        {
          # Nope, so fall back to the root, which we know to be an
          # existing revision...

          $line =~ s/\.[0-9]+\.[0-9]+$//;
          return $line;
        }

      # Yep, the branch exists; so it *is* a branch; so, we go out to
      # the tip. (Right?)
      #
      return &rcs_tip($line);
    }

  #  OK, do we have an RCS branch or an RCS revision number?  (count
  #  the dots)
  #
  if (($line =~ tr/\././) % 2)
    {
      #  An odd number of dots... it's a revision number
      #
      if (defined($RCS_Revs{$line})) { return $line; }

      return "none"; # Or should we assert?
    }
  else
    {
      #  An even number of dots... it's a branch number
      #  (We have an assumption here that first rev is always ".1")
      #
      return &rcs_tip("$line.1");
    }
}


#  Is rev "$this" < rev "$that"?
#  Note: "" is considered infinitely high
#        revs must be of the same order (I.e., same # of "."s)
#
sub rev_lt
{
  my($this, $that) = @_;
  my(@this, @that);

  if (! $that) { return 1; }

  @this = split(/\./, $this);
  @that = split(/\./, $that);

  while (1)
    {
      $this_n = shift(@this);
      $that_n = shift(@that);

      if ($this_n < $that_n) { return 1; }
      if ($this_n > $that_n) { return 0; }
      if ($#this < 0) { return 0; }
    }
}


#  Note: "" is considered infinitely high
#
sub linerev_gt
{
  my($this, $that) = @_;
  my $ret;

  if (! $that)
    { $ret =  1; }
  else
    {
      my $thisord, $thatord;
      $thisord = ($this =~ tr/\././);
      $thatord = ($that =~ tr/\././);
      if ($thisord < $thatord)
        { $ret = 1; }
      elsif ($thisord > $thatord)
        { $ret = 0; }
      else
        { $ret = &rev_lt($that, $this); }
    }
  return $ret;
}


# Maximum size for a log message we'll keep.
# Messages beyond this get truncated, to accomodate a limitation
# on the key/value pair size in ndbm. That's life.
#
$MAXSZ = 256*3;


#  Generate the metadata for a single file
#
sub dofile
{
  local($dir, $file) = @_;

  if ($file !~ /,v$/) { return; }
  if ($IGNOREFILES && $file =~ /$IGNOREFILES/) { return; }

  if ($file =~ /[\000-\014\016-\037\177-\377]/)
    {
      print "$Myname: RCS filename with non-printable characters (skipped): ";
      $l = length($file);
      for ($i = 0; $i <= $l; $i++)
        {
          $c = substr($file, $i, 1);
          if ($c =~ /[\000-\014\016-\037\177-\377]/)
            { printf "\\%03o", ord($c); }
          else
            { print "$c"; }
        }
      print "\n";
      return;
    }

#  uncomment this to ban files with '...'. Leave commented out to
#  rename them with ",,,"
#
#  elsif ($file =~ /\.\.\./)
#    {
#      print "$Myname: RCS filename with illegal Perforce characters (skipped): $file\n";
#      return;
#    }

  undef %RCS_lines;
  undef %RCS_Branches;
  undef $Firstusedrev;

  print "========== $dir/$file";

#if (0) # for fast file,v Attic/file,v scan
#{

  # This parses the RCS information from the ,v file, filling
  # in various data structures that we use, below.
  #
  if (&set_RCS_revs("$dir/$file", 0) == undef) { print " (empty)\n"; return; } # empty ,v

  # What RCS keyword expansion options are in effect?
  # (We use this to detect binary files)
  #
  $options = "${RCS_expand}$RCS_exec";
  if (! $options) { $options = "-"; }

#}

  @path = split(/\//, "$dir/$file");
  $file = pop(@path);
  $file =~ s/,v$//;
  if ($path[$#path] eq "Attic") { pop @path; }
  $na_dir = join("/", @path);
  $path = sprintf("%s%s%s", $dir, $dir ? "/" : "", $file);
  $na_path = sprintf("%s%s%s", $na_dir, $na_dir ? "/" : "", $file);

  # The "defined($Filesseen{$path})" saves lots of stat()s!
  #

  if (defined($Filesseen{$na_path}))
    {
      my @p = split(/\//, "$na_path");

      splice(@p, $#p, 0, "Attic");
      my $a_path = join("/", @p);

      #  Users have seen this, which previous caused mysterious death
      #  in the sort phase... let's be a little more informative:
      #
      if (-f "$na_path,v" && -f "$a_path,v")
        {
          my $msg = "repository has both\n  $na_path\n  $a_path";
          print " error: $msg\n";
          $errors .= "$msg\n";
          return;
        }
      else
        { die "assert: dofile(): duplicate path: $na_path"; }
    }

  print " ok\n";

  $Filesseen{$na_path} = 1;

  if ($Prescan) { return; }

  # For all of the branches we see, store the tip revision in
  # $HAVELINES{$line}; this is also where we weed out
  # codelines we are not interested in.
  #
  my @codelines = keys %RCS_Branchtags;
  if (! defined($RCS_Branchtags{$TRUNKLINE})) { push(@codelines, $TRUNKLINE); }

  foreach $line (@codelines)
    {
      # Note: lines added to the exclude_branches file should
      # give the actual, complete branch tag name, not the 
      # "de-flashed" (if any) rendition.
      #
      if (&exclude("$dir/$file", $line, 1)) { next; }

      $no_flash_line = $line;
      if (defined($BRANCH_FLASH)) { $no_flash_line =~ s/$BRANCH_FLASH$//; }
      if ($WANTLINES && ! (defined($WANTLINES{$no_flash_line}))) { next; }

      if (($tiprev = &rev_on_line($line)) eq "none") { next; }

      $HAVELINES{$line} = $tiprev;
    }

  #  Now we go through each line, to build a list of the RCS revs that
  #  need to be exported into the metadata stream.
  #
  while (1) # We have more lines to deal with...
    {
      (@k) = (keys %HAVELINES);
      if ($#k < 0) { last; }
      my $theline;

      #  Choose the highest numbered line of the lowest "order" for
      #  the next one to export... this will always pick up lines on
      #  branches nearer the trunk first, so the subsequent branches
      #  will have a place to branch from!
      #
      foreach $k (@k)
        {
          if ($k eq $TRUNKLINE) { $theline = $k; last; }

          #  if both lines select the *same* revision...
          #
          if ($HAVELINES{$k} eq $HAVELINES{$theline})
            {
              # ...take the one with the lower branch tag order first
              #
              if (&linerev_gt($RCS_Branchtags{$k}, $RCS_Branchtags{$theline}))
                { $theline = $k; next; }
            }

          if (&linerev_gt($HAVELINES{$k}, $HAVELINES{$theline}))
            { $theline = $k; }
        }

      $rev = $tiprev = &rev_on_line($theline);

      $t = $theline;

      if (defined($BRANCH_FLASH))
        { $t =~ s/$BRANCH_FLASH$//; }

      #  This is where we build the list of codelines we've encountered.
      #
      $All_lines{$t} = 1;

      if (  (defined($RCS_lines{$rev})) &&
            ($theline ne $TRUNKLINE) &&
            (($rev =~ tr/\././) < ($RCS_Branchtags{$theline} =~ tr/\././)))
        {
          if ($RCS_Branches{$rev}) { $RCS_Branches{$rev} .= ":"; }
          $RCS_Branches{$rev} .= $t;
        }
      else
        {
          while ($rev && ! defined($RCS_lines{$rev}))
            {
              $RCS_lines{$rev} = $theline;

              # if we are looking at 1.1.m.n, and it's commit time is less
              # than any 1.2, or there isn't a 1.2 present, then add a "+"
              # to $RCS_lines{$rev}, to so indicate to later stages...
              #
              if ($rev =~ /$revpat/o && ($1 >= 2 || ($1 eq "1" && $2 >= 2)) &&
                    ((! defined($RCS_Revs{"1.2"}) || $RCS_Dates{$rev} lt $RCS_Dates{"1.2"})))
                { $RCS_lines{$rev} .= "+"; }
                
              $rev = $RCS_Prevs{$rev};
            }
          if ($rev)
            {
              if ($RCS_Branches{$rev}) { $RCS_Branches{$rev} .= ":"; }
              $RCS_Branches{$rev} .= $t;
            }
        }

      # We test for "if $rev" here cause it may have gone null if the while loop
      # above ran off the end...
      #
      if ($rev && (($rev =~ tr/\././) == 1) && &rev_lt($rev, $Firstusedrev))
        { $Firstusedrev = $rev; }

      delete $HAVELINES{$theline};
    }

  #  OK, we have the set of revisions to export - write them to the
  #  metadata stream.
  #
  foreach $rev (keys %RCS_Revs)
    {
      $revkey = "$path/$rev";

      $state = $RCS_States{$rev};
      $author = $RCS_Authors{$rev};
      $date = $RCS_Dates{$rev};
      my ($yr, $mo, $da, $hr, $mi, $se) = split (/\./, $date);
      $date = timegm($se,$mi,$hr,$da,$mo - 1,$yr);
      $line = $RCS_lines{$rev};

      if (defined($BRANCH_FLASH)) { $line =~ s/$BRANCH_FLASH$//; }
      $branches = $RCS_Branches{$rev};

      #  Detect revisions before the first branch point, and
      #  omit them if we're not doing ALLTHEWAYBACK.
      #
      if ( (! $ALLTHEWAYBACK)
           && ($line eq $TRUNKLINE)
           && ($rev ne $RCS_Tags{"head"})
           && $Firstusedrev
           && &rev_lt($rev, $Firstusedrev))
             { next; }

      if (! $line) { next; }
      if (! $branches) { $branches = "-"; }

      if ((! $ALLTHEWAYBACK) && $rev eq $Firstusedrev)
        { $prevrev = "-"; }
      elsif ($RCS_Prevs{$rev})
        { $prevrev = $RCS_Prevs{$rev}; }
      else
        { $prevrev = "-"; }

      $All_lines{$line} = 1;

      if ($RCS_import_is_main)
        {
          my @btmp = split(/ /, $branches);
          my $newb = "";
          foreach my $b (@btmp)
            {
              if ($newb) { $newb .= " "; }
              $newb .= $b;
            }
          $branches = $newb;
        }

      my ($revpath, $revnum) = ($revkey =~ m/^(.*)\/([^\/]*)$/);

      print METATMP "$revkey$S$date$S$author$S$state$S$line$S$RCS_import_branch$S$branches$S$prevrev$S$options\n";

      #  MAXSZ derives from a ndbm limitation on the size of a key/entry pair.
      #  at (256*3) it allows for a $revkey up to 250 chars or so.
      #
      $logmsg = substr($RCS_Logs{$rev}, 0, $MAXSZ);
      if ($logmsg !~ /\n$/) { $logmsg .= "\n"; }

      if (length($logmsg)+length($revkey) > 1010)
        { print "$Myname: revkey + log too long for <$revkey>\n"; exit 1; }

      if ($RCS_import_is_main && $logmsg eq "Initial revision\n")
        {
          my $logkey = "$revpath/1.1.1.1";
          if (defined($MSGS{$logkey})) { $logmsg = $MSGS{$logkey}; }
        }
      $MSGS{$revkey} = $logmsg;
    }
}

# option switch variables get defaults here...

$Convdir = "";

$Boolopt = 0;
$Valopt = 0;
$Prescan = 0;

while ($#ARGV >= 0)
  {
    if ($ARGV[0] eq "-testtoks")   { &test_rcstoks; }
    if ($ARGV[0] eq "-prescan")    { $Prescan = 1; shift; next; }
    elsif ($ARGV[0] eq "-valopt")
      {
        shift; if ($ARGV[0] < 0) { &usage; }
        $Valopt = $ARGV[0]; shift; next;
      }
    elsif ($ARGV[0] eq "-help")
      { &help; }
    elsif ($ARGV[0] =~ /^-/) { &usage; }
    if ($Args ne "") { $Args .= " "; }
    push(@Args, $ARGV[0]);
    shift;
  }

if ($#Args ne 0) { &usage; }

$Convdir = $Args[0];

$Metatmp   = "$Convdir/metatmp";
$Metadata  = "$Convdir/metadata";
$Labels    = "$Convdir/labels";
$Tags	   = "$Convdir/tags";
$Tagfiles  = "$Convdir/tagfiles";
$Brtags	   = "$Convdir/brtags";
$Logmsgs   = "$Convdir/logmsgs";
$Filesseen = "$Convdir/filesseen";
$P4root    = "$Convdir/p4root";

$Changes   = "$Convdir/changes";
$Revmap    = "$Convdir/revmap";
$Clientdir = "$Convdir/p4";

require "$Convdir/config";

if (! -x "$Mydir/bin/rlog")
  {
    print <<EOM;
$Myname:

  *** This version of $Myname requires a patched version of the RCS
  *** rlog command to be built and installed in $Mydir/bin. Please
  *** see the src/rcs-5.7/src/README and rlog.c.patch files included
  *** in this distribution for further information.

EOM
    exit 1;
  }

$revpat = "^1\\.1\\.(1)\\.(\\d+)\$";

&load_excludes();

&load_brmap;

#  (Handle either f or f.db or f.pag, f,dir style dbs):
#
if (&s("/bin/rm -rf $Logmsgs $Logmsgs.db $Logmsgs.pag $Logmsgs.dir ".
    "$P4root $Changes $Clientdir ".
    "$Tags.txt $Tags $Tags.db $Tags.pag $Tags.dir ".
    "$Tagfiles.txt $Tagfiles $Tagfiles.db $Tagfiles.pag $Tagfiles.dir ".
    "$Brtags.txt $Brtags.db $Brtags.pag $Brtags.dir ".
    "$Revmap $Revmap.db $Revmap.pag $Revmap.dir $Labels"))
  { die "/bin/rm -rf $Logmsgs ..."; }

use DB_File;
$DBMCLASS="DB_File";

#$myhashinfo = new DB_File::HASHINFO;
#$myhashinfo->{bsize} = 4096;

$myhashinfo = new DB_File::BTREEINFO;

if (! tie(%Files_seen, $DBMCLASS, $Filesseen, O_CREAT|O_RDWR, 0666, $myhashinfo))
  { print "$Myname: can't tie \"$Filesseen\": $!\n"; exit 1; }

if (! tie(%MSGS, $DBMCLASS, $Logmsgs, O_CREAT|O_RDWR, 0666, $myhashinfo))
  { print "$Myname: can't tie \"$Logmsgs\": $!\n"; exit 1; }

if (! open(LABELS, ">$Labels"))
  { print "$Myname: can't open \">$Labels\": $!\n"; exit 1; }

#  The $Tags hash is keyed by the tag name. It's value is set to the
#  branch tag of the branch it belongs to, iff the mapping can be
#  detemermined by observing that a tagged revision is present in
#  exactly one branch, i.e., has moved beyond the branch's branch
#  point, AND is not selected as the base of some other branch.
#
if (! tie(%Tags, $DBMCLASS, $Tags, O_CREAT|O_RDWR, 0666, $myhashinfo))
  { print "$Myname: can't tie \"$Tags\": $!\n"; exit 1; }

#  The Tagfiles hash remembers for each tag mapped by the heuristic,
#  the file where the mapping was established. This is useful in cases
#  where there are conflicts.
#
if (! tie(%Tagfiles, $DBMCLASS, $Tagfiles, O_CREAT|O_RDWR, 0666, $myhashinfo))
  { print "$Myname: can't tie \"$Tagfiles\": $!\n"; exit 1; }

#  The $Brtags hash is keyed by branch tag name, and the value is the
#  actual branch number.
#
if (! tie(%Brtags, $DBMCLASS, $Brtags, O_CREAT|O_RDWR, 0666, $myhashinfo))
  { print "$Myname: can't tie \"$Brtags\": $!\n"; exit 1; }

if (! open(METATMP, ">$Metatmp"))
  { print "$Myname: can't open \">$Metatmp\": $!\n"; exit 1; }

#chdir $CVS_MODULE || die "$Myname: can't chdir \"$CVS_MODULE\": $!";
#$CVS_MODULE = `/bin/pwd`; chop $CVS_MODULE;
#chdir $Here || die "$Myname: can't chdir \"$Here\": $!";

&traverse($CVS_MODULE, 0, "dofile");

close METATMP;
#close REVTAGS;
untie %MSGS;

if (! open(TAGS, ">$Tags.txt"))
  { print "$Myname: can't open \">$Tags.txt\": $!\n"; exit 1; }

foreach $tag (sort(keys(%Tags)))
  { 
    print TAGS "$tag\t";

    if ($Tags{$tag})
      { print TAGS "$Tags{$tag}\n"; }
    else
      # Should this be an assert now? TBD (Probably! - look at the code!)
      #
      { print TAGS "UNMAPPED-NOTFOUND\n"; }
  }

close TAGS;
print "Wrote $Tags.txt\n";
untie %Tags;
untie %Tagfiles;
&s("rm -f $Tags $Tags.db $Tags.pag $Tags.dir");
&s("rm -f $Tagfiles $Tagfiles.db $Tagfiles.pag $Tagfiles.dir");

if (! open(BRTAGS, ">$Brtags.txt"))
  { print "$Myname: can't open \">$Brtags.txt\": $!\n"; exit 1; }
foreach my $brtag (sort(keys(%Brtags))) { print BRTAGS "$brtag\n"; }
close BRTAGS;
print "Wrote $Brtags.txt\n";
untie %Brtags;
&s("rm -f $Brtags $Brtags.db $Brtags.pag $Brtags.dir");

if ($errors)
  {
    print "***** Fatal Errors:\n$errors";
    exit (1);
  }

if ($Prescan) { exit 0; }

sub metasort
{
  my @a = split(/$S/, $a);
  my @b = split(/$S/, $b);

  # The revision time is the primary sort key
  #   - But this is now handled by the external sort; we still
  #     do secondary and tertiary sort keys, below
  #
  #  if ($a[1] != $b[1]) { return $a[1] <=> $b[1]; }

  $a[0] =~ s/^(.*)\///; my $apath = $1;
  $b[0] =~ s/^(.*)\///; my $bpath = $1;

  # Next is the pathname
  #
  if ($apath ne $bpath) { return $apath cmp $bpath; }

  # If we're still tied, it goes to the revision number!
  #
  @aa = split(/\./, $a[0]);
  @bb = split(/\./, $b[0]);

  for (my $i = 0; $i <= $#aa; $i=$i+2)
    {
      if (! defined($bb[$i])) { return 1; }   # a has more positions, thus greater
      if ($aa[$i] < $bb[$i])  { return -1; }  # a is less than b, thus less
      if ($aa[$i] > $bb[$i])  { return 1; }   # and vice-versa

      # if they are equal, we look to the next position:
      #
      if (! defined($aa[$i+1]))
        { die "impossible sort key (RCS rev) \"$a[0]\"?\n"; }
      if (! defined($bb[$i+1]))
        { die "impossible sort key (RCS rev) \"$b[0]\"?\n"; }
      if ($aa[$i+1] < $bb[$i+1])  { return -1; }  # a is less than b, thus less
      if ($aa[$i+1] > $bb[$i+1])  { return 1; }   # and vice-versa

      # Otherwise, we go on to the next level...
    }

  if ($#bb > $#aa) { return -1; }

  die "impossible equal sort keys:\n  <$a>\n  <$b>\n";
}

my $cmd = "sort -n -t $S -k 2 < $Metatmp |";

if (! open(METASORT, $cmd))
  { print "$Myname: can't open \"$cmd\": $!\n"; exit 1; }

if (! open(META, ">$Metadata"))
  { print "$Myname: can't open \">$Metadata\": $!\n"; exit 1; }

# Do the sorting in chunks, per primary sort key. (We're going through
# all of this, BTW, in order to constrain genmetadata's memory
# footprint, which was getting huge when we held all of the tags and
# metadata in-core)
#

my $t = 0;
my @Meta;

while (<METASORT>)
  {
    chomp;
    my @r = split(/$S/, $_);
    if ($r[1] ne $t)
      {
        if ($#Meta >= 0)
          {
            my @Metasorted = sort metasort @Meta;
            foreach my $m (@Metasorted) { print META "$m\n"; }
          }
        $t = $r[1];
        @Meta = ();
      }
    push (@Meta, $_);
  }

if ($#Meta >= 0)
  {
    my @Metasorted = sort metasort @Meta;
    foreach my $m (@Metasorted) { print META "$m\n"; }
  }

close METASORT;
close META;

$Lines  = "$Convdir/lines";
if (! open(LINES, ">$Lines"))
  { print "$Myname: can't open \">$Lines\": $!\n"; }
else
  {
    print       "===== Lines referenced:\n";
    print LINES "===== Lines referenced:\n";
    foreach $line (sort keys %All_lines)
      {
        print       "$line\n";
        print LINES "$line\n";
      }
    close LINES;
  }

exit 0;
#	Change	User	Description
#1	8160	michael	Archive obsolete CVS to Perforce converter. See the Perforce KB and website for an updated CVS to Perforce converter.
//guest/perforce_software/utils/cvs2p4/bin/genmetadata
#32	7274	Richard Geiger	Integrate dev changes to become 3.1
#31	5649	Richard Geiger	Integrate 3.0b5, b6 changes...
#30	5619	Richard Geiger	Changes for 3.0b4
#29	5601	Richard Geiger	Integrate 3.0 changes, preparing to publish.
#28	5145	Richard Geiger	Publish 2.5.5
#27	4923	Richard Geiger	Integrate 2.5.1-3 changes.
#26	4735	Richard Geiger	This is release 2.5, which handles those four extra special characters Perforce 2004.2 kind-of allows :-)
#25	4354	Richard Geiger	Integrate 2.4 changes.
#24	4274	Richard Geiger	Publish 2.3.8
#23	3711	Richard Geiger	release 2.3.7
#22	2449	rmg	Public 2.3.4
#21	2285	rmg	Publish 2.3.3
#20	2063	rmg	Publish 2.3.2
#19	1989	Richard Geiger	Public 2.3.1
#18	1946	rmg	Publish 2.1
#17	1786	rmg	Publish 2.0
#16	1439	rmg	Publish 1.3.3
#15	1410	rmg	Publish 1.3.2
#14	1205	Richard Geiger	Publish 1.3.1
#13	1187	Richard Geiger	1.3. Mainly, support for labels!
#12	1033	Richard Geiger	publish 1.2.17
#11	476	Richard Geiger	publish cvs2p4 1.2.15
#10	462	Richard Geiger	publish 1.2.14 to the public site.
#9	418	Richard Geiger	Publish 1.2.12
#8	400	Richard Geiger	Publish 1.2.11
#7	395	Richard Geiger	1.2.10
#6	344	Richard Geiger	1.2.9
#5	333	Richard Geiger	Publish 1.2.8 (Not usre why test/file,v was included in this integration; am guessing it was a missed integrations left over from 1.2.7, perhaps)
#4	256	james	push cvs2p4 1.2.6 to public area
#3	242	james	Pull cvs2p4 into //public so latest version (1.2.5) gets synced onto the Perforce FTP area. Note the cvs2p4-latest.tar symlink - I'll update loadsupp to point to that.
#2	230	Laura Wingerd	Pull cvs2p4 into //public so latest version (1.2.4) gets synced onto the Perforce FTP area.
#1	152	Laura Wingerd	cvs2p4 goes public.
//guest/richard_geiger/utils/cvs2p4/bin/genmetadata
#1	130	Richard Geiger	CVS-to-Perforce converter. This is release 1.2.2 (first submit to the Perforce Public Depot)