genmetadata~ #1

eval '(exit $?0)' && eval 'exec perl -S $0 ${1+"$@"}'
  & eval 'exec perl -S $0 $argv:q'
  if 0;
#  THE PRECEEDING STUFF EXECS perl via $PATH
# -*-Fundamental-*-

#  $Id: //depot/clients/atg/cvs2p4/main/bin/genmetadata#16 $
#
#  Richard Geiger
#

require 5.000;
require "timelocal.pl";
use Digest::MD5 qw(md5_base64);
use IO::File;
use POSIX;

my @Meta;

sub dirname
{
  local($dir) = @_;
  $dir =~ s%^$%.%; $dir = "$dir/";
  if ($dir =~ m%^/[^/]*//*$%) { return "/"; }
  if ($dir =~ m%^.*[^/]//*[^/][^/]*//*$%)
  { $dir =~ s%^(.*[^/])//*[^/][^/]*//*$%$1%; { return $dir; } }
  return ".";
}

use Carp; # ...or flounder. (This will fail unless 'perl' is a perl5!)
$| = 1;

($Myname = $0) =~ s%^.*/%%;
$Mydir = &dirname($0);
$Here = `/bin/pwd`; chop $Here;
if ($Mydir ne ".") { chdir "$Mydir" || die "$Myname: can't chdir \"$Mydir\": $!"; }
chdir ".." || die "$Myname: can't chdir \"..\": $!";
$Mydir = `/bin/pwd`; chop $Mydir;
chdir $Here || die "$Myname: can't chdir \"$Here\": $!";

require "$Mydir/lib/util.pl";

$CVS_TOP = '';
$MAX_LABEL_FD = int(POSIX::sysconf( _SC_OPEN_MAX) / 2);


sub usage
{
  print <<_EOF_;
Usage: $Myname [ -h -i -l -t topdir -v ] conversiondir

    -b file  file listing the branch mappings
    -h       print this message
    -i       generate an incremental metadata file
    -l       generate file revision information on non-branch labels
    -t       look for files from here down (must be under \$CVS_MODULE)
    -v       increase level of verbosity (may be used mulitple times)
    --warn-nobranchmap  when using -b, warn of missing maps (default is exit)
_EOF_
  exit $_[0];
}

sub verbose {
    my $level = @_ > 1 ? shift : 1;
    print @_, "\n" if $level <= $V;
}

######
#
#  Perlstuff for parsing RCS repository files
#

#  Some globals used by these routines...
#
$Rcs_Inquote = 0;       # remembers when we're in a '@' quoted string
$Rcs_Eofatal = 1;       # die if we hit the end of the file
$Rcs_File    = "?";     # caller should set this for the error message

sub lead
{ if (defined($Myname)) { return "$Myname: "; } else { return ""; } }

sub rcsline
{
  my $line;
  my $m;

  $line = <RCS>;

  if ($line eq "")
    {
      if ($Rcs_Eofatal)
        {
	  $m = "unexpected eof on \"$Rcs_File\".";
          printf STDERR "%s$m\n", &lead();
	  exit 1;
        }
      else
        { return undef; }
    }  

  if ($line =~ /\r{0,1}\n$/) { $line =~ s/\r{0,1}\n$//; }
  return $line;
}
  
$Rcstok_Buf = "";
$Rcstok_pushed = undef;

#  Return the next token from the RCS repository file.
#  Caller should open the file on descriptor RCS.
#  (Caller should also empty $Rcstok_Buf!)

sub rcstok
{
  my $rcsstr;
  my $m;
  my $strpart;

  if (defined($Rcstok_pushed))
    {
      my $ret = $Rcstok_pushed;
      $Rcstok_pushed = undef;
      return $ret;
    }

  $Rcstok_Buf =~ s/^\s+//;

  if ($Rcstok_Buf eq "")
    {
      while (1)
        {
          $Rcstok_Buf = &rcsline();
          if (! defined ($Rcstok_Buf)) { return undef; }
          if ($Rcstok_Buf ne "") { last; }
        }
      $Rcstok_Buf =~ s/^\s+//;
    }

  #  num
  #
  if ($Rcstok_Buf =~ /^([0-9][0-9.]*)(.*)$/)
    { $Rcstok_Buf = $2; return $1; }

  #  : ; id
  #
  #  Note: the character class for "idchar" assumes all characters
  #  are printable ascii! May break with binary RCS files.
  #  (Actually, I've now convinced myself that there is no
  #  concern here).
  #
  if ($Rcstok_Buf =~ /^(:|;|[a-zA-Z][^ \t\r\n$,:;@]+)(.*)$/)
    { $Rcstok_Buf = $2; return $1; }

  #  string
  #
  if ($Rcstok_Buf =~ /^@(.*)$/)
    {
      $Rcstok_Buf = $1;
      $rcsstr = "";
      while (1)
        {
          if ($Rcstok_Buf eq "")
            {
              $rcsstr .= "\n";
              $Rcstok_Buf = &rcsline();
              if (! defined ($Rcstok_Buf)) { return undef; }
            }
          if ($Rcstok_Buf =~ /^([^@]+)(.*)$/)
            {
              $rcsstr .= $1;
              $Rcstok_Buf = $2;
              next;
            }
          if ($Rcstok_Buf =~ /^@@(.*)$/)
            {
              $rcsstr .= "@";
              $Rcstok_Buf = $1;
              next;
            }
          if ($Rcstok_Buf =~ /^@(.*)$/)
            {
              $Rcstok_Buf = $1;
              return $rcsstr;
            }
        }
    }

  $m = "rcstok(): internal error: \$Rcstok_Buf <$Rcstok_Buf>";
  printf STDERR "%s$m\n", &lead();
  exit 1;
}


sub dirname
{
  my ($dir) = @_;

  $dir =~ s%^$%.%; $dir = "$dir/";
  if ($dir =~ m%^/[^/]*//*$%) { return "/"; }
  if ($dir =~ m%^.*[^/]//*[^/][^/]*//*$%)
    { $dir =~ s%^(.*[^/])//*[^/][^/]*//*$%$1%; { return $dir; } }
  return ".";
}


sub skip_to_rcstok
{
  my ($this) = @_;
  my $tok;
  while (($tok = &rcstok()) ne $this) { };
}


sub skip_to_deltas
{
  my $tok;

  # Called after we encounter ";" for symbols. We must now skip:
  #
  #   locks      {id : num}*;  {strict  ;}
  #   { comment  {string}; }
  #   { expand   {string}; }
  #   { newphrase }*

  while (1)
    {
      my $fatalsave = $Rcs_eofatal;

      $tok = &rcstok();
      
      if ($tok eq "expand")
        {
          $RCS_expand = &rcstok();
          if (&rcstok() ne ";")
            { die "$Myname: skip_to_deltas(): expected ';' after expand."; }
          next;
        }
      if ($tok =~ /^[0-9]/) { $Rcstok_pushed = $tok; last; }
      while (1)
        {
          # Turning this off here because it looks like cvs problems can create
          # delta-less ,v files; this allows us to handle this.
          #
          $Rcs_Eofatal = 0; 
          $tok = &rcstok();
          $Rcs_Eofatal = $fatalsave;
          if (! defined($tok)) { return undef; }

          if ($tok eq ";") { last; }
        }
    }
  return 1;
}


sub test_rcstoks
{
  my $tok;

  open(RCS, "<$ARGV[1]") || die;
  $Rcstok_Buf = "";

  $Rcs_Eofatal = 0;
  while (defined($tok = &rcstok))
    { print "<$tok>\n"; }

  exit 1;
}


sub setrevs
{
  my($d_rev, $d_next, $d_branches, $d_date, $d_author, $d_state) = @_;
  my($b_rev);

  $RCS_Revs{$d_rev} = "$d_next:$d_branches";
  $d_date = "19$d_date" if length(( split( /\./, $d_date ))[0]) < 4;
  $RCS_Dates{$d_rev} = "$d_date";
  $RCS_Authors{$d_rev} = "$d_author";
  $RCS_States{$d_rev} = "$d_state";

  if ($d_rev =~ /^1\.1\.1\./)
    {
      # We have a "vendor" branch - spoof a branch tag for it.
      #
      $RCS_Branchtags{"import"} = "1.1.0.1";
      $RCS_Tags{"import"} = "1.1.0.1";
    }      

  if ($d_rev =~ /^[0-9]+\.[0-9]+$/)
    {
      $RCS_Prevs{$d_rev} = $d_next;
      if ($d_next) { $RCS_Nexts{$d_next} = $d_rev; }
    }
  else
    {
      if ($d_next) { $RCS_Prevs{$d_next} = $d_rev; }
      $RCS_Nexts{$d_rev} = $d_next;
    }
  foreach $b_rev (split(/ /, $d_branches))
    { $RCS_Prevs{$b_rev} = $d_rev; }
}


#  initialize RCS_Tags, RCS_Revs, (etc.) from an RCS ,v file.
#
sub set_RCS_revs
{
  my ($path, $do_texts) = @_;
  my $repfile;
  my $tag;
  my $rev;
  my $tok;
  my ($d_havedelta, $d_branches, $d_next, $d_rev);
  my $rcspath;
  my $msg;

  undef $RCS_Valid;
  undef $RCS_expand;
  undef $RCS_exec;
  undef %RCS_Tags;
  undef %RCS_Branchtags;
  undef %RCS_Revs;
  undef %RCS_States;
  undef %RCS_Authors;
  undef %RCS_Dates;
  undef %RCS_Prevs;
  undef %RCS_Nexts;
  undef $RCS_Branch;
  undef %RCS_Texts;

  ($Rcs_File = $path) =~ s%^.*/%%;

  $repfile = $path;

  $rcspath = "<$repfile";      

  if (! open(RCS, $rcspath))
    {
      printf STDERR "%scan't open \"$rcspath\": $!\n", &lead();
      return 0;
    }

  if (-x $repfile) { $RCS_exec = "x"; }

  $Rcstok_Buf = "";

#while (1) { $tok = &rcstok(); print "<$tok>\n"; }

  &skip_to_rcstok("head");

#  #  (If the RCS repository is in the Attic there is logically no "head"
#  #  revision for this file)
#  #
#  if ($path !~ /Attic\/.+,v$/) { $RCS_Tags{"head"} = &rcstok(); }

  $RCS_Tags{"head"} = &rcstok();

  $tok = &rcstok(); $tok = &rcstok();

  if ($tok eq "branch") { $RCS_Branch = &rcstok(); }

  &skip_to_rcstok("symbols");

  while (1)
    {
      $tok = &rcstok();
      if ($tok eq ";") { last; }
      $tag = $tok; &rcstok(); $rev = &rcstok();
      $RCS_Tags{$tag} = $rev;
      if ($rev =~ /\.0\.[0-9]+$/) { $RCS_Branchtags{$tag} = $rev; }
    }

  if (&skip_to_deltas() == undef) { return undef; }

  $d_rev = ""; $d_havedelta = 0; $d_branches = ""; $d_next = "";

  while (1)
    {
      $tok = &rcstok();
      if ($tok =~ /[0-9.]+/)
        {
	  if ($d_havedelta)
            { &setrevs($d_rev, $d_next, $d_branches, $d_date, $d_author, $d_state); }
          $d_rev = $tok; $d_havedelta = 1; $d_branches = ""; $d_next = "";
        }
      elsif ($tok eq "branches")
        {
          while (1)
            {
              if (($tok = &rcstok()) eq ";") { last; }
              if ($d_branches ne "") { $d_branches .= " "; }
              $d_branches .= $tok;
            }
        }
      elsif ($tok eq "date")   { $tok = &rcstok(); if ($tok ne ";") { $d_date = $tok; &rcstok(); } }
      elsif ($tok eq "author") { $tok = &rcstok(); if ($tok ne ";") { $d_author = $tok; &rcstok(); } }
      elsif ($tok eq "state")  { $tok = &rcstok(); if ($tok ne ";") { $d_state = $tok; &rcstok(); } }
      elsif ($tok eq "next")   { $tok = &rcstok(); if ($tok ne ";") { $d_next = $tok; &rcstok(); } }
      elsif ($tok eq "desc") { last; }
      else { &skip_to_rcstok(";"); }
    }

  if ($d_havedelta)
    { &setrevs($d_rev, $d_next, $d_branches, $d_date, $d_author, $d_state); }

  if (! defined($do_texts)) { close RCS; $RCS_Valid = 1; return 1; }

  $Rcs_Eofatal = 0;
  while (1)
    {
      $tok = &rcstok();
      if (! defined($tok)) { last; }
      if ($tok =~ /[0-9.]+/)
        { $d_rev = $tok; }
      elsif ($tok eq "text")
        {
          $msg = &rcstok();
          if ($msg eq "") { $RCS_Texts{$d_rev} = 0; } else { $RCS_Texts{$d_rev} = 1; }
        }
      elsif ($tok eq "log")
        { $log = &rcstok(); $RCS_Logs{$d_rev} = $log; }
    }
  $Rcs_Eofatal = 1;

  close RCS;
  $RCS_Valid = 1;
  return 1;
}


sub rcs_tip
{
  my ($rev) = @_;
  my $next;

  #  Find the tip of the branch...
  #
  while (1)
    {
      if (! defined($RCS_Revs{$rev}))
        { return "???"; }
      ($next) = split(/:/, $RCS_Revs{$rev});
      if ($next eq "") { return $rev; }
      $rev = $next;
    }
}


#  given a "CVS line spec" (revision #, "head", or a tag)
#
sub rev_on_line
{
  my($line) = @_;

  if ($line eq $TRUNKLINE)
    { $line = "head"; }
#  else
#    { $line = "${line}_BRANCH"; }

  if (defined($RCS_Tags{$line})) { $line = $RCS_Tags{$line}; }
  elsif ($line !~ /^[0-9.]+$/) { return "none"; }
  if ($line =~ /\.0\.([0-9]+)$/)
    {
      # It's a CVS branch revision number... demunge it:
      #   
      $line =~ s/\.0(\.[0-9]+)$/$1/;
      
      #  OK, see whether the branch actually exists:
      #  (We have an assumption here that first rev is always ".1")
      #
      $line = "$line.1";
      if (! defined($RCS_Revs{$line}))
        {
          # Nope, so fall back to the root, which we know to be an
          # existing revision...

          $line =~ s/\.[0-9]+\.[0-9]+$//;
          return $line;
        }

      # Yep, the branch exists; so it *is* a branch; so, we go out to
      # the tip. (Right?)
      #
      return &rcs_tip($line);
    }         

  #  OK, do we have an RCS branch or an RCS revision number?  (count
  #  the dots)
  #
  if (($line =~ tr/\././) % 2)
    {
      #  An odd number of dots... it's a revision number
      #    
      if (defined($RCS_Revs{$line})) { return $line; }

      return "none"; # Or should we assert?
    }
  else
    {
      #  An even number of dots... it's a branch number
      #  (We have an assumption here that first rev is always ".1")
      #
      return &rcs_tip("$line.1");
    }
}


#  Is rev "$this" < rev "$that"?
#  Note: "" is considered infinitely high
#        revs must be of the same order (I.e., same # of "."s)
#
sub rev_lt
{
  my($this, $that) = @_;
  my(@this, @that);

  if (! $that) { return 1; }

  @this = split(/\./, $this);
  @that = split(/\./, $that);

  while (1)
    {
      $this_n = shift(@this);
      $that_n = shift(@that);

      if ($this_n < $that_n) { return 1; }
      if ($this_n > $that_n) { return 0; }
      if ($#this < 0) { return 0; }
    }
}      
  

#  Note: "" is considered infinitely high
#
sub linerev_gt
{
  my($this, $that) = @_;
  my $ret; 

  if (! $that)
    { $ret =  1; }
  else
    {
      my $thisord, $thatord;
      $thisord = ($this =~ tr/\././);
      $thatord = ($that =~ tr/\././);
      if ($thisord < $thatord)
        { $ret = 1; }
      elsif ($thisord > $thatord)
        { $ret = 0; }
      else
        { $ret = &rev_lt($that, $this); }
    }
  return $ret;
}


# Maximum size for a log message we'll keep.
# Messages beyond this get truncated, to accomodate a limitation
# on the key/value pair size in ndbm. That's life.
#
$MAXSZ = 256*3;

#  Generate the metadata for a single file
#
sub dofile
{
  local($dir, $file) = @_;
  my $reason = "";

  if ($file !~ /,v$/)
    { $reason = "not an RCS file"; }
  elsif ($IGNOREFILES && $file =~ /$IGNOREFILES/)
    { $reason = "matches IGNOREFILES pattern \"$IGNOREFILES\""; }
  elsif ($file =~ /[\000-\037\177-\377]/)
    { 
      $reason = "non-printable characters in file name";
      my $newfile = "";
      $l = length($file);
      for ($i = 0; $i <= $l; $i++)
        {
          $c = substr($file, $i, 1);
          if ($c =~ /[\000-\037\177-\377]/)
            { $newfile .= sprintf "\\%03o", ord($c); }
          else
            { $newfile .= "$c"; }
        }
      $file = $newfile;
    }
  elsif ($file =~ /\#|\%|\*|\@|\.\.\./)
    {
      $reason = "illegal Perforce characters in file name";
    }

  if ($reason)
    {
      print "ignore: $file\n";
      print "reason: $reason\n";
      return;
    }

    
#if ($file ne "rm.c,v") { return; }

  undef %RCS_lines;
  undef %RCS_Branches;
  undef $Firstusedrev;

  # This parses the RCS information from the ,v file, filling
  # in various data structures that we use, below.
  #
  if (&set_RCS_revs("$dir/$file", 0) == undef) { return; } # empty ,v

  # What RCS keyword expansion options are in effect?
  # (We use this to detect binary files)
  #
  $options = "${RCS_expand}$RCS_exec";
  if (! $options) { $options = "-"; }

  @path = split(/\//, "$dir/$file");
  $file = pop(@path);
  $file =~ s/,v$//;
  if ($path[$#path] eq "Attic") { pop @path; }
  $dir = join("/", @path);
  $path = sprintf("%s%s%s", $dir, $dir ? "/" : "", $file);

  $path =~ s/^$CVS_MODULE//;
  print "$path\n";

  # For all of the branches we see, store the tip revision in
  # $HAVELINES{$line}; this is also where we weed out
  # codelines we are not interested in.
  #
  foreach $line ((keys %RCS_Branchtags), $TRUNKLINE)
    {
      $no_flash_line = $line;
      if (defined($BRANCH_FLASH)) { $no_flash_line =~ s/$BRANCH_FLASH$//; }
      if ($WANTLINES && ! (defined($WANTLINES{$no_flash_line}))) { next; }
      if (($tiprev = &rev_on_line($line)) eq "none") { next; }
      $HAVELINES{$line} = $tiprev;
      my $br = $line eq $TRUNKLINE ? "main" : $line;
      $TIPS{"$path$S$br"} = $tiprev;
    }

  #  Now we go through each line, to build a list of the RCS revs that
  #  need to be exported into the metadata stream.
  #
  while (1) # We have more lines to deal with...
    {
      (@k) = (keys %HAVELINES);
      if ($#k < 0) { last; }
      my $theline;

      #  Choose the highest numbered line of the lowest "order" for
      #  the next one to export... this will always pickup lines on
      #  branches nearer the trunk first, so the subsequent branches
      #  will have a place to branch from!
      #
      foreach $k (@k)
        {
          if ($k eq $TRUNKLINE) { $theline = $k; last; }

          #  if both lines select the *same* revision...
          #
          if ($HAVELINES{$k} eq $HAVELINES{$theline})
            {
              # ...take the first SEEN line one before all else
              my $SEEN_line = $SEEN{"$path/$HAVELINES{$theline}"}->{line} || '';
              $SEEN_line =~ s/$S.*//;

              if ($theline eq $SEEN_line)
                { next; }
              if ($k eq $SEEN_line)
                { $theline = $k; next }

              # ...otherwise, take the one with the lower branch tag order first
              #
              if (&linerev_gt($RCS_Branchtags{$k}, $RCS_Branchtags{$theline}))
                { $theline = $k; next; }
            }

          if (&linerev_gt($HAVELINES{$k}, $HAVELINES{$theline}))
            { $theline = $k; }
        }

      $rev = $tiprev = &rev_on_line($theline);

      $t = $theline;
      if (defined($BRANCH_FLASH))
        { $t =~ s/$BRANCH_FLASH$//; }

      #  This is where we build the list of codelines we've encountered.
      #
      $All_lines{$t} = 1;

      if (  (defined($RCS_lines{$rev})) &&
            ($theline ne $TRUNKLINE) &&
            (($rev =~ tr/\././) < ($RCS_Branchtags{$theline} =~ tr/\././)))
        {
          if ($RCS_Branches{$rev}) { $RCS_Branches{$rev} .= ":"; }
          $RCS_Branches{$rev} .= $t;
        }
      else
        {
          while ($rev && ! defined($RCS_lines{$rev}))
            {
              $RCS_lines{$rev} = $theline;
              $rev = $RCS_Prevs{$rev};
            }
          if ($rev)
            {
              if ($RCS_Branches{$rev}) { $RCS_Branches{$rev} .= ":"; }
              $RCS_Branches{$rev} .= $t;
            }
        }

      # We test for "if $rev" here cause it may have gone null if the while loop
      # above ran off the end...
      #
      if ($rev && (($rev =~ tr/\././) == 1) && &rev_lt($rev, $Firstusedrev))
        { $Firstusedrev = $rev }

      delete $HAVELINES{$theline};
    }        

  #  OK, we have the set of revisions to export - write them to the
  #  metadata stream.
  #
  foreach $rev (keys %RCS_Revs)
    {
      my $revkey = "$path/$rev";

      $state = $RCS_States{$rev};
      $author = $RCS_Authors{$rev};
      $date = $RCS_Dates{$rev};
      my ($yr, $mo, $da, $hr, $mi, $se) = split (/\./, $date);
      $date = timegm($se,$mi,$hr,$da,$mo - 1,$yr);
      $line = $RCS_lines{$rev};
      if (defined($BRANCH_FLASH)) { $line =~ s/$BRANCH_FLASH$//; }
      $branches = $RCS_Branches{$rev};

      #  Detect revisions before the first branch point, and
      #  omit them if we're not doing ALLTHEWAYBACK.
      #
      if ( (! $ALLTHEWAYBACK)
           && ($line eq $TRUNKLINE)
           && ($rev ne $RCS_Tags{"head"})
           && $Firstusedrev
           && &rev_lt($rev, $Firstusedrev))
             { next; }

      if (! $line) { next; }
      if (! $branches) { $branches = "-"; }

      if ((! $ALLTHEWAYBACK) && $rev eq $Firstusedrev)
        { $prevrev = "-"; }
      elsif ($RCS_Prevs{$rev})
        { $prevrev = $RCS_Prevs{$rev}; }
      else
        { $prevrev = "-"; }

      $All_lines{$line} = 1;

      #  MAXSZ derives from a ndbm limitation on the size of a key/entry pair.
      #  at (256*3) it allows for a $revkey up to 250 chars or so.
      #
      $logmsg = substr($RCS_Logs{$rev}, 0, $MAXSZ);
      if ($logmsg !~ /\n$/) { $logmsg .= "\n"; }

      $md5sum = md5_base64($logmsg);

      if (length($logmsg)+length($md5sum) > 1010)
        {
          warn "$Myname: md5sum + log too long for <$revkey>\n";
          warn "$Myname: truncating\n";
          $logmsg = substr($logmsg, 0, 1010-23). "\n";
	}

      $MSGS{$md5sum} = $logmsg;

      # Added the file to the list.
      #
      if (!$SEEN{$revkey}->{line}) {
	push_revs($revkey, $date, $author, $state, $line,
			$branches, $prevrev, $options, $md5sum);
      } else {
	my ($seen_line, $seen_branches) =
			($SEEN{$revkey}->{line}, $SEEN{$revkey}->{branches});
	verbose 2, "skipping: $revkey\nAlready seen in an incremental";
        verbose 3, "checking: $seen_branches vs. $branches";
	if ($seen_branches ne $branches) {
            %BR = map { $_ => 1 } split ":", $branches;
            for my $seen (split ":", $seen_branches) {
                delete $BR{$seen};
            }
            # XXX - Use this kludge of prepending "-:" to the branches to
            # indicate later that you don't have to re-add/edit the $line
            # file.
            # NOTE - this will fail if you delete a branch during one
            # incremental run and then re-add it during another.
            my $newbr = join ":", "-", keys %BR;
            push_revs($revkey, $date, $author, $state, $line,
		      $newbr, $prevrev, $options, $md5sum
		     ) if $newbr ne '-' and $newbr ne '-:-';
	}
      }
      $SEEN{$revkey} = {line => $line, branches => $branches};
      dumpLabels($path, $rev, $line, \%RCS_Tags)
                                if $labels and $state ne 'dead';
    }
}


sub push_revs {
    my ($revkey, $date, $author, $state, $line,
	$branches, $prevrev, $options, $md5sum) = @_;
    my $need_edit = 1;
    $need_edit = 0 if $branches =~ m/^-:/;
    my @defaults = ();
    my %mapped = ();
    for my $br (split ":", $branches) {
	next if $br eq '-';
	my $info = $BRANCHES{$br};
	if ($bfile and !$info) {
	    warn "warning: using branch mappings but no mapping found for ",
	         "$br. Is it new?\nfile checked: $revkey\n";
	    die "can't continue\n" if !$warnOnNobranchmap;
	    $info = {};
	}
	if (!$info->{author}) {
	    push @defaults, $br;
	} else {
	    $mapped{$br} = $info;
	}
    }

    if ($need_edit || @defaults) {
	my $br = $need_edit ? '' : '-:';
	$br .= join ':', @defaults;
	push(@Meta, join("$S",
		     $revkey, $date, $author, $state, $line,
		     $br || '-', $prevrev, $options, $md5sum
		    ));
    }
    for my $other (keys %mapped) {
	my $info = $mapped{$other};
	$author = $info->{author};
	$date = $info->{date};
	$md5sum = $info->{md5sum};
	push(@Meta, join("$S",
		     $revkey, $date, $author, $state, $line,
		     "-:$other", $prevrev, $options, $md5sum
		    ));
    }
}

sub dumpLabels {
    my ($path, $rev, $line, $tags) = @_;
    verbose 2, "dumping labels for $path/$rev $line";
    for my $label (keys %$tags) {
        next if $label eq 'head';
        if ($tags->{$label} eq $rev) {
            verbose 3, "found $label for $path/$rev on line $line";
	    _dumpLabel($label, "$path/$rev", $line);
	}
    }
}

sub _dumpLabel {
    my ($label, $pathrev, $line) = @_;
    my $fh = _get_label_fh($label);
    $fh->print("$pathrev$S$line\n");
}

%FHS = ();
%TS = ();

sub _get_label_fh {
    my ($label) = @_;

    if (!exists($FHS{$label})) {
	my $file = "$CVSLabels/$label";
	if (keys %FHS >= $MAX_LABEL_FD) {
	    my ($oldest) = sort { $TS{$a} <=> $TS{b} } keys %TS;
	    verbose 3, "too many open files.  closing oldest: $oldest";
	    close $oldest;
	    delete $FHS{$oldest};
	    delete $TS{$oldest};
	}
	$FHS{$label} = new IO::File or die "couldn't open $file: $!\n";
	open($FHS{$label}, ">>$file") or die "couldn't open $file: $!\n";
    }
    $TS{$label} = time();
    $FHS{$label};
}


# option switch variables get defaults here...

$help = 0;
$doIncremental = 0;
$labels = 0;
$V = 0;
$bfile = '';
%BRANCHES = ();
$warnOnNobranchmap = 0;

use Getopt::Long;
GetOptions(
    "branches=s"        => \$bfile,
    "help"              => \$help,
    "incremental"       => \$doIncremental,
    "labels"            => \$labels,
    "top=s"             => \$CVS_TOP,
    "verbose+"          => \$V,
    "warn-nobranchmap"  => \$warnOnNobranchmap,
) || usage(1);
$help && usage(0);

$Convdir = shift || usage(1);

$Metadata  = "$Convdir/metadata";
$Logmsgs   = "$Convdir/logmsgs";
$Tips      = "$Convdir/tips";
$Changes   = "$Convdir/changes";
$Donelog   = "$Convdir/donelog";
$Revmap    = "$Convdir/revmap";
$Revlog    = "$Convdir/revlog";
$Seen      = "$Convdir/seen";
$Clientdir = "$Convdir/p4";
$CVSLabels = "$Convdir/labels/cvs";
$P4Labels  = "$Convdir/labels/p4";

require "$Convdir/config";

if ($CVS_TOP) {
    $CVS_TOP =~ m|$CVS_MODULE| or die "$CVS_TOP not under $CVS_MODULE\n";
    $CVS_TOP =~ s|/$||;
}
if ($CVS_MODULE !~ m|/CVSROOT/?$|) {
    warn "\$CVS_MODULE $CVS_MODULE doesn't end in CVSROOT\n";
}

unless ($doIncremental) {
    &s("rm -rf $Logmsgs.dir $Logmsgs.pag $Tips.dir $Tips.pag $Seen* $Donelog" .
	" $Metadata* $Changes* $Clientdir $Revlog $Revmap.dir $Revmap.pag" .
        " $Convdir/labels");
} else {
    $md = $Metadata;
    $cnt = 1;
    while (-e $md) {
	$md = $Metadata . "." . ++$cnt;
    }
    $Metadata = $md;
}
if ($labels) {
    verbose 1, "only using $MAX_LABEL_FD concurrent open label files";
    &s("rm -rf $Convdir/labels");
    mkdir "$Convdir/labels", 0777 or die "can't make $Convdir/labels: $!\n";
    mkdir "$CVSLabels", 0777 or die "can't make $CVSLabels: $!\n";
    mkdir "$P4Labels", 0777 or die "can't make $P4Labels: $!\n";
}


if (! dbmopen(MSGS, $Logmsgs, 0666))
  { print "$Myname: can't dbmopen \"$Logmsgs\": $!\n"; exit 1; }
if (! dbmopen(TIPS, $Tips, 0666))
  { print "$Myname: can't dbmopen \"$Tips\": $!\n"; exit 1; }

%SEEN = ();
if (open(SEEN, "<$Seen")) {
    while (<SEEN>) {
        chomp;
	my ($k, $l, $b) = split /$S/;
        $SEEN{$k} && die "Why am I reading '$k' in twice";
	$SEEN{$k} = {line => $l, branches => $b};
    }
    close SEEN;
}

if ($bfile) {
    open BRANCHES, "<$bfile" or die "can't open $bfile: $!\n";
    while (<BRANCHES>) {
        chomp;
        next if /^#/;
        next if /^\s*$/;
        my ($p4branch, $cvsbranch, @rest) = split /\s+/;
        $p4branch =~ s|/$||;
        $p4branch =~ s|^/|| unless $p4branch =~ m|^//|;
        $cvsbranch = $p4branch unless $cvsbranch;
        verbose "mapping branch $cvsbranch to $p4branch";
	die "not enough fields in map" unless !@rest or @rest >= 3;
	# GMR - prepend 42 to the date for now - ick but we need them last.
	my %info = (author => shift @rest,
		    date   => (42 . shift @rest));
	my $logmsg = join " ", @rest;
	$logmsg =~ s/^"|"$//g;
	$logmsg .= "\n";

	my $md5sum = md5_base64($logmsg);

	if (length($logmsg)+length($md5sum) > 1010)
	  {
	      warn "$Myname: md5sum + log too long for branchmap: $cvsbranch\n";
	      warn "$Myname: truncating\n";
	      $logmsg = substr($logmsg, 0, 1010-23). "\n";
	  }

	$MSGS{$md5sum} = $logmsg;
	$info{md5sum} = $md5sum;
        $BRANCHES{$cvsbranch} = \%info;
    }
    close BRANCHES;
}

&traverse($CVS_TOP || $CVS_MODULE, 0, "dofile");

sub metasort
{
  my @a = split(/$S/, $a);
  my @b = split(/$S/, $b);

  if ($a[0] eq $b[0]) {
      # Favour main edit otherwise base it on date (and secondarily branches).
      my $a_br = $a[5];
      my $b_br = $b[5];
      return -1 if $a_br !~ m/^-:/;
      return  1 if $b_br !~ m/^-:/;
      
      my $a_date = $a[1];
      my $b_date = $b[1];
      return $a_date <=> $b_date if $a_date != $b_date;
      return $a_br cmp $b_br;
  }
  if ($a[1] != $b[1]) { return $a[1] <=> $b[1]; }

  $a[0] =~ s/^(.*)\///; my $apath = $1;
  $b[0] =~ s/^(.*)\///; my $bpath = $1;

  if ($apath ne $bpath) {
    # field 6 is the previous revision - favour files without one
    if ($a[6] eq "-" ^ $b[6] eq "-") {
	return $a[6] eq "-" ? -1 : 1;
    }
    return $apath cmp $bpath;
  }

  @aa = split(/\./, $a[0]);
  @bb = split(/\./, $b[0]);

  for (my $i = 0; $i <= $#aa; $i=$i+2)
    {
      if (! defined($bb[$i])) { return 1; }   # a has more positions, thus greater
      if ($aa[$i] < $bb[$i])  { return -1; }  # a is less than b, thus less
      if ($aa[$i] > $bb[$i])  { return 1; }   # and vice-versa

      # if they are equal, we look to the next position:
      # 
      if (! defined($aa[$i+1]))
        { die "impossible sort key (RCS rev) \"$a[0]\"?\n"; }
      if (! defined($bb[$i+1]))
        { die "impossible sort key (RCS rev) \"$b[0]\"?\n"; }
      if ($aa[$i+1] < $bb[$i+1])  { return -1; }  # a is less than b, thus less
      if ($aa[$i+1] > $bb[$i+1])  { return 1; }   # and vice-versa

      # Otherwise, we go on to the next level...
    }     

  if ($#bb > $#aa) { return -1; }

  # GMR - should check for Attic dups earlier now that we allow dups.
  die "impossible equal sort keys:\n  <$a>\n  <$b>\n" .
      "it is possible that there is a duplicate in the Attic\n";

}
  
print "sorting\n";
@Metasorted = sort metasort @Meta;
print "sotrted\n";

unless (@Metasorted) {
  verbose "no new revisions so nothing being saved in \"$Metadata\"";
} else {
  if (! open(META, ">$Metadata"))
    { print "$Myname: can't open \">$Metadata\": $!\n"; exit 1; }

  foreach my $m (@Metasorted) { print META "$m\n"; }
  close META;
}

dbmclose MSGS;
dbmclose TIPS;

open(SEEN, ">$Seen") or die "can't open $Seen: $!\n";
while (my ($k, $v) = each %SEEN) {
    print SEEN "$k$S$v->{line}$S$v->{branches}\n"
			or die "can't write to $Seen: $!\n";
}
close SEEN or die "can't close $Seen: $!\n";

for my $fh (values %FHS) {
    $fh->close();
}
    
$Lines  = "$Convdir/lines";
if (! open(LINES, ">$Lines"))
  { print "$Myname: can't open \">$Lines\": $!\n"; }
else
  {
    print       "===== Lines referenced:\n";
    print LINES "===== Lines referenced:\n";
    foreach $line (sort keys %All_lines)
      {
        print       "$line\n";
        print LINES "$line\n";
      }
    close LINES;
  }

exit 0;
#	Change	User	Description	Committed
#1	882	Matthew Rice	Matt's cvs2p4 1.2.35 release