package VCP::Rev;

=head1 NAME

VCP::Rev - VCP's concept of a revision

=head1 SYNOPSIS

   use VCP::Rev;

   use VCP::Rev qw( iso8601format );

   my $r = VCP::Rev->new;

=head1 DESCRIPTION

A data structure that represents a revision to a file (but, technically,
not a version of a file, though the two are often synonymous).

=head1 METHODS

=over

=cut

## Package globals.  These are assigned before "use strict" takes effect
## further down, so they need no "our" / "use vars" declaration.
$VERSION = 1 ;

## iso8601format() is the only function that may be imported by callers.
@EXPORT_OK = qw( iso8601format );
use Exporter ();
## Borrow Exporter's import() directly instead of inheriting from Exporter.
## NOTE(review): the assignment is repeated, presumably to silence Perl's
## "used only once" warning for the *import glob -- confirm before removing.
*import = \&Exporter::import;
*import = \&Exporter::import;

use strict ;

use Carp ;
use VCP::Debug ':debug' ;
use VCP::Utils 'empty' ;
use vars qw( %FIELDS ) ;

## Declares the fields of a VCP::Rev.  Accessors for them are either
## hand-written below or autogenerated from %FIELDS in the BEGIN block
## that follows this declaration.
use fields (
   ##
   ## RevML fields:
   ##
   'ID',               ## A unique identifier for the rev
   'NAME',             ## The file name, relative to REV_ROOT
   'SOURCE_NAME',      ## immutable field, initialized to NAME
   'SOURCE_FILEBRANCH_ID', ## immutable field, initialized to NAME or NAME<branch_number> for cvs
   'SOURCE_REPO_ID',   ## immutable field, initialized to <repo_type>:<repo_server>
   'TYPE',             ## Type.  Binary/text.  Need to standardize the values here
   'BRANCH_ID',        ## What branch this revision is on
   'SOURCE_BRANCH_ID', ## immutable field initialized to BRANCH_ID
   'REV_ID',           ## The source repository's unique ID for this revision
   'SOURCE_REV_ID',    ## immutable field initialized to REV_ID
   'CHANGE_ID',        ## The unique ID for the change set, if any
   'SOURCE_CHANGE_ID', ## immutable field initialized to CHANGE_ID
   'P4_INFO',          ## p4-specific info.
   'CVS_INFO',         ## cvs-specific info.
   'STATE',            ## The state (CVS specific at the moment).
   'TIME',             ## The commit/submit time, if available, as a simple number
   'MOD_TIME',         ## The last modification time, if available
   'USER_ID',          ## The submitter/committer of the revision
   'LABELS',           ## A HASH, keys are tags/labels assoc. with this rev.
   'COMMENT',          ## The comment/message for this rev.
   'ACTION',           ## What was done ('edit', 'move', 'delete', etc.)
   'PREVIOUS_ID',      ## The id of the preceding version
   
   ##
   ## Internal fields: used by VCP::* modules, but not present in RevML files.
   ##
   'WORK_PATH',        ## Where to find the revision on the local filesys
   'DEST_WORK_PATH',   ## Where to find the rev on local fs if it was backfilled
   'VCP_SOURCE_SCM_FN',## Non-normalized name of the file, meaningful only to
                       ## a specific VCP::Source
   'PREVIOUS',         ## A reference to the preceding version, if any

   'AVG_COMMENT_TIME', ## Calculated by VCP::Dest for sorting purposes
   'SORT_TIME',        ## When TIME is missing (think VSS), we need
                       ## to kludge in a time without passing that
                       ## time to the dest repository.
) ;


BEGIN {
   ## Define accessors.
   ##
   ## For every field in %FIELDS that is not listed in %dont_do, generate
   ## a get/set accessor named after the lowercased field name.  The
   ## fields in %dont_do are skipped because they have hand-written
   ## accessors elsewhere in this file.
   my %dont_do = (
      WORK_PATH            => undef,
      DEST_WORK_PATH       => undef,
      ID                   => undef,
      NAME                 => undef,
      COMMENT              => undef,
      LABELS               => undef,
      PREVIOUS             => undef,
      REV_ID               => undef,
      CHANGE_ID            => undef,
      SOURCE_NAME          => undef,
      SOURCE_REV_ID        => undef,
      SOURCE_CHANGE_ID     => undef,
      SOURCE_FILEBRANCH_ID => undef,
      SOURCE_REPO_ID       => undef,
   );

   ## Build the source text of each accessor.  Inside the qq{} template,
   ## $f (method name) and $_ (field name) are interpolated now, while the
   ## backslash-escaped sigils survive into the generated code.  The #line
   ## directive gives each generated sub its own name in diagnostics.
   my @funcs;
   for ( grep !exists $dont_do{$_}, keys %FIELDS ) {
      my $f = lc( $_ ) ;
      push @funcs,
         qq{
            #line 1 'VCP::Rev::$f autogenerated accessor'
            sub $f {
               my VCP::Rev \$self = shift ;
               confess "too many parameters passed" if \@_ > 1 ;
               \$self->{$_} = shift if \@_ == 1 ;
               return \$self->{$_} ;
            } 
         };
   }

   ## Compile all accessors in one string eval.  The trailing 1 makes the
   ## eval return a true value on success, so a false result means a
   ## compile error, which is rethrown from $@.
   eval( join "", @funcs, 1 ) or die $@;
}


# Because names and comments have so much duplication, we store them
# in hashes and refer to those hashes.  Each element is actually
# an ARRAY in which we store both the value and a rank: [ $value, $rank ].
# Once all values have been read, preindex() sorts them and assigns the
# first one the rank of 0, etc.  This allows for very fast sorting.
my %names;
my %comments;

# Same goes for rev_ids and change_ids, which are not really much of
# a space savings over storing the strings, but this allows us to
# treat each rev_id once and only once, instead of once per file,
# for instance, which is valuable when many files can have the same
# rev_id.  This saves both processor and memory.
my %ids;

#END {
#   print "names:         " . keys %names,    "\n";
#   print "comments:      " . keys %comments, "\n";
#   print "ids:           " . keys %ids,      "\n";
#   print "comment bytes: ", length( join "", keys %comments ), "\n";
#}

# Repository ids are cached too, using the same [ $value, $rank ] layout.
# Note that preindex() does not assign ranks to these entries.
my %repo_ids;


## Sets/gets the file name.
##
## When called with one argument, the name is interned in the file-level
## %names cache, so that revs with equal names share a single
## [ $name, $rank ] array.  Always returns the current name string.
## Dies (confess) when more than one argument is passed.
sub name {
   my VCP::Rev $self = shift ;
   confess "too many parameters passed" if @_ > 1 ;

   if ( @_ ) {
      my $value = $_[0] ;
      $names{$value} ||= [ $value, undef ] ;
      $self->{NAME} = $names{$value} ;
   }

   return $self->{NAME}->[0];
}


## Sets/gets the source name.
##
## A value passed in is interned in the shared %names cache (the same
## cache name() uses).  Always returns the current source name string.
## Dies (confess) when more than one argument is passed.
sub source_name {
   my VCP::Rev $self = shift ;
   confess "too many parameters passed" if @_ > 1 ;

   if ( @_ ) {
      my $value = $_[0] ;
      $names{$value} ||= [ $value, undef ] ;
      $self->{SOURCE_NAME} = $names{$value} ;
   }

   return $self->{SOURCE_NAME}->[0];
}


## Sets/gets the source filebranch id.
##
## Dies (confess) when more than one argument is passed.
sub source_filebranch_id {
   my VCP::Rev $self = shift ;
   confess "too many parameters passed" if @_ > 1 ;

   if ( @_ ) {
      # TODO: is this still the case?
      # for most repositories (except cvs) the filebranch_id will be the
      # same as the name, so it is interned in the %names cache.
      my $value = $_[0] ;
      $names{$value} ||= [ $value, undef ] ;
      $self->{SOURCE_FILEBRANCH_ID} = $names{$value} ;
   }

   return $self->{SOURCE_FILEBRANCH_ID}->[0];
}


## Sets/gets the source repository id.
##
## A value passed in is interned in the file-level %repo_ids cache.
## Always returns the current id string.  Dies (confess) when more than
## one argument is passed.
sub source_repo_id {
   my VCP::Rev $self = shift ;
   confess "too many parameters passed" if @_ > 1 ;

   if ( @_ ) {
      my $value = $_[0] ;
      $repo_ids{$value} ||= [ $value, undef ] ;
      $self->{SOURCE_REPO_ID} = $repo_ids{$value} ;
   }

   return $self->{SOURCE_REPO_ID}->[0];
}


## Sets/gets the comment.
##
## A defined comment is interned in the file-level %comments cache as a
## [ $text, $rank ] array.  An undefined comment is stored as a private
## [ undef, 0 ] array and is never added to the cache.  Always returns
## the current comment text.  Dies (confess) when more than one argument
## is passed.
sub comment {
   my VCP::Rev $self = shift ;
   confess "too many parameters passed" if @_ > 1 ;

   if ( @_ ) {
      my $text = $_[0] ;
      if ( defined $text ) {
         $comments{$text} ||= [ $text, undef ] ;
         $self->{COMMENT} = $comments{$text} ;
      }
      else {
         $self->{COMMENT} = [ undef, 0 ] ;
      }
   }

   return $self->{COMMENT}->[0];
}


## Sets/gets the rev_id.
##
## A value passed in is interned in the file-level %ids cache, which is
## shared by the rev_id and change_id accessors.  Always returns the
## current rev_id string.  Dies (confess) when more than one argument is
## passed.
sub rev_id {
   my VCP::Rev $self = shift ;
   confess "too many parameters passed" if @_ > 1 ;

   if ( @_ ) {
      my $value = $_[0] ;
      $ids{$value} ||= [ $value, undef ] ;
      $self->{REV_ID} = $ids{$value} ;
   }

   return $self->{REV_ID}->[0];
}


## Sets/gets the source rev_id.
##
## A value passed in is interned in the file-level %ids cache.  Always
## returns the current source rev_id string.  Dies (confess) when more
## than one argument is passed.
sub source_rev_id {
   my VCP::Rev $self = shift ;
   confess "too many parameters passed" if @_ > 1 ;

   if ( @_ ) {
      my $value = $_[0] ;
      $ids{$value} ||= [ $value, undef ] ;
      $self->{SOURCE_REV_ID} = $ids{$value} ;
   }

   return $self->{SOURCE_REV_ID}->[0];
}


## Sets/gets the change_id.
##
## A value passed in is interned in the file-level %ids cache.  Always
## returns the current change_id string.  Dies (confess) when more than
## one argument is passed.
sub change_id {
   my VCP::Rev $self = shift ;
   confess "too many parameters passed" if @_ > 1 ;

   if ( @_ ) {
      my $value = $_[0] ;
      $ids{$value} ||= [ $value, undef ] ;
      $self->{CHANGE_ID} = $ids{$value} ;
   }

   return $self->{CHANGE_ID}->[0];
}


## Sets/gets the source change_id.
##
## A value passed in is interned in the file-level %ids cache.  Always
## returns the current source change_id string.  Dies (confess) when
## more than one argument is passed.
sub source_change_id {
   my VCP::Rev $self = shift ;
   confess "too many parameters passed" if @_ > 1 ;

   if ( @_ ) {
      my $value = $_[0] ;
      $ids{$value} ||= [ $value, undef ] ;
      $self->{SOURCE_CHANGE_ID} = $ids{$value} ;
   }

   return $self->{SOURCE_CHANGE_ID}->[0];
}


## Sets/gets the labels of this rev.
##
## When called with one or more labels, the existing label set is
## replaced by exactly those labels (duplicates collapse, since labels
## are kept as hash keys).  Calling it with no arguments only reads.
## Returns the sorted list of labels, or an empty list when there is no
## label hash.
sub labels {
   my VCP::Rev $self = shift ;

   $self->{LABELS} = +{ map { ( $_ => 1 ) } @_ }
      if @_ ;

   my $set = $self->{LABELS} ;
   return () unless $set ;
   return sort keys %{$set} ;
}


## Class method: splits a file name into its path segments.
##
## The first argument (the invocant) is ignored.  Returns an empty list
## for an undefined name and a single empty string for an empty name.
## Otherwise leading and trailing runs of "/" or "\" are stripped and
## the remainder is split on runs of either separator.
sub _split_name {
   my $path = $_[1];

   return ()     unless defined $path ;
   return ( "" ) unless length $path ;

   $path =~ s{\A[\\/]+}{};
   $path =~ s{[\\/]+\z}{};

   return split qr{[\\/]+}, $path;
}

=item split_id

   VCP::Rev->split_id( $id );

Splits an id into chunks on punctuation and number/letter boundaries.

   Id           Result
   ==           ======
   1            ( 1 )
   1a           ( 1, "a" )
   1.2          ( 1, "", 2 )
   1a.2         ( 1, "a", 2 )

This oddness is to facilitate manually named revisions that use a
lettering scheme.  Note that the sort algorithms assume that "1.0a"
sorts after "1.0".  This rules out naming schemes like "1.2pre1".

=cut

## Class method: splits an id into alternating numeric / non-numeric
## chunks.  The first argument (the invocant) is ignored.
##
## Returns an empty list for an undefined id and a single empty string
## for an empty id.  Otherwise the id is cut on runs of non-alphanumeric
## characters, each piece contributes a (digits, non-digits) pair, and
## trailing empty chunks are dropped.
sub split_id {
   my $id = $_[1];

   return ()     unless defined $id ;
   return ( "" ) unless length $id ;

   my @chunks;
   for my $piece ( split /[^[:alnum:]]+/, $id ) {
      # Each piece yields exactly two chunks; either one may be empty.
      push @chunks, $piece =~ /(\d*)(\D*)/;
   }

   pop @chunks while @chunks && ! length $chunks[-1];
   return @chunks;
}

=item join_id

   VCP::Rev->join_id( @id );

Joins an id's chunks back to being an id in dotted format.

=cut

## Class method: the inverse of split_id().  The invocant is ignored.
##
## Accepts either a list of chunks or a single ARRAY reference holding
## them.  Chunks are taken two at a time (a number and the letters that
## follow it), glued together, and the results are joined with ".".
sub join_id {
   my ( $class, @args ) = @_;
   my @chunks = ref $args[0] ? @{ $args[0] } : @args;

   my @parts;
   push @parts, join "", splice @chunks, 0, 2
      while @chunks;

   return join ".", @parts;
}


=item cmp_id

   VCP::Rev->cmp_id( $id1, $id2 );
   VCP::Rev->cmp_id( \@id1, \@id2 );  # for presplit ids

splits $id1 and $id2 if necessary and compares them using C<< <=> >> on
even numbered elements and C<cmp> on odd numbered elements.

=cut

## Compares two ids and returns -1, 0 or 1.
##
## Each argument may be an id string (split with split_id()) or an ARRAY
## reference of chunks that were split beforehand.  Chunks are compared
## pairwise, alternating between numeric (<=>) and string (cmp)
## comparison, starting with numeric.  If all shared chunks are equal,
## the id with fewer chunks sorts first.
##
## Dies (confess) unless the invocant is a VCP::Rev or a subclass.
sub cmp_id {
   my $self = shift;
   Carp::confess unless UNIVERSAL::isa( $self, __PACKAGE__ );

   my ( $left, $right ) = @_;
   my @lhs = ref $left  ? @{$left}  : $self->split_id( $left );
   my @rhs = ref $right ? @{$right} : $self->split_id( $right );

   # Chunk 0 is numeric, chunk 1 alphabetic, chunk 2 numeric, and so on.
   my $numeric = 1;
   while ( @lhs && @rhs ) {
      my ( $x, $y ) = ( shift @lhs, shift @rhs );
      my $order = $numeric ? $x <=> $y : $x cmp $y;
      return $order if $order;
      $numeric = ! $numeric;
   }

   return @lhs <=> @rhs;
}


=item sort_time

When some revisions come without a time field, as in VSS, the sort
algorithm needs to plug in a "best guess" time to facilitate sorting.

If no time (or a time of 0) is set, the sort_time field is used instead,
if set.

=cut

# The sort_time accessor is autogenerated.


=item preindex

NOTE: A function.

This is called from sort_revs() to rank certain fields by sorting them
and using numbers to represent their sort order.  This is both a speed
and a memory optimization.

=cut

# Called after last rev is added, before doing any sorting.
## Assigns a sort rank to every cached comment, name and id by filling in
## element [1] of the [ $value, $rank ] arrays held in %comments, %names
## and %ids.  Takes no arguments.
sub preindex {
   # Comments are ranked by a plain string sort of their text.
   my $rank = 0;
   $comments{$_}->[1]    = $rank++ for sort keys %comments;

   {
      # names are more work: we split them in to segments and do a segment
      # oriented sort.
      my @names = values %names;
      # Slot [1] temporarily holds the list of path segments...
      $_->[1] = [ VCP::Rev->_split_name( $_->[0] ) ] for @names;
      $rank = 0;
      # ...and is then overwritten with the rank, in sorted order.
      $_->[1] = $rank++ for sort {
         my @a = @{$a->[1]};
         my @b = @{$b->[1]};

         # Compare segment by segment until a difference is found or
         # either name runs out of segments.
         my $r = 0;
         $r = shift( @a ) cmp shift( @b )
            while ! $r && @a && @b;

         # All shared segments equal: the name with fewer segments left
         # sorts first.
         $r || @a <=> @b;
      } @names;
   }

   {
      # ids are more work yet: we split them in to segments, pack()
      # all segments back in to a single string, and use that string
      # as the sort criterion, then replace the sort criterion with
      # the rank.
      my @max_lengths;
      my @ids = values %ids;
      for ( @ids ) {
         ## TODO: Store the revision type somewhere and use it instead of
         ## VCP::Rev
         my @segments = VCP::Rev->split_id( $_->[0] );
         $_->[1] = \@segments;
         # Track the longest segment seen at each position; this sizes
         # the string fields of the pack format built below.
         for ( my $i = 0; $i <= $#segments; ++$i ) {
            my $l = length $segments[$i];
            $max_lengths[$i] = $l
               if ! defined $max_lengths[$i] || $l > $max_lengths[$i];
         }
      }

      # even segments are assumed to be numeric, odd to be alphabetic
      # Numeric positions are packed as "N", alphabetic positions as a
      # "Z" field one byte wider than the longest segment at that position.
      # NOTE(review): "N" is a 32-bit field, so larger numeric segments
      # would presumably not pack faithfully -- confirm this cannot occur.
      my $seg_num = 0;
      my $fmt = join "",
         map
            $seg_num++ % 2 ? "Z" . ( $_ + 1 ) : "N",
            @max_lengths;

      # Pad every id out to the full number of positions (0 for numeric
      # positions, "\000" for alphabetic ones) and then replace the
      # segment list in slot [1] with its packed string.
      $_->[1] = pack $fmt, @{$_->[1]}
         for map {
            for ( my $seg_num = 0; $seg_num <= $#max_lengths; ++$seg_num ) {
               for ( $_->[1]->[$seg_num] ) {
                  $_ = $seg_num % 2 ? "\000" : 0
                     if empty $_ ;
               }
            }

            $_;
         } @ids;

      # Finally, replace each packed string with its rank.
      $rank = 0;
      $_->[1] = $rank++ for sort { $a->[1] cmp $b->[1] } @ids;
   }
}

=item pack_format

Returns the pack format for a field.  Only sortable fields are supported.

=cut

## Returns the pack() template used for a sortable field.  The result
## does not depend on the arguments.
sub pack_format {
    ## All string fields are ranked as above, so every field packs as "N".
    return "N";
}

=item index_value_expression

Returns an expression that, given "$_", returns the packable code for a field.
Only sortable fields are supported.

=cut

{
   ## Fields whose values are stored as [ $value, $rank ] arrays; they
   ## are indexed by their rank rather than by their value.
   my %is_ranked = map { ( $_ => 1 ) } qw( NAME COMMENT REV_ID CHANGE_ID );

   ## Returns a string of Perl source that, evaluated with a rev in $_,
   ## yields the value to index for the named field.  The field name is
   ## uppercased before use.
   ##
   ##    TIME           falls back to SORT_TIME, then to 0
   ##    ranked fields  use the rank in slot [1], or 0
   ##    anything else  uses the raw field value, or 0
   sub index_value_expression {
      my VCP::Rev $self = shift;
      my $field_name = uc $_[0];

      return "(\$_->{TIME} || \$_->{SORT_TIME} || 0)"
         if $field_name eq "TIME";

      return "(\$_->{$field_name}->[1] || 0)"
         if $is_ranked{$field_name};

      return "(\$_->{$field_name} || 0)";
   }
}


## We never, ever want to delete a file that has revs referring to it.
## So, we put a cleanup object in %files_to_delete and manually manage a
## reference count on it.  The hash is keyed on filename and contains
## a count value.  When the count reaches 0, it is cleaned.  We add a warning
## about undeleted files, which is a great PITA.  The reason there's a
## warning is that we could be using gobs of disk space for temporary files
## if there's some bug preventing VCP::Rev objects from being DESTROYed
## soon enough.  It's a PITA because it means that the source and
## destination object really must be dereferenced ASAP, so their SEEN
## arrays get cleaned up, and every once in awhile I screw it up somehow.
my %files_to_delete ;

## At program exit, when debugging is enabled and VCPNODELETE is not set,
## warn about every file still listed in %files_to_delete that exists on
## disk.
END {
   if ( debugging && ! $ENV{VCPNODELETE} ) {
      for ( sort keys %files_to_delete ) {
         next unless -e $_ ;
         warn "$_ not deleted" ;
      }
   }
}


=item new

Creates an instance, see subclasses for options.

   my VCP::Rev $rev = VCP::Rev->new(
      name => 'foo',
      time => $commit_time,
      ...
   ) ;

=cut

## Constructor.  May be called on a class name or on an existing
## instance, in which case the instance's class is used.
##
## Arguments are key => value pairs.  Each key is lowercased and called
## as a method with the value as its only argument; "labels" is special
## in that its value is an ARRAY reference (or a false value) which is
## flattened into the call.  LABELS is always a hash afterwards.
sub new {
   my $class = shift ;
   $class = ref $class if ref $class ;

   my VCP::Rev $self ;

   {
      ## The object is an array whose first element is the class's
      ## %FIELDS hash, looked up symbolically by class name.
      no strict 'refs' ;
      $self = bless [ \%{"$class\::FIELDS"} ], $class ;
   }

   while ( @_ ) {
      my ( $key, $value ) = splice @_, 0, 2 ;
      my $meth = lc $key ;

      if ( $meth eq "labels" ) {
         $self->$meth( @{ $value || [] } ) ;
      }
      else {
         $self->$meth( $value ) ;
      }
   }

   $self->{LABELS} ||= {} ;

   return $self ;
}


=item is_base_rev

Returns TRUE if this is a base revision.  This is the case if no action
is defined.  A base revision is a revision that is being transferred
merely to check its contents against the destination repository's
contents. Base revisions contain no action and contain a <digest> but no
<delta> or <content>.

When a VCP::Dest::* receives a base revision, the actual body of the
revision is 'backfilled' from the destination repository and checked
against the digest.  This cuts down on transfer size, since the full
body of the file never need be sent with incremental updates.

See L<VCP::Dest/backfill> as well.

=cut

## Returns TRUE when this rev has no action defined, which is what marks
## it as a base revision.
sub is_base_rev {
   my VCP::Rev $self = shift ;

   my $has_action = defined $self->{ACTION} ;
   return ! $has_action ;
}


=item is_placeholder_rev

Returns TRUE if this is a placeholder revision.  Placeholder revisions
are used to record branch points for files that have not been altered on
their branches.

This occurs when reading CVS repositories and finding files that have
branch tags but no revisions on the branch.

A placeholder revision has an action of "placeholder".

Note that placeholders may have rev_id and change_id fields, but they
may be malformed; they are present for sorting purposes only and should
be ignored by the destination repository.

Placeholders may not be present for branches which have files on them.

=cut

## Returns TRUE when this rev's action is the string "placeholder".
## A missing action is simply not a placeholder.
sub is_placeholder_rev {
   my VCP::Rev $self = shift ;

   my $action = $self->{ACTION} ;
   return defined $action && $action eq "placeholder" ;
}


## Sets/gets the reference to the preceding rev.
##
## When setting, the chain of previous revs is walked from the new value
## to its end to make sure it never loops back onto this rev or onto
## itself.  A loop is fatal (confess) and the message lists the revs
## visited.  Dies (confess) when more than one argument is passed.
sub previous {
   my VCP::Rev $self = shift;

   confess "too many parameters passed" if @_ > 1 ;
   return $self->{PREVIOUS} unless @_ ;

   $self->{PREVIOUS} = shift ;

   ## Revs are identified by the numeric value of their reference.
   my %visited = ( int( $self ) => undef );
   my @seen;
   for ( my $n = $self->{PREVIOUS} ; $n ; $n = $n->previous ) {
      push @seen, $n;
      confess "\$rev->previous_id loop detected:\n", map "   " . $_->as_string . "\n", @seen
         if exists $visited{int $n};
      $visited{int $n} = undef;
   }

   return $self->{PREVIOUS} ;
}


=item base_revify

Converts a "normal" rev in to a base rev.

=cut

## Turns this rev into a base rev by setting ACTION, and the other
## fields listed below, to undef.  The fields are assigned directly
## rather than through their accessors.
sub base_revify {
   my VCP::Rev $self = shift ;

   foreach my $field ( qw(
      P4_INFO
      CVS_INFO
      STATE
      TIME
      MOD_TIME
      USER_ID
      LABELS
      COMMENT
      ACTION
   ) ) {
      $self->{$field} = undef ;
   }
}

=item id

Sets/gets the id.  Returns "$name#$rev_id" by default, which should work
for most systems.

=cut

## Sets/gets the id.  When called with arguments, the first one becomes
## the id.  When no id is defined, "<name>#<rev_id>" is built from the
## NAME and REV_ID fields and returned instead; that default is not
## stored.
sub id {
   my VCP::Rev $self = shift;

   if ( @_ ) {
      $self->{ID} = $_[0] ;
   }

   return $self->{ID} if defined $self->{ID} ;
   return join "#", $self->{NAME}->[0], $self->{REV_ID}->[0] ;
}


=item work_path, dest_work_path

These set/get the name of the working file for sources and destinations,
respectively.  These files are automatically cleaned up when all VCP::Rev
instances that refer to them are DESTROYED or have their work_path or
dest_work_path set to other files or undef.

=cut

## Stores a new file name in the given field (WORK_PATH or
## DEST_WORK_PATH) and maintains the per-file reference counts kept in
## %files_to_delete.
##
## The count of the file previously held in the field is decremented.
## If it drops below 1 and the file exists, the file is unlinked, unless
## $ENV{VCPNODELETE} is set.  The count of the new file, if defined, is
## then incremented.
sub _set_work_path {
   my VCP::Rev $self = shift ;
   my ( $field, $new_path ) = @_ ;

   my $doomed = $self->{$field} ;
   if ( defined $doomed && $files_to_delete{$doomed} ) {
      my $refs_left = --$files_to_delete{$doomed} ;

      if ( $refs_left < 1 && -e $doomed ) {
         if ( debugging $self ) {
            ## Report the first caller outside of this package.
            my $depth = 2 ;
            my @details = caller( $depth++ ) ;
            @details = caller( $depth++ )
               while $details[0] eq __PACKAGE__ ;
            debug "vcp: $self unlinking '$doomed' in "
               . join( '|', @details[0,1,2,3]) ;
         }

         unless ( $ENV{VCPNODELETE} ) {
            unlink $doomed or warn "$! unlinking $doomed\n" ;
         }
      }
   }

   $self->{$field} = $new_path ;
   ++$files_to_delete{$new_path} if defined $new_path ;
}


## Sets/gets the source working file.  Setting goes through
## _set_work_path() so that the file reference counts stay correct.
## Dies (confess) when more than one argument is passed.
sub work_path {
   my VCP::Rev $self = shift ;
   confess "too many parameters passed" if @_ > 1 ;

   if ( @_ ) {
      my ( $path ) = @_ ;
      $self->_set_work_path( 'WORK_PATH', $path ) ;
   }

   return $self->{WORK_PATH} ;
}


## Sets/gets the destination working file.  Setting goes through
## _set_work_path() so that the file reference counts stay correct.
## Dies (confess) when more than one argument is passed.
sub dest_work_path {
   my VCP::Rev $self = shift ;
   confess "too many parameters passed" if @_ > 1 ;

   if ( @_ ) {
      my ( $path ) = @_ ;
      $self->_set_work_path( 'DEST_WORK_PATH', $path ) ;
   }

   return $self->{DEST_WORK_PATH} ;
}


=item labels

   $r->labels( @labels ) ;
   @labels = $r->labels ;

Sets/gets labels associated with a revision.  If a label is applied multiple
times, it will only be returned once.  This feature means that the automatic
label generation code for r_... revision and ch_... change labels won't add
additional copies of labels that were already applied to this revision in the
source repository.

Returns labels in an unpredictable order, which happens to be sorted for
now.  This sorting is purely for logging purposes and may disappear at
any moment.

=item add_label

  $r->add_label( $label ) ;
  $r->add_label( @labels ) ;

Marks one or more labels as being associated with this revision of a file.

=cut

## Adds the given labels to this rev's label set.  Labels already
## present are left as they are.  Returns nothing.
sub add_label {
   my VCP::Rev $self = shift ;

   @{ $self->{LABELS} }{ @_ } = map 1, @_ ;

   return ;
}

=item remove_label

   $r->remove_label( $l );

Removes a label from the rev if it exists.  Does nothing if it does not.

Returns TRUE if the label was removed, FALSE otherwise.

=cut

## Removes each of the given labels from this rev.  Returns 1 if at
## least one of them was present, 0 otherwise.
sub remove_label {
   my VCP::Rev $self = shift ;

   my $removed = 0 ;
   for my $label ( @_ ) {
      $removed = 1 if delete $self->{LABELS}->{$label} ;
   }

   return $removed ;
}


## Returns the branch id wrapped in parentheses, or an empty string when
## the rev has no branch id (as judged by VCP::Utils' empty()).
sub _branch_id {
    my VCP::Rev $self = shift;

    my ( $branch ) = $self->branch_id;

    return "" if empty $branch;
    return "($branch)";
}


## Returns the name immediately followed by the parenthesized branch id
## (see _branch_id), e.g. "foo(branch)", or just the name when there is
## no branch id.
sub _name_branch_id {
    my VCP::Rev $self = shift;

    my $name   = $self->name;
    my $branch = $self->_branch_id;

    return $name . $branch;
}

=item iso8601format

   VCP::Rev::iso8601format( $time );

Takes a seconds-since-the-epoch time value and converts it to
an ISO8601 formatted date.  Exportable:

   use VCP::Rev qw( iso8601format );

=cut

## Formats a seconds-since-the-epoch time as "YYYY-MM-DD HH:MM:SSZ" in
## UTC.  This is a plain function, not a method.  Dies when called
## without arguments.
sub iso8601format {
   die "time parameter missing" unless @_;

   my ( $sec, $min, $hour, $mday, $mon, $year ) = gmtime shift ;

   ## gmtime() counts years from 1900 and months from 0.
   return sprintf(
      "%04d-%02d-%02d %02d:%02d:%02dZ",
      $year + 1900, $mon + 1, $mday, $hour, $min, $sec
   ) ;
}


=item as_string

Returns a string representation of the name, rev_id, change_id, type,
time, and a bit of the comment.  Base and placeholder revisions are
flagged as such (and are shown without fields like time and comment).

=cut

## Returns a one-line, human-readable description of this rev.
##
## Base and placeholder revs show only name, branch id, rev_id,
## change_id and type, followed by a marker.  All other revs also show
## the action (padded to 6 columns), the time in ISO8601 format, the
## user id and the comment.  Newlines, carriage returns and tabs in the
## comment are shown as \n, \r and \t, and the result is cut to 32
## characters.  Undefined values are shown as "<undef>".
sub as_string {
   my VCP::Rev $self = shift ;

   my @v ;
   if ( $self->is_base_rev || $self->is_placeholder_rev ) {
      @v = map $self->$_(), qw( name _branch_id rev_id change_id type ) ;
   }
   else {
      for my $field (
         qw( name _branch_id rev_id change_id type action time user_id comment )
      ) {
         my $value = $self->$field() ;

         if ( defined $value ) {
            if ( $field eq 'time' ) {
               $value = iso8601format( $value ) ;
            }
            elsif ( $field eq 'comment' ) {
               ## This works on a copy, so the stored comment is untouched.
               $value =~ s/\n/\\n/g ;
               $value =~ s/\r/\\r/g ;
               $value =~ s/\t/\\t/g ;
               ## An earlier "s/\l/\\l/g" was removed here.  "\l" is a
               ## case-modifier escape, not a character, so that pattern
               ## was empty, and an empty pattern makes Perl reuse the
               ## last successfully matched regex, which could corrupt
               ## the text.
               $value = substr( $value, 0, 32 )
                  if length( $value ) > 32 ;
            }
            elsif ( $field eq 'action' ) {
               $value = sprintf "%-6s", $value ; # 6 == length "delete"
            }
         }

         push @v, $value ;
      }
   }

   @v = map defined $_ ? $_ : "<undef>", @v ;

   return
      $self->is_base_rev
         ? sprintf( qq{%s%s#%s @%s (%s) -- base rev --}, @v )
      : $self->is_placeholder_rev
         ? sprintf( qq{%s%s#%s @%s (%s) -- placeholder rev --}, @v )
         : sprintf( qq{%s%s#%s @%s (%s) %s %s %s "%s"}, @v ) ;
}

## Destructor: gives up this rev's claims on its working files.
##
## Does nothing at all when $ENV{VCPNODELETE} is set.  Otherwise both
## work_path and dest_work_path are reset to undef, which lets
## _set_work_path() adjust the reference counts and unlink files nobody
## refers to any more.  If the former work_path file still exists after
## that, it is unlinked here.
##
## NOTE(review): this final unlink does not consult the reference count,
## so it can presumably remove a file that other revs still refer to --
## confirm that this is intended.
sub DESTROY {
   return if $ENV{VCPNODELETE};

   my VCP::Rev $self = shift ;
   my $doomed = $self->work_path ;

   $self->work_path( undef ) ;
   $self->dest_work_path( undef ) ;

   return unless defined $doomed ;
   return unless -e $doomed ;

   debug "vcp: $self unlinking '$doomed'" if debugging $self ;
   unlink $doomed or warn "$! unlinking $doomed\n";
}


=back

=head1 SUBCLASSING

This class uses the fields pragma, so you'll need to use base and 
possibly fields in any subclasses.

=head1 COPYRIGHT

Copyright 2000, Perforce Software, Inc.  All Rights Reserved.

This module and the VCP package are licensed according to the terms given in
the file LICENSE accompanying this distribution, a copy of which is included in
L<vcp>.

=head1 AUTHOR

Barrie Slaymaker <barries@slaysys.com>

=cut

1