package VCP::Rev; =head1 NAME VCP::Rev - VCP's concept of a revision =head1 SYNOPSIS use VCP::Rev; use VCP::Rev qw( iso8601format ); my $r = VCP::Rev->new; =head1 DESCRIPTION A data structure that represents a revision to a file (but, technically, not a version of a file, though the two are often synonymous). =head1 METHODS =over =cut $VERSION = 1 ; @EXPORT_OK = qw( iso8601format ); use Exporter (); *import = \&Exporter::import; *import = \&Exporter::import; use strict ; use Carp ; use VCP::Logger qw( lg pr BUG ); use VCP::Debug ':debug' ; use VCP::Utils 'empty' ; ## this tells new() how much to preallocate and how. my $pack_init; ## A string to copy in to $self->[0] ## this tells where to get each non-pack()ed field my %field_pos; my @fields; BEGIN { ## VCP::Revs are blessed arrays that contain a series of unpacked fields ## (the references to shared strings above) and a packed string. The ## accessors for the packed strings unpack as needed. The packing is ## to save overhead for "payload" fields that are not used for sorting. ## @fields = ( ## ## RevML fields and their types. ## s=string, the default ## i=integer ## _=build private accessors (prefixed with an "_") for a packed ## field; allows public wrappers around packed fields. ## @=it's an array (needed for serialization support) ## 'ID:_', ## A unique identifier for the rev 'NAME', ## The file name, relative to REV_ROOT 'SOURCE_NAME', ## immutable field, initialized to NAME 'SOURCE_FILEBRANCH_ID', ## immutable field, initialized to ## NAME or NAME for cvs 'SOURCE_REPO_ID', ## immutable field, initialized to ## : 'TYPE', ## Type. Binary/text. 
'BRANCH_ID', ## What branch this revision is on 'SOURCE_BRANCH_ID', ## immutable field initialized to BRANCH_ID 'REV_ID', ## The source repositories unique ID for this revision 'SOURCE_REV_ID', ## immutable field initialized to REV_ID 'CHANGE_ID', ## The unique ID for the change set, if any 'SOURCE_CHANGE_ID', ## immutable field initialized to CHANGE_ID 'P4_INFO', ## p4-specific info. 'CVS_INFO', ## cvs-specific info. 'TIME:i', ## The commit/submit time, in seconds-since-the-epoch 'MOD_TIME:i', ## The last modification time, if available 'USER_ID', ## The submitter/commiter of the revision 'LABELS:_@', ## A bit vector of tags/labels assoc. with this rev. 'COMMENT', ## The comment/message for this rev. 'ACTION', ## What was done ('edit', 'move', 'delete', etc.) 'PREVIOUS_ID', ## The id of the preceding version 'SOURCE:_', ## A reference to the source so that the destination ## can get the file it needs right from the source. ## NOTE: it's up to callers to thunk this for ## serialization, not VCP::Rev. Some will want ## to undef() it, others will want to save ## and restore it. ); ## ## Compile the fields' accessors ## ## build a more accessible structure from the above: my $field_pos = 2; ## The index of the next non-packed field to be built ## [0] is for the packed string and [1] for the ## defined indicator. my $packed_field_pos = 0; ## The index of the next packed field to be built my @pack_format; my %call_count; END { lg "$_: $call_count{$_}\n" for sort keys %call_count; } my %fields = map { my $key = $_; my ( $name, $type ) = split /:/; my $is_private = $type ? $type =~ s/_// : undef; my $is_array = $type ? $type =~ s/\@// : undef; $type = "s" unless $type; my $pack_format = $type eq "s" ? "L1" : ## strings get turned in to uint32 indexes $type eq "i" ? "L1" : ## integers are int32. undef; ## Other formats may not be packed. 
push @pack_format, $pack_format if $pack_format; $field_pos{$name} = $field_pos unless $pack_format; ( $key => { NAME => $name, public_name => lc $name, name => ( $is_private ? "_" : "" ) . lc $name, set_name => ( $is_private ? "_" : "" ) . "set_" . lc $name, type => $type, pack_format => $pack_format, pack_pos => $pack_format ? $packed_field_pos++ : undef, pos => $pack_format ? undef : $field_pos++, } ); } @fields; my $pack_format = join " ", @pack_format; $pack_init = pack $pack_format, 0 x @pack_format; my @code; my @packed_fields; ## Many of the strings used in metadata are extremely redundant. So ## we store all strings as indexes in to @strings. my %strings; my @strings; $strings[0] = ""; $strings{""} = 0; for ( map $fields{$_}, @fields ) { push @packed_fields, $_; my $pack_pos = $_->{pack_pos}; my $name = $_->{name}; my $set_name = $_->{set_name}; if ( $_->{type} ne "s" ) { push @code, < 1; my \$self = shift; \$call_count{$name}++; return undef unless vec \$self->[1], $pack_pos, 1; return unpack "L1", substr \$self->[0], $pack_pos * 4, 4; } #line 1 VCP::Rev::$set_name() sub $set_name { my \$self = shift; \$call_count{$set_name}++; my \$is_defined = defined \$_[0]; vec( \$self->[1], $pack_pos, 1 ) = \$is_defined; substr( \$self->[0], $pack_pos * 4, 4 ) = pack "L1", \$is_defined ? shift : 0; Carp::cluck "$set_name called in non-void context" if defined wantarray; } ACCESSOR } else { ## It's a string field push @code, < 1; my \$self = shift; \$call_count{$name}++; return undef unless vec \$self->[1], $pack_pos, 1; return \$strings[ unpack "L1", substr \$self->[0], $pack_pos * 4, 4 ]; } #line 1 VCP::Rev::$set_name() sub $set_name { my \$self = shift; \$call_count{$set_name}++; my \$v = shift; ## Don't copy. my \$is_defined = defined \$v; \$v = "" unless defined \$v; vec( \$self->[1], $pack_pos, 1 ) = \$is_defined; substr( \$self->[0], $pack_pos * 4, 4 ) = pack "L1", exists \$strings{\$v} ? 
\$strings{\$v} : do { push \@strings, \$v; \$strings{\$v} = \$#strings; }; Carp::cluck "$set_name called in non-void context" if defined wantarray; } ACCESSOR } } ## ## These fields have special set_...() wrappers that should be called on ## startup. $_->{force_set} = 1 for grep $_->{public_name} =~ m{\A( labels )\z}x, values %fields; push( @code, <{public_name}; $_->{force_set} ? " 0, ## $name\n" : ( " defined \$h{$name}\n", $_->{type} eq "s" ? <{public_name}} ? 1 : 0,\n", @packed_fields), <set_$_( \$h{$_} ) if defined \$h{$_};\n", map $_->{public_name}, grep defined $_->{pos} || $_->{force_set}, map $fields{$_}, @fields ), <action || "" ) eq "branch"; return \$self; } FAST_NEW_END ); push( @code, <{public_name} eq "labels" ? " labels => [ \$self->labels ],\n" : " $_->{public_name} => \$self->$_->{public_name},\n", sort values %fields ), < $index_in_labels_array sub set_labels { my VCP::Rev $self = shift ; my ( $labels ) = @_; die "empty label" if grep ! length, @$labels; push( @labels, $_ ), $labels{$_} = $#labels for grep ! exists $labels{$_}, @$labels; my %seen; $self->_set_labels( pack "L*", grep !$seen{$_}++, map $labels{$_}, @$labels ); } =item fields Returns a list of field names, with "@" prepended to any array fields. =cut sub fields { return map { my $name = lc $_; my $is_array = /\@/; $name =~ s/:.*//; $is_array ? "\@$name" : $name; } @fields; } =item serialize Converts the revision metadata to a set of "name=value" strings suitable for emitting to a flat file for later recovery. Names are included so that new revisions of VCP can rescuscitate revisions. =cut sub serialize { my VCP::Rev $self = shift; return map { my $name = lc $_; my $is_array = /\@/; $name =~ s/:.*//; my $getter = $name eq "source" ? "_source" : $name; my @v = $self->$getter(); @v && defined $v[0] ? $name . ( $is_array ? "@" . join ",", map { my $v = $_; $v =~ s/\\/\\\\/g; $v =~ s/,/\\-/g; $v; } @v : "=" . 
$v[0] ) : (); } sort @fields; }
## ^ The line above is the unmodified tail of serialize(); that sub opens
##   before this span, so only its closing tokens appear here.

=item deserialize

   my $r = VCP::Rev->deserialize( @strings );

Rebuilds a revision from strings of the form C<name=value> (scalar
fields) or C<name@v1,v2,...> (array fields), the format emitted by
C<serialize>.  In array values a backslash is escaped as C<\\> and a
comma as C<\->.

=cut

sub deserialize {
   my $class = shift;   ## Not used: a VCP::Rev is always constructed.
   my $r = VCP::Rev->new;

   for my $pair ( @_ ) {
      my ( $name, $type, $value ) = $pair =~ /\A(\w+)([\@=])(.*)\z/s
         or BUG( "can't deserialize '$pair'" );

      ## "source" is a private packed field, so it has no public setter.
      my $setter = $name eq "source" ? "_set_source" : "set_$name";
      BUG( "unknown VCP::Rev field '$name'" ) unless $r->can( $setter );

      if ( $type eq "=" ) {
         $r->$setter( $value );
      }
      else {
         ## Unescape in ONE pass.  Doing "\\" -> "\" and then "\-" -> ","
         ## as two passes turns an escaped backslash followed by a
         ## literal "-" in to a comma.
         my @values = map {
            ( my $item = $_ ) =~ s{\\([\\-])}{ $1 eq "-" ? "," : "\\" }ge;
            $item;
         } split /,/, $value;
         $r->$setter( \@values );
      }
   }

   return $r;
}


## Returns the (sorted) list of label strings attached to this rev, or
## an empty list if there are none.  Read-only: passing arguments is a
## fatal error.
sub labels {
   Carp::confess( "call set_labels instead!" ) if @_ > 1;
   my $self = shift;

   my $packed = $self->_labels;
   return if empty( $packed );

   ## The packed field holds uint32 indexes in to @labels.
   return sort map $labels[$_], unpack "L*", $packed;
}


## Class method: splits a file name in to its path segments on runs of
## "/" or "\", ignoring leading and trailing separators.  Returns () for
## undef and ( "" ) for the empty string.
sub split_name {
   shift;
   my $name = $_[0];
   return ()     unless defined $name;
   return ( "" ) unless length $name;

   $name =~ s{\A[\\/]+}{};
   $name =~ s{[\\/]+\z}{};
   return split qr{[\\/]+}, $name;
}


## Compares two names segment by segment using cmp; each name may be a
## string or an array ref of segments already produced by split_name().
## When one name is a prefix of the other, the shorter one sorts first.
sub cmp_name {
   my $self = shift;
   Carp::confess() unless UNIVERSAL::isa( $self, __PACKAGE__ );

   my @left  = ref $_[0] ? @{$_[0]} : $self->split_name( $_[0] );
   my @right = ref $_[1] ? @{$_[1]} : $self->split_name( $_[1] );

   my $r = 0;
   while ( ! $r && @left && @right ) {
      $r = shift( @left ) cmp shift( @right );
   }
   return $r || @left <=> @right;
}


=item split_id

   VCP::Rev->split_id( $id );

Splits an id in to chunks on punctuation and number/letter boundaries.

   Id     Result
   ==     ======
   1      ( 1 )
   1a     ( 1, "a" )
   1.2    ( 1, "", 2 )
   1a.2   ( 1, "a", 2 )

This oddness is to facilitate manually named revisions that use a
lettering scheme.  Note that the sort algorithms make an assumption that
"1.0a" is after "1.0".  This prevents naming schemes like "1.2pre1".

=cut

sub split_id {
   shift;
   my $id = $_[0];
   return ()     unless defined $id;
   return ( "" ) unless length $id;

   ## Each punctuation-separated chunk yields a ( digits, non-digits )
   ## pair, so numeric parts always land on even indexes.
   my @chunks = map { /(\d*)(\D*)/ } split /[^[:alnum:]]+/, $id;
   pop @chunks while @chunks && ! length $chunks[-1];
   return @chunks;
}


=item join_id

   VCP::Rev->join_id( @id );

Joins an id's chunks back to being an id in dotted format.

=cut

sub join_id {
   shift;
   my @in = ref $_[0] ? @{ shift() } : @_;

   my @out;
   while ( @in ) {
      ## Re-pair each number with the letter chunk that follows it.
      my $segment = shift @in;
      $segment .= shift @in if @in;
      push @out, $segment;
   }

   return join ".", @out;
}


=item cmp_id

   VCP::Rev->cmp_id( $id1, $id2 );
   VCP::Rev->cmp_id( \@id1, \@id2 );  # for presplit ids

splits $id1 and $id2 if necessary and compares them using C<< <=> >> on
even numbered elements and C<cmp> on odd numbered elements.

=cut

sub cmp_id {
   my $self = shift;
   Carp::confess() unless UNIVERSAL::isa( $self, __PACKAGE__ );

   my @left  = ref $_[0] ? @{$_[0]} : $self->split_id( $_[0] );
   my @right = ref $_[1] ? @{$_[1]} : $self->split_id( $_[1] );

   while ( @left && @right ) {
      ## Even position: must be numeric on both sides.
      my ( $A, $B ) = ( shift @left, shift @right );
      Carp::confess( "\$A='$A' not numeric" ) unless $A =~ /\A\d+\z/;
      Carp::confess( "\$B='$B' not numeric" ) unless $B =~ /\A\d+\z/;
      my $r = $A <=> $B;
      return $r if $r;

      ## Odd position: the (possibly empty) letter suffix.
      last unless @left && @right;
      ( $A, $B ) = ( shift @left, shift @right );
      $r = $A cmp $B;
      return $r if $r;
   }

   ## All shared chunks are equal: the id with more chunks sorts later.
   return @left <=> @right;
}


=item new

Creates an instance, see subclasses for options.

   my VCP::Rev $rev = VCP::Rev->new(
      name => 'foo',
      time => $commit_time,
      ...
   ) ;

=cut

## Autogenerated

=item is_base_rev

Returns TRUE if this is a base revision.  This is the case if no
action is defined.  A base revision is a revision that is being
transferred merely to check its contents against the destination
repository's contents.  Base revisions contain no action and contain a
C<< <digest> >> but no C<< <delta> >> or C<< <content> >>.

When a VCP::Dest::* receives a base revision, the actual body of the
revision is 'backfilled' from the destination repository and checked
against the digest.  This cuts down on transfer size, since the full
body of the file never need be sent with incremental updates.

See L<VCP::Dest> as well.

=cut

sub is_base_rev {
   my $self = shift;
   return ! defined $self->action;
}


=item is_placeholder_rev

Returns TRUE if this is a placeholder revision.

Placeholder revisions are used to record branch points for files that
have not been altered on their branches.

=pod

This occurs when reading CVS repositories and finding files that have
branch tags but no revisions on the branch.

A placeholder revision has an action of "placeholder".

Note that placeholders may have rev_id and change_id fields, but they
may be malformed; they are present for sorting purposes only and should
be ignored by the destination repository.

Placeholders may not be present for branches which have files on them.

=cut

sub is_placeholder_rev {
   my $self = shift;
   my $action = $self->action;
   return defined $action && $action eq "placeholder";
}


=item base_revify

Converts a "normal" rev in to a base rev by clearing its labels and the
p4_info, cvs_info, time, mod_time, user_id, comment and action fields.

=cut

sub base_revify {
   my $self = shift;

   ## set_labels() dereferences its argument, so pass an explicit empty
   ## list rather than nothing.
   $self->set_labels( [] );

   ## The plain accessors are read-only; fields can only be cleared
   ## through their set_...() counterparts.
   for my $field ( qw(
      p4_info cvs_info time mod_time user_id comment action
   ) ) {
      my $setter = "set_$field";
      $self->$setter( undef );
   }

   return;
}


=item id

Sets/gets the id.  Returns "$name#$source_rev_id" by default, which
should work for most systems.

=cut

sub id {
   ## With an argument this is a setter: hand @_ (still including the
   ## invocant) straight to the private setter.
   goto &_set_id if @_ > 1;

   my $self = shift;

   my $id = $self->_id;
   return $id if defined $id;

   ## No explicit id: derive one from the name and the source rev id.
   my $n = $self->name;
   my $r = $self->source_rev_id;

   BUG( "undefined name: ", $self->as_string )
      unless defined $n;
   BUG( "empty name: ", $self->as_string )
      unless length $n;
   BUG( "undefined source_rev_id: ", $self->as_string )
      unless defined $r;
   BUG( "empty source_rev_id: ", $self->as_string )
      unless length $r;

   return "$n#$r";
}


## Public name for the private _set_id() setter.
sub set_id {
   goto &_set_id;
}


## We maintain a reference to the sources and pack the index.  This allows
## for recoverable serialization (as changesets.pm uses), but may hamper
## storage between instantiations (as VCP::Dest::metadb does).
## Registry of source objects, keyed by the integer stored in each rev's
## packed "source" field.
my %sources;

## Gets the source object for this rev (undef if none is set).  With an
## argument, behaves like set_source().
sub source {
   my $self = shift;

   ## $self has already been shifted off @_, so it must be passed
   ## explicitly; a bare goto &set_source would lose the invocant.
   return set_source( $self, @_ ) if @_;

   my $key = $self->_source;
   return defined $key ? $sources{$key} : undef;
}


## Sets (or, given undef, clears) the source object for this rev.  The
## object is remembered in %sources and only its key is stored in the rev.
sub set_source {
   my $self = shift;
   my ( $new_source ) = @_;

   BUG( "source must be an object" )
      if defined $new_source && ! ref $new_source;

   if ( defined $new_source ) {
      ## A reference in numeric context yields its address, which serves
      ## as the key.
      my $key = int $new_source;
      $sources{$key} ||= $new_source;
      $self->_set_source( $key );
   }
   else {
      $self->_set_source( undef );
   }

   ## Explicit return keeps the setter calls above in void context.
   return;
}


=item get_source_file

Fetches the file from the source repository and returns a path to that file.

=cut

sub get_source_file {
   my $self = shift;

   my $source = $self->source;
   die "source() not set for ", $self->as_string, "\n"
      unless $source;

   return $source->get_source_file( $self );
}


=item labels

   $r->set_labels( \@labels ) ; ## pass an array ref for speed
   @labels = $r->labels ;

Sets/gets labels associated with a revision.  If a label is applied multiple
times, it will only be returned once.  This feature means that the automatic
label generation code for r_... revision and ch_... change labels won't add
additional copies of labels that were already applied to this revision in the
source repository.

Returns labels in an unpredictable order, which happens to be sorted for now.
This sorting is purely for logging purposes and may disappear at any moment.

=item add_label

   $r->add_label( $label ) ;
   $r->add_label( @labels ) ;

Marks one or more labels as being associated with this revision of a file.

=cut

sub add_label {
   my $self = shift;
   $self->set_labels( [ $self->labels, @_ ] );
   return;
}


## Returns the branch id wrapped in angle brackets, or "" when the rev
## has no branch id.
sub _branch_id {
   my $self = shift;
   my $branch_id = $self->branch_id;
   return empty( $branch_id ) ? "" : "<$branch_id>";
}


## Returns the name immediately followed by the bracketed branch id.
sub _name_branch_id {
   my $self = shift;
   return $self->name . $self->_branch_id;
}


=item iso8601format

   VCP::Rev::iso8601format( $time );

Takes a seconds-since-the-epoch time value and converts it to an ISO8601
formatted date.

=pod

Exportable:

   use VCP::Rev qw( iso8601format );

=cut

sub iso8601format {
   die "time parameter missing" unless @_;

   my ( $sec, $min, $hour, $mday, $mon, $year ) = gmtime( shift );

   ## gmtime() years count from 1900 and months from 0.
   return sprintf(
      "%04d-%02d-%02d %02d:%02d:%02dZ",
      $year + 1900, $mon + 1, $mday, $hour, $min, $sec
   );
}


## Private helper for as_string(): renders a comment as a short,
## single-line, printable-ASCII excerpt.
sub _as_string_comment {
   my ( $c ) = @_;

   $c =~ s/\n/\\n/g;
   $c =~ s/\r/\\r/g;
   $c =~ s/\t/\\t/g;
   $c =~ s/\f/\\f/g;

   ## Octal-escape everything outside printable ASCII (space .. "~").
   ## The bounds are octal: \040 is space, \176 is "~".
   $c =~ s/([^\040-\176])/sprintf "\\%03o", ord $1/eg;

   $c = substr( $c, 0, 32 ) if length( $c ) > 32;
   return $c;
}


=item as_string

Prints out a string representation of the name, rev_id, change_id, type,
time, and a bit of the comment.  base revisions are flagged as such (and don't
have fields like time and comment).

=cut

sub as_string {
   my $self = shift;

   my $is_base        = $self->is_base_rev;
   my $is_placeholder = $self->is_placeholder_rev;

   ## Base and placeholder revs carry no action or comment worth showing.
   my @field_names = (
      qw( name rev_id change_id branch_id type ),
      $is_base || $is_placeholder
         ? qw( time user_id )
         : qw( action time user_id comment ),
   );

   my @v = map {
      my $value = $self->$_();
        ! defined $value ? ""
      : $_ eq "time"     ? iso8601format( $value )
      : $_ eq "comment"  ? _as_string_comment( $value )
      : $_ eq "action"   ? sprintf( "%-6s", $value )  # 6 == length "delete"
      :                    $value;
   } @field_names;

   ## The base rev format consumes only the first five values.
   return $is_base
      ? sprintf( qq{%s#%s @%s <%s> (%s) -- base rev --}, @v )
      : $is_placeholder
      ? sprintf( qq{%s#%s @%s <%s> (%s) %s -- %s -- placeholder rev --}, @v )
      : sprintf( qq{%s#%s @%s <%s> (%s) %s %s %s "%s"}, @v );
}


=back

=head1 SUBCLASSING

This class uses the fields pragma, so you'll need to use base and
possibly fields in any subclasses.

=head1 COPYRIGHT

Copyright 2000, Perforce Software, Inc.  All Rights Reserved.

This module and the VCP package are licensed according to the terms given in
the file LICENSE accompanying this distribution, a copy of which is included in
L<vcp>.

=head1 AUTHOR

Barrie Slaymaker

=cut

1