package VCP::Dest ; =head1 NAME VCP::Dest - A base class for VCP destinations =head1 SYNOPSIS =head1 DESCRIPTION =head1 EXTERNAL METHODS =over =for test_scripts t/01sort.t =cut use strict ; use Carp ; use File::Spec ; use File::Spec::Unix ; use UNIVERSAL qw( isa ) ; use VCP::Revs ; use VCP::Debug qw(:debug) ; use VCP::Utils qw( start_dir escape_filename empty ); use vars qw( $VERSION $debug ) ; $VERSION = 0.1 ; $debug = 0 ; use base 'VCP::Plugin' ; use fields ( 'DEST_HEADER', ## Holds header info until first rev is seen. 'DEST_SORT_KEYS', ## HASH of sort keys, indexed by name and rev. 'DEST_COMMENT_TIMES', ## The average time of all instances of a comment 'DEST_DEFAULT_COMMENT', ## The comment to use when a comment is undefined ## This is used when presorting/merging so ## that comment will still be used to ## compare when selecting the next rev to ## merge, otherwise it would be removed as ## a sporadic field. 'DEST_HEAD_REVS', ## Map of head revision on each branch of each file 'DEST_REV_MAP', ## Map of source rev id to destination file & rev 'DEST_MAIN_BRANCH_ID', ## Container of main branch_id for each file 'DEST_FILES', ## Map of files->state, for CVS' sake 'DEST_DB_DIR', ## Directory name in which to store the transfer ## state databases ) ; use VCP::Revs ; =item new Creates an instance, see subclasses for options. The options passed are usually native command-line options for the underlying repository's client. These are usually parsed and, perhaps, checked for validity by calling the underlying command line. =cut sub new { my $class = shift ; $class = ref $class || $class ; my VCP::Dest $self = $class->SUPER::new( @_ ) ; ## rev_id is here in case the change id isn't, ## name is here for VSS deletes, which have no other data. return $self ; } =back ############################################################################### =head1 SUBCLASSING This class uses the fields pragma, so you'll need to use base and possibly fields in any subclasses. =head2 SUBCLASS API These methods are intended to support subclasses. =over =item parse_options $self->parse_options( \@options, @specs ); Parses common options. =cut sub parse_options { my VCP::Dest $self = shift; $self->SUPER::parse_options( @_, "db-dir=s" => sub { $self->db_dir( $_[1] ) }, ); if( ! empty $self->db_dir && empty $self->repo_id ) { warn "--repo-id required if --db-dir present\n"; $self->usage_and_exit ; } } =item digest $self->digest( "/tmp/readers" ) ; Returns the Base64 MD5 digest of the named file. Used to compare a base rev (which is the revision *before* the first one we want to transfer) of a file from the source repo to the existing head rev of a dest repo. The Base64 version is returned because that's what RevML uses and we might want to cross-check with a .revml file when debugging. =cut sub digest { shift ; ## selfless little bugger, isn't it? my ( $path ) = @_ ; require Digest::MD5 ; my $d= Digest::MD5->new ; open DEST_P4_F, "<$path" or die "$!: $path" ; $d->addfile( \*DEST_P4_F ) ; my $digest = $d->b64digest ; close DEST_P4_F ; return $digest ; } =item compare_base_revs $self->compare_base_revs( $rev ) ; Checks out the indicated revision fromt the destination repository and compares it (using digest()) to the file from the source repository (as indicated by $rev->work_path). Dies with an error message if the base revisions do not match. Calls $self->checkout_file( $rev ), which the subclass must implement. =cut sub compare_base_revs { my VCP::Dest $self = shift ; my ( $rev ) = @_ ; ## This block should only be run when transferring an incremental rev. ## from a "real" repo. If it's from a .revml file, the backfill will ## already be done for us. ## Grab it and see if it's the same... my $source_digest = $self->digest( $rev->work_path ) ; my $backfilled_path = $self->checkout_file( $rev ); my $dest_digest = $self->digest( $backfilled_path ); die( "vcp: base revision\n", $rev->as_string, "\n", "differs from the last version in the destination p4 repository.\n", " source digest: $source_digest (in ", $rev->work_path, ")\n", " dest. digest: $dest_digest (in ", $backfilled_path, ")\n" ) unless $source_digest eq $dest_digest ; } =item header Gets/sets the $header passed to handle_header(). Generally not overridden: all error checking is done in new(), and no output should be generated until output() is called. =cut sub header { my VCP::Dest $self = shift ; $self->{DEST_HEADER} = shift if @_ ; return $self->{DEST_HEADER} ; } =item db_dir Set or return the directory name where the transfer state databases are stored. This is the directory to store the state information for this transfer in. This includes the mapping of source repository versions (name+rev_id, usually) to destination repository versions and the status of the last transfer, so that incremental transfers may restart where they left off. =cut sub db_dir { my VCP::Dest $self = shift ; $self->{DEST_DB_DIR} = shift if @_; return $self->{DEST_DB_DIR}; } =item _db_store_location Determine the location to store the transfer state databases. Uses the absolute path provided by the --db-dir option if present, else use directory 'vcp_state' in the directory the program was started in. The file name is an escaped repo_id. =cut sub _db_store_location { my VCP::Dest $self = shift ; my $loc = $self->db_dir; $loc = ( empty $loc ) ? File::Spec->catdir( start_dir, "vcp_state" ) : File::Spec::Unix->rel2abs( $loc ) ; return File::Spec->catfile( $loc, escape_filename $self->repo_id ); } =item rev_map Set or return a reference to the RevMapDB in use. =cut sub rev_map { my VCP::Dest $self = shift ; $self->{DEST_REV_MAP} ||= do { require VCP::RevMapDB; VCP::RevMapDB->new( StoreLoc => $self->_db_store_location, ); }; } =item head_revs Set or return a reference to the HeadRevsDB in use. =cut sub head_revs { my VCP::Dest $self = shift ; $self->{DEST_HEAD_REVS} ||= do { require VCP::HeadRevsDB; $self->{DEST_HEAD_REVS} = VCP::HeadRevsDB->new( StoreLoc => $self->_db_store_location, ); }; } =item main_branch_id Set or return a reference to the MainBranchIdDB in use. =cut sub main_branch_id { my VCP::Dest $self = shift; $self->{DEST_MAIN_BRANCH_ID} ||= do { require VCP::MainBranchIdDB; $self->{DEST_MAIN_BRANCH_ID} = VCP::MainBranchIdDB->new( StoreLoc => $self->_db_store_location, ); }; } =item files Set or return a reference to the HeadRevsDB in use. =cut sub files { my VCP::Dest $self = shift ; $self->{DEST_FILES} ||= do { require VCP::FilesDB; $self->{DEST_FILES} = VCP::FilesDB->new( StoreLoc => $self->_db_store_location, ); } } =back =head2 SUBCLASS OVERLOADS These methods are overloaded by subclasses. =over =item backfill $dest->backfill( $rev ) ; Checks the file indicated by VCP::Rev $rev out of the target repository if this destination supports backfilling. Currently, only the revml destination does not support backfilling. The $rev->workpath must be set to the filename the backfill was put in. This is used when doing an incremental update, where the first revision of a file in the update is encoded as a delta from the prior version. A digest of the prior version is sent along before the first version delta to verify it's presence in the database. So, the source calls backfill(), which returns TRUE on success, FALSE if the destination doesn't support backfilling, and dies if there's an error in procuring the right revision. If FALSE is returned, then the revisions will be sent through with no working path, but will have a delta record. MUST BE OVERRIDDEN. =cut sub backfill { my VCP::Dest $self = shift ; my ( $r ) = @_; die ref( $self ) . "::checkout_file() not found for ", $r->as_string, "\n" unless $self->can( "checkout_file" ); my $work_path = $self->checkout_file( $r ); link $work_path, $r->work_path or die "$! linking $work_path to ", $r->work_path; unlink $work_path or die "$! unlinking $work_path"; } =item handle_footer $dest->handle_footer( $footer ) ; Does any cleanup necessary. Not required. Don't call this from the override. =cut sub handle_footer { my VCP::Dest $self = shift ; return ; } =item handle_header $dest->handle_header( $header ) ; Stows $header in $self->header. This should only rarely be overridden, since the first call to handle_rev() should output any header info. =cut sub handle_header { my VCP::Dest $self = shift ; my ( $header ) = @_ ; $self->header( $header ) ; return ; } =item handle_rev $dest->handle_rev( $rev ) ; Outputs the item referred to by VCP::Rev $rev. If this is the first call, then $self->none_seen will be TRUE and any preamble should be emitted. MUST BE OVERRIDDEN. Don't call this from the override. =cut sub handle_rev { my VCP::Dest $self = shift ; die ref( $self ) . "::handle_rev() not found, Oops.\n" ; } =back =head2 Sorting =over =cut sub _compile_sort_rec_bulk_indexer { my ( $rev, $spec ) = @_ ; my $code = join "", q[sub { my $revs = shift; my $r; for my $sr ( @$revs ) { $r = $sr->[0]; $sr->[1] = pack '], map( $rev->pack_format( $_ ), @$spec ), q[', ], join( ", ", map $rev->index_value_expression( $_ ), @$spec ), q[}}]; debug caller, $code if debugging __PACKAGE__; return ( eval $code or die $@ ); } =item sort_revs $source->dest->sort_revs( $source->revs ) ; This orders the revs in to change order either using the change_ids in the revisions or by working from the oldest revs to the newest in generations by picking off the revisions in each generation that comprise a change. "generation" means all revs that could have changed together without parent/child conflict; it's a loose metaphor but should give you some idea of how the change aggregation works. =cut ## TODO: see if a loop that works on all the records when the default ## sort ## key is used is much faster, I suspect it will be. sub _calc_sort_recs { my VCP::Dest $self = shift ; my ( $sort_recs, $spec ) = @_; return unless @$sort_recs; debug "vcp sort key: ", join ", ", @$spec if debugging "sort" ; if ( grep /avg_comment_time/, @$spec ) { $self->{DEST_COMMENT_TIMES} = {}; for ( @$sort_recs ) { my $r = $_->[0]; my $comment = defined $r->comment ? $r->comment : $r->is_base_rev ? "" : undef; my $time = defined $r->sort_time ? $r->sort_time : $r->is_base_rev ? 0 : undef; next unless defined $comment && defined $time; push @{$self->{DEST_COMMENT_TIMES}->{$comment}}, $time; } for ( values %{$self->{DEST_COMMENT_TIMES}} ) { next unless @$_; my $sum; $sum += $_ for @$_; $_ = $sum / @$_; } } my $indexer = _compile_sort_rec_bulk_indexer( $sort_recs->[0]->[0], $spec ); $indexer->( $sort_recs ); } sub _eq($$) { defined $_[0] && defined $_[1] ? $_[0] eq $_[1] : ! defined( $_[0] ) && ! defined( $_[1] ); } sub _in_same_change { my ( $ra, $rb ) = @_; return $ra->change_id eq $rb->change_id unless empty $ra->change_id || empty $rb->change_id; return _eq( $ra->user_id, $rb->user_id ) && _eq( $ra->comment, $rb->comment ) && ( ( ! $ra->is_placeholder_rev && ! $rb->is_placeholder_rev ) || ( $ra->is_placeholder_rev && $rb->is_placeholder_rev && _eq $ra->branch_id, $rb->branch_id ) ); ## placeholders must be on same branch; regular revs don't ## (we assume comments will break those more readily); ## placeholders and non-placeholders should not be in same ## change. } ## The "normal" way of VCP conversions is to create branches in ## one change and then make the first edits on another change. ## In this situation, the change number aggregation here should ## be the proper way to group revs in to changes, otherwise it's ## up to the subclass. ## For testing purposes, and possibly for real world use, we also ## allow the branch and the edit to occur in the same change. sub add_in_change_numbers { return 1; } sub sort_revs { my VCP::Dest $self = shift ; my ( $revs ) = @_ ; ## Use the ->previous references to find the roots and then ## reorder the revs by growing up from the roots. my %rev_kids; my @roots; my @sort_recs; debug "creating revision trees and indexing them" if debugging $self; my $has_change_ids; for my $r ( $revs->get ) { $has_change_ids ||= ! empty $r->change_id; ## the undef is so the sort keys can be filled in later ## without needing to increase the memory size. my $sort_rec = [ $r, undef ]; push @sort_recs, $sort_rec; if ( $r->previous ) { push @{$rev_kids{int $r->previous}}, $sort_rec; } else { push @roots, $sort_rec; } } $self->{DEST_DEFAULT_COMMENT} = ""; debug "generating index" if debugging $self; my @spec = qw( change_id time user_id comment branch_id name ); VCP::Rev::preindex(); $self->_calc_sort_recs( \@sort_recs, \@spec ); $self->{DEST_DEFAULT_COMMENT} = undef; warn "vcp: aggregating changes.\n"; my @result; @roots = sort { $a->[1] cmp $b->[1] } @roots; my $change_number = 0; while ( @roots ) { ++$change_number; my @change; my @kids; ## Extract one change and then add in all children of the ## extracted revisions. do { my ( $r, undef ) = @{shift @roots}; ## discard sort key push @change, $r; my $kids = delete $rev_kids{int $r}; push @kids, @$kids if $kids; } while ( @roots && ( $has_change_ids ? $change[0]->change_id eq $roots[0]->[0]->change_id : _in_same_change $change[0], $roots[0]->[0] ) ); debug "...change $change_number: " . @change . " revs" if debugging $self; if ( $self->add_in_change_numbers && ! $has_change_ids ) { $_->change_id( $change_number ) for @change; } push @result, @change; if ( @kids ) { ## This is the slow but guaranteed perfect way to sort: #@roots = sort { $a->[1] cmp $b->[1] } @roots, @kids; #next; ## This is the faster but more comples production sorting. ## It's a merge sort with some common cases short circuited ## out. @kids = sort { $a->[1] cmp $b->[1] } @kids if @kids > 1; if ( @roots ) { if ( $kids[0]->[1] ge $roots[-1]->[1] ) { push @roots, @kids; } elsif ( $kids[-1]->[1] le $roots[0]->[1] ) { unshift @roots, @kids; } else { my @result; ## 5 is just a guess. if ( @roots > 5 ) { ## Find the first root that is greater than the first ## kid and splice all preceding roots out. my $i = 0; my $k = $kids[0]->[1]; ++$i while $i <= $#roots && $k ge $roots[$i]->[1]; @result = splice @roots, 0, $i; } ## This is the slowest bit. while ( @roots && @kids ) { my $w = $roots[0]->[1] cmp $kids[0]->[1]; if ( $w < 0 ) { push @result, shift @roots } elsif ( $w > 0 ) { push @result, shift @kids } else { push @result, shift @roots, shift @kids } } @roots = ( @result, @roots, @kids ); } } else { @roots = @kids; } } } warn "vcp: ", $change_number, " changes found", $change_number ? sprintf " (%.2f mean revs/change)", $revs->get / $change_number : (), "\n"; $revs->set( @result ); } =item last_rev_in_filebranch my $rev_id = $dest->last_rev_in_filebranch( $source_repo_id, $source_filebranch_id ); Returns the last revision for the file and branch indicated by $source_filebranch_id. This is used to support --continue. Returns undef if not found. =cut sub last_rev_in_filebranch { my VCP::Dest $self = shift; return 0 unless defined $self->{DEST_HEAD_REVS}; return ($self->head_revs->get( \@_ ))[0]; } =item metadata_only This returns false by default, but the experimental branch_diagram destination requires only metadata. A source should look at this before going to the effort of checking out each file. =cut sub metadata_only { 0 } =back =head1 NOTES Several fields are jury rigged for "base revisions": these are fake revisions used to start off incremental, non-bootstrap transfers with the MD5 digest of the version that must be the last version in the target repository. Since these are "faked", they don't contain comments or timestamps, so the comment and timestamp fields are treated as "" and 0 by the sort routines. There is a special sortkey C<avg_comment_time> that allows revisions within the same time period (second, minute, day) to be sorted according to the average time of the comment for the revision (across all revisions with that comment). This causes changes that span more than one time period to still be grouped properly. =cut =head1 COPYRIGHT Copyright 2000, Perforce Software, Inc. All Rights Reserved. This module and the VCP package are licensed according to the terms given in the file LICENSE accompanying this distribution, a copy of which is included in L<vcp>. =head1 AUTHOR Barrie Slaymaker <barries@slaysys.com> =cut 1
# | Change | User | Description | Committed | |
---|---|---|---|---|---|
#57 | 4497 | Barrie Slaymaker |
- --rev-root documented - All destinations handle rev_root defaulting now |
||
#56 | 4487 | Barrie Slaymaker | - dead code removal (thanks to clkao's coverage report) | ||
#55 | 4483 | Barrie Slaymaker | - calls to skip_rev() are summarized to STDOUT | ||
#54 | 4021 | Barrie Slaymaker |
- Remove all phashes and all base & fields pragmas - Work around SWASHGET error |
||
#53 | 3855 | Barrie Slaymaker |
- vcp scan, filter, transfer basically functional - Need more work in re: storage format, etc, but functional |
||
#52 | 3850 | Barrie Slaymaker | - No longer stores all revs in memory | ||
#51 | 3809 | Barrie Slaymaker | - compare_base_revs() now always called with 2 parameters | ||
#50 | 3805 | Barrie Slaymaker | - VCP::Revs::fetch_files() removed | ||
#49 | 3802 | Barrie Slaymaker | - tweak whitespace | ||
#48 | 3800 | Barrie Slaymaker | - <branches> removed from all code | ||
#47 | 3769 | Barrie Slaymaker | - avg_comment_time sort key removed | ||
#46 | 3706 | Barrie Slaymaker | - VCP gives some indication of output progress (need more) | ||
#45 | 3460 | Barrie Slaymaker |
- Revamp Plugin/Source/Dest hierarchy to allow for reguritating options in to .vcp files |
||
#44 | 3429 | Barrie Slaymaker |
- Refactor db_location() into VCP::Plugin so VCP::Source::vss will be able to use it. |
||
#43 | 3409 | Barrie Slaymaker | - Minor doc and code readability improvements | ||
#42 | 3155 | Barrie Slaymaker |
Convert to logging using VCP::Logger to reduce stdout/err spew. Simplify & speed up debugging quite a bit. Provide more verbose information in logs. Print to STDERR progress reports to keep users from wondering what's going on. Breaks test; halfway through upgrading run3() to an inline function for speed and for VCP specific features. |
||
#41 | 3133 | Barrie Slaymaker |
Make destinations call back to sources to check out files to simplify the architecture (is_metadata_only() no longer needed) and make it more optimizable (checkouts can be batched). |
||
#40 | 3129 | Barrie Slaymaker |
Stop calling the slow Cwd::cwd so much, use start_dir instead. |
||
#39 | 3120 | Barrie Slaymaker | Move changeset aggregation in to its own filter. | ||
#38 | 3115 | Barrie Slaymaker |
Move sorting function to the new VCP::Filter::sort; it's for testing and reporting only and the code was bloating VCP::Dest and limiting VCP::Rev and VCP::Dest optimizations. Breaks test suite in minor way. |
||
#37 | 3096 | Barrie Slaymaker | Tuning | ||
#36 | 3087 | Barrie Slaymaker | Improve diagnostics | ||
#35 | 3084 | Barrie Slaymaker | Minor improvement to reporting. | ||
#34 | 3077 | Barrie Slaymaker | remove debugging output | ||
#33 | 3076 | Barrie Slaymaker | Improve change aggregation | ||
#32 | 3059 | Barrie Slaymaker | Minor cleanup of warning about undefined variable usage | ||
#31 | 3046 | Barrie Slaymaker | Fix revision sorting | ||
#30 | 3008 | John Fetkovich |
make state database files go under vcp_state in the program start directory (start_dir) instead of start_dir itself. Also escape periods (.) from the database directory as well as the characters already escaped. |
||
#29 | 2959 | John Fetkovich |
added dump method to lib/VCP/DB_File/sdbm.pm to dump keys => values from a sdbm file. removed similar code from bin/dump_head_revs, bin/dump_rev_map and bin/dump_main_branch_id and called this method instead. also made parse_files_and_revids_from_head_revs_db sub in TestUtils to use in test suites instead of parse_files_and_revids_from_p4_files et. al. |
||
#28 | 2928 | John Fetkovich |
Added empty sub to VCP::Utils.pm to check for empty or undefined strings. Added a couple of calls to it in Dest.pm. |
||
#27 | 2926 | John Fetkovich |
remove --state-location switch add --db-dir and --repo-id switches build state location from concatenation of those two. |
||
#26 | 2899 | Barrie Slaymaker |
Implement a natural sort that organizes the revs in to trees and then builts the submittal order by poping the first root off the trees and then sorting any child revs in to the roots list. |
||
#25 | 2873 | Barrie Slaymaker | Add MainBranchIdDB and a dump util. | ||
#24 | 2808 | Barrie Slaymaker | Pass source_repo_id in to last_rev_in_filebranch | ||
#23 | 2800 | Barrie Slaymaker | Get --continue working in cvs->foo transfers. | ||
#22 | 2725 | Barrie Slaymaker | Start using HeadRevs.pm. | ||
#21 | 2720 | Barrie Slaymaker | Factor RevMapDB code up in to VCP::Dest. | ||
#20 | 2713 | Barrie Slaymaker | Factor RevMapDB management up in to VCP::Dest | ||
#19 | 2330 | Barrie Slaymaker | Silence warnings in corner condition of transferring one file. | ||
#18 | 2324 | Barrie Slaymaker |
Take branch_id in to account in presort stage so that branched files with the same name get treated as independant files. |
||
#17 | 2241 | Barrie Slaymaker | RCS file scanning improvements, implement some of -r | ||
#16 | 2235 | Barrie Slaymaker | Debugging cvs speed reader. | ||
#15 | 2233 | Barrie Slaymaker | debug | ||
#14 | 2232 | Barrie Slaymaker | Major memory and sort speed enhancements. | ||
#13 | 2228 | Barrie Slaymaker | working checkin | ||
#12 | 2198 | Barrie Slaymaker | Minor bugfix for single file mode. | ||
#11 | 2154 | Barrie Slaymaker | Speed up sorting | ||
#10 | 2042 | Barrie Slaymaker | Basic source::p4 branching support | ||
#9 | 2009 | Barrie Slaymaker |
lots of fixes, improve core support for branches and VCP::Source::cvs now supports branches. |
||
#8 | 1855 | Barrie Slaymaker |
Major VSS checkin. Works on Win32 |
||
#7 | 1822 | Barrie Slaymaker |
Get all other tests passing but VSS. Add agvcommenttime sort field. |
||
#6 | 1809 | Barrie Slaymaker | VCP::Patch should ignore lineends | ||
#5 | 1055 | Barrie Slaymaker |
add sorting, revamp test suite, misc cleanup. Dest/revml is not portable off my system yet (need to release ...::Diff) |
||
#4 | 827 | Barrie Slaymaker | Add a test for and debug p4->cvs incremental exports. | ||
#3 | 628 | Barrie Slaymaker | Cleaned up POD in bin/vcp, added BSD-style license. | ||
#2 | 468 | Barrie Slaymaker |
- VCP::Dest::p4 now does change number aggregation based on the comment field changing or whenever a new revision of a file with unsubmitted changes shows up on the input stream. Since revisions of files are normally sorted in time order, this should work in a number of cases. I'm sure we'll need to generalize it, perhaps with a time thresholding function. - t/90cvs.t now tests cvs->p4 replication. - VCP::Dest::p4 now doesn't try to `p4 submit` when no changes are pending. - VCP::Rev now prevents the same label from being applied twice to a revision. This was occuring because the "r_1"-style label that gets added to a target revision by VCP::Dest::p4 could duplicate a label "r_1" that happened to already be on a revision. - Added t/00rev.t, the beginnings of a test suite for VCP::Rev. - Tweaked bin/gentrevml to comment revisions with their change number instead of using a unique comment for every revision for non-p4 t/test-*-in-0.revml files. This was necessary to test cvs->p4 functionality. |
||
#1 | 467 | Barrie Slaymaker | Version 0.01, initial checkin in perforce public depot. |