package VCP::Dest::p4 ; =head1 NAME VCP::Dest::p4 - p4 destination driver =head1 SYNOPSIS vcp p4:user:password@p4port:[] vcp p4:user(client):password@p4port:[] vcp p4:[] =head1 DESCRIPTION The spec is a perforce repository spec and must begin with // and a depot name ("//depot"), not a local filesystem spec or a client spec. There should be a trailing "/..." specified. If no user name, password, or port are given, the underlying p4 command will look at that standard environment variables. The password is passed using the environment variable P4PASSWD so it won't be logged in debugging or error messages, the other options are passed on the command line. If no client name is given, a temporary client name like "vcp_tmp_1234" will be created and used. The P4CLIENT environment variable will not be used. If an existing client name is given, the named client spec will be saved off, altered, used, and restored. the client was created for this import, it will be deleted when complete, regardless of whether the client was specified by the user or was randomly generated. WARNING: If perl coredumps or is killed with a signal that prevents cleanup--like a SIGKILL (9)--the the client deletion or restoral will not occur. The client view is not saved on disk, either, so back it up manually if you care. THE CLIENT SAVE/RESTORE FEATURE IS EXPERIMENTAL AND MAY CHANGE BASED ON USER FEEDBACK. VCP::Dest::p4 attempts change set aggregation by sorting incoming revisions. See L for the order in which revisions are sorted. Once sorted, a change is submitted whenever the change number (if present) changes, the comment (if present) changes, or a new rev of a file with the same name as a revision that's pending. THIS IS EXPERIMENTAL, PLEASE DOUBLE CHECK EVERYTHING! =head1 OPTIONS =for test_scripts t/90revml2p4_0.t t/90revml2p4_1.t t/90revml2p4_2.t t/90revml2p4_3.t =over =item --init-p4d Initializes a directory and starts a p4d in it on the given port. Refuses to init a non-empty directory. In this case the p4port portion of the destination specification must point to a directory and the port, if present, will be used for the port (otherwise a randomized port number other than p4d's 1666 default will be used.) A temporary p4d will be started that should be shut down upon process exit. If the process does not exit cleanly (for instance, if sent the QUIT signal), this shutdown may not occur. =item --delete-p4d-dir If C<--init-p4d> is passed and the target directory is not empty, it will be removed before running the p4d. THIS IS DANGEROUS AND SHOULD ONLY BE USED IN TEST ENVIRONMENTS. =item --change-branch-rev-1 Some SCMs don't create a branch of a file until there is actually a change made to that file. So the first revision of a file on a branch is different from its parent on the main branch. Normally, p4 does not work this way: rev #1 of a branched file is a very inexpensive copy of the parent revision: you do a p4 integrate, submit, edit, submit sequence to branch a file and introduce it's changes. This option forces VCP to do a p4 integrate, add, submit sequence to branch files, thus capturing the branch and the file alterations in one change. Using this option allows VCP to more exactly model the source repository in the destination repository revision-for-rebision, but leaves you with a perforce repository that may not be consistent with your work practices, so it is not the default behavior. =back =cut $VERSION = 1 ; use strict ; use vars qw( $debug ) ; $debug = 0 ; use Carp ; use File::Basename ; use File::Path ; use VCP::Debug ':debug' ; use VCP::Dest ; use VCP::Rev ; use VCP::Utils qw( empty ); use VCP::Utils::p4 qw( underscorify_name ); ## If we ever want to store state in the dest repo, this constant ## turns that on. It should become an option if it is ever ## reenabled, probably replacing the VCP::RevMapDB. use constant store_state_in_repo => 0; use base qw( VCP::Dest VCP::Utils::p4 ) ; use fields ( # 'P4_SPEC', ## The root of the tree to update 'P4_PENDING', ## Revs pending the next submit 'P4_WORK_DIR', ## Where to do the work. 'P4_REPO_CLIENT', ## See VCP::Utils::p4 for accessors and usage... 'P4_LABEL_FORM', ## A cached label form 'P4_ADDED_LABELS', ## A hash of labels that we've already added. ## TODO: Preload this using the p4 labels command ## to save some time when writing to big repos? 'P4_CHANGE_BRANCH_REV_1', ## Which command sequence to use when ## branching a file. See POD above. ## members for change number divining: 'P4_PREV_CHANGE_ID', ## The change_id in the r sequence, if any 'P4_PREV_COMMENT', ## Used to detect change boundaries 'P4_REV_MAP', ## RevMapDB ) ; sub new { my $class = shift ; $class = ref $class || $class ; my VCP::Dest::p4 $self = $class->SUPER::new( @_ ) ; ## Parse the options my ( $spec, $options ) = @_ ; $self->parse_p4_repo_spec( $spec ) ; $self->repo_id( "p4:" . $self->repo_server ); ## We use the rev_root only to munge branch specs. ## We let p4 set the rev_root by setting the ## client view to the destination path the user specified, so as perforce ## adds each file in our working dir, it puts them in the right spot ## in the repository (under the destination rev_root). $self->deduce_rev_root( $self->repo_filespec ); $self->parse_options( $options, "init-p4d!" => \my $init_p4d, "change-branch-rev-1!" => \$self->{P4_CHANGE_BRANCH_REV_1}, "delete-p4d-dir!" => \my $delete_p4d_dir, "run-p4d!" => \my $run_p4d, ); if ( $init_p4d ) { if ( $delete_p4d_dir ) { $self->rev_map->delete_db; $self->head_revs->delete_db; $self->files->delete_db; } $self->init_p4d( $delete_p4d_dir ); } else { warn "ignoring --delete-p4d-dir, which is only useful with --init-p4d\n" if $delete_p4d_dir; $self->run_p4d if $run_p4d; } $self->rev_map->open_db; $self->head_revs->open_db; $self->files->open_db; $self->set_up_p4_user_and_client; $self->init_p4_view ; return $self ; } sub init_p4d { my $self = shift; my ( $delete_p4d_dir ) = @_; my ( $dir, $port ) = split ":", $self->repo_server, 2; my @files; @files = glob "$dir/*" if -d $dir; if ( @files && $delete_p4d_dir ) { require File::Path; rmtree [ @files ]; @files = glob "$dir/*"; } die "vcp: cannot --init-p4d on non-empty dir $dir\n" if @files; $self->mkdir( $dir ) unless -e $dir; ## Ok, this is wierd: we need to fork & run p4d in foreground mode so that ## we can capture it's PID and kill it later. There doesn't seem to be ## the equivalent of a 'p4d.pid' file. If we let it daemonize, then I ## don't know how to get it's PID. $port = $self->launch_p4d( $dir, $port ); $self->repo_server( "localhost:$port" ); } sub checkout_file { my VCP::Dest::p4 $self = shift ; my VCP::Rev $r ; ( $r ) = @_ ; confess unless defined $self && defined $self->header ; debug "vcp: retrieving '", $r->as_string, "' from p4 dest repo" if debugging $self ; ## The rev_root was put in the client view, p4 will "denormalize" ## the name for us. my $work_path = $self->work_path( $r->name ) ; debug "vcp: work_path '$work_path'" if debugging $self ; my ( undef, $work_dir ) = fileparse( $work_path ) ; $self->mkpdir( $work_path ) unless -d $work_dir ; my $tag = store_state_in_repo ? "\@vcp_" . underscorify_name $r->id : "#" . ($self->rev_map->get( [ $r->source_repo_id, $r->id ] ))[1]; ## The -f forces p4 to sync even if it thinks it doesn't have to. It's ## not in there for any known reason, just being conservative. $self->p4( ['sync', '-f', $r->name . $tag ] ) ; die "'$work_path' not created in backfill" unless -e $work_path ; return $work_path ; } sub handle_header { my VCP::Dest::p4 $self = shift ; my ( $h ) = @_; $self->{P4_PENDING} = [] ; $self->{P4_PREV_COMMENT} = undef ; $self->{P4_PREV_CHANGE_ID} = undef ; $self->{P4_LABEL_FORM} = undef ; $self->{P4_ADDED_LABELS} = {}; $self->SUPER::handle_header( @_ ) ; if ( $h->{branches} ) { for my $b ( $h->{branches}->get ) { my $spec = $b->p4_branch_spec; next if empty $spec; ## Re-root the view. my $found_it; $spec = $self->build_p4_form( map { if ( $found_it ) { ( my $source_root = $h->{ rev_root} ) =~ s{^/*}{//}; ( my $dest_root = $self->rev_root ) =~ s{^/*}{//}; s{\Q$source_root}{$dest_root}g; undef $found_it; } $found_it = $_ eq "View"; $_; } $self->parse_p4_form( $spec ) ); $self->p4( [qw(branch -i -f)], \$spec ); } } } sub handle_rev { my VCP::Dest::p4 $self = shift ; my VCP::Rev $r ; ( $r ) = @_ ; if ( @{$self->{P4_PENDING}} && ( ( defined $r->change_id && defined $self->{P4_PREV_CHANGE_ID} && $r->change_id ne $self->{P4_PREV_CHANGE_ID} && ( debugging( $self ) ? debug "vcp: time to submit: change_id changed" : 1 ) ) || ( defined $r->comment && defined $self->{P4_PREV_COMMENT} && $r->comment ne $self->{P4_PREV_COMMENT} && ( debugging( $self ) ? debug "vcp: time to submit: comment changed [", $r->comment, "] vs [", $self->{P4_PREV_COMMENT}, "]" : 1 ) ) || ( ! $r->is_placeholder_rev && grep( ! $_->is_placeholder_rev && $r->name eq $_->name, @{$self->{P4_PENDING}} ) && ( debugging( $self ) ? debug "vcp: time to submit: name repeated" : 1 ) ) || ( $r->previous && grep( $_ == $r->previous, @{$self->{P4_PENDING}} ) && ( debugging( $self ) ? debug "vcp: time to submit: need a previous revision" : 1 ) ) ) ) { $self->submit; $self->{P4_PREV_CHANGE_ID} = $r->change_id; $self->{P4_PREV_COMMENT} = $r->comment; } $self->compare_base_revs( $r ) if $r->is_base_rev && defined $r->work_path ; return if $r->is_base_rev ; push @{$self->{P4_PENDING}}, $r; } ## Any revs that must be integrated may be integrated as a change ## before the desired change. sub _add_it { my VCP::Dest::p4 $self = shift; my VCP::Rev $r ; ( $r ) = @_ ; my $fn = $r->name ; my $work_path = $self->work_path( $fn ) ; ! -e $work_path; } ## Revs are only queued up by handle_rev() because we need to ## deal with integrate commands before we deal with other commands. sub delayed_handle_rev { my VCP::Dest::p4 $self = shift; my VCP::Rev $r ; ( $r ) = @_ ; return 0 if $r->is_placeholder_rev; my $fn = $r->name ; debug "vcp: importing ", $r->as_string if debugging $self ; my $work_path = $self->work_path( $fn ) ; debug "vcp: work_path '$work_path'" if debugging $self ; my $denorm_name = $self->denormalize_name( $r->name ); if ( $r->action eq "delete" ) { my ( @state ) = $self->files->get( [ $denorm_name ] ); unless ( @state ) { ## This must be a fake deletion from CVS: it does this when ## a file is added on a branch. So we fake up an empty ## file and add it. open F, ">$work_path" or die "vcp: $! opening $work_path\n"; close F; $r->dest_work_path( $work_path ) ; $self->p4( [ "add", $fn ] ); $self->p4_submit( undef, "spoof addition of deleted file", $r ); $self->files->set( [ $denorm_name ], "touched and added" ); } my $deleteit = @state && $state[0] ne "deleted"; if ( $deleteit && ! -e $work_path ) { $self->p4( [ "sync", "-f", $fn ] ); } if ( -e $work_path ) { unlink $work_path || die "$! unlinking $work_path" ; } $self->p4( [ "delete", $fn] ) if $deleteit; $self->files->set( [ $denorm_name ], "deleted" ); } else { ## TODO: Don't assume same filesystem or working link(). { my $filetype = defined $r->p4_info && $r->p4_info =~ /\((\S+)\)$/ ? $1 : $r->type ; my $add_it = $self->_add_it( $r ); if ( $add_it ) { $self->mkpdir( $work_path ) ; } else { $self->p4( [ "edit", "-t", $filetype, $fn ] ) ; unlink $work_path or die "$! unlinking $work_path" ; $self->files->set( [ $denorm_name ], "edited" ); } debug "vcp: linking ", $r->work_path, " to $work_path" if debugging $self ; link $r->work_path, $work_path or die "$! linking ", $r->work_path, " -> $work_path" ; $r->dest_work_path( $work_path ) ; if ( defined $r->mod_time ) { utime $r->mod_time, $r->mod_time, $work_path or die "$! changing times on $work_path" ; } if ( $add_it ) { $self->p4( [ "add", "-t", $filetype, $fn ] ); $self->files->set( [ $denorm_name ], "added" ); } } ## TODO: Provide command line options for user-defined tag prefixes $r->add_label( "vcp_" . $r->id ) if store_state_in_repo; } return 1; } sub handle_footer { my VCP::Dest::p4 $self = shift ; $self->submit if @{$self->{P4_PENDING}}; $self->SUPER::handle_footer ; } sub p4_submit { my VCP::Dest::p4 $self = shift; my ( $max_time, $description, @revs ) = @_; if ( length $description ) { $description =~ s/^/\t/gm ; $description .= "\n" if substr $description, -1 eq "\n" ; } my $change ; $self->p4( [ "change", "-o" ], undef, \$change ) ; $change =~ s/^Description:.*\r?\n\r?.*/Description:\n$description/m or die "vcp: Couldn't modify change description\n$change" ; my $submit_log; $self->p4([ "submit", "-i"], \$change, \$submit_log ) ; ## extract the change number and the file list from the submit output my $change_number; my %p4_rev_ids; { while ( $submit_log =~ m{\G(.*)([\r\n]+|\z)}g ) { my $line = $1; if ( $line =~ m{^\w+\s+//(.*)#(\d+)\z} ) { $p4_rev_ids{$1} = $2; } elsif ( $line =~ m{^Change (\d+) } ) { $change_number = $1; } } } ## Force the correct date and a user id { my $change; $self->p4( [ "change", "-o", $change_number ], \undef, \$change ) ; if ( defined $max_time ) { $change =~ s/^Date:.*/Date:\t$max_time/m or die "vcp: Couldn't modify change date\n$change" ; } my $user_id; for ( @{$self->{P4_PENDING}} ) { unless ( empty $_->user_id ) { $user_id = underscorify_name( $_->user_id ); last; } } if ( defined $user_id ) { $change =~ s/^User:.*/User:\t$user_id/m or die "vcp: Couldn't modify change user\n$change" ; } $self->p4( [ "change", "-i", "-f" ], \$change ) ; } for my $r ( @revs ) { next if $r->is_placeholder_rev; my $name = $self->denormalize_name( $r->name ); my $rev_id = $p4_rev_ids{$name}; unless ( defined $rev_id ) { $submit_log =~ s/^/ /mg; require Data::Dumper; die "vcp: no rev number found in p4 submit output for ", $r->as_string, ":\n", $submit_log, "looked for: $name\n", "p4 revs parsed: ", Data::Dumper::Dumper( \%p4_rev_ids ); } $self->rev_map->set( [ $r->source_repo_id, $r->id ], $name, $rev_id ); $self->head_revs->set( [ $r->source_repo_id, $r->source_filebranch_id ], $r->source_rev_id ); } } sub submit { my VCP::Dest::p4 $self = shift ; ## Take care of any integrations up front { my @integrated; for my $r ( @{$self->{P4_PENDING}} ) { if ( $self->_add_it( $r ) && $r->previous && ( $r->previous->branch_id || "" ) ne ( $r->branch_id || "" ) ) { ## Branch the previous version to make the new one. Leave ## add_it set so we can drop the new one in over the ## branched version in case it's changed. my $fn = $r->name ; my $work_path = $self->work_path( $fn ) ; $self->p4( [ "integrate", $r->previous->name, $fn ] ) ; if ( $self->{P4_CHANGE_BRANCH_REV_1} ) { ## Force it to be `p4 add`ed as well by delayed_handle_rev() debug "unlinking $work_path so it will be added\n" if debugging $self; unlink $work_path or die "$! unlinking $work_path\n"; } else { ## Integrate the first change push @integrated, $r; } } } if ( @integrated ) { $self->p4_submit( undef, "create branches", @integrated ); } } my $things_to_submit; $things_to_submit += $self->delayed_handle_rev( $_ ) for @{$self->{P4_PENDING}}; if ( $things_to_submit ) { my %pending_labels ; my %comments ; my $max_time ; if ( @{$self->{P4_PENDING}} ) { for my $r ( @{$self->{P4_PENDING}} ) { $comments{$r->comment} = $r->name if defined $r->comment ; $max_time = $r->time if ! defined $max_time || $r->time > $max_time ; for my $l ( $r->labels ) { push @{$pending_labels{$l}}, $r->dest_work_path ; } } if ( defined $max_time ) { my @f = reverse( (localtime $max_time)[0..5] ) ; $f[0] += 1900 ; ++$f[1] ; ## Day of month needs to be 1..12 $max_time = sprintf "%04d/%02d/%02d %02d:%02d:%02d", @f ; } elsif ( debugging $self ) { debug "No max_time found" ; } } my $description = join( "\n", keys %comments ) ; $self->p4_submit( $max_time, $description, @{$self->{P4_PENDING}} ); ## Create or add a label spec for each of the labels. The 'sort' is to ## make debugging output more legible. ## TODO: Modify RevML to allow label metadata (owner, desc, options) ## to be passed through. Same for user, client, jobs metadata etc. ## The assumption is made that most labels will apply to a single change ## number, so we do the labelling once per submit. I don't think that ## this will break if it doesn't, but TODO: add more labelling tests. for my $label ( sort keys %pending_labels ) { my $p4_label = underscorify_name $label; $self->{P4_ADDED_LABELS}->{$p4_label} ||= do { $self->{P4_LABEL_FORM} ||= do { my $out; $self->p4( [qw( label -o ), $p4_label], undef, \$out ) ; $out =~ s/(^.+:\s+)\Q$p4_label\E$/$1<<