package VCP::Source::svn ; =head1 NAME VCP::Source::svn - A Subversion repository source =head1 SYNOPSIS vcp svn:URI:/foo/... -r 2:HEAD # All files in foo from @2 to HEAD ## NOTE: Unlike svn, vcp requires spaces after option letters. =head1 DESCRIPTION Source driver enabling L|vcp> to extract versions from a svn repository. The source specification for SVN looks like: svn::/path/to/filespec [] where URI is a path to the repository root. The filespec and EoptionsE determine what revisions to extract. C may contain trailing wildcards, like C to extract an entire directory tree. =head1 OPTIONS =over =item -r -r v_0_001:v_0_002 -r v_0_002: Passed to C as a C<-r> revision specification. Note that a space is needed after the "-r". =back =for test_script t/91svn2revml_*.t =cut $VERSION = 1.2 ; @ISA = qw( VCP::Source VCP::Utils::svn ); use strict ; use Carp ; use File::Basename; use VCP::Debug qw( :debug :profile ) ; use VCP::Logger qw( lg pr pr_doing pr_done pr_done_failed BUG ); use VCP::Rev qw( iso8601format ); use VCP::Source ; use VCP::Utils qw( empty is_win32 shell_quote start_dir_rel2abs xchdir ); use VCP::Utils::svn ; use XML::Parser; sub new { my $self = shift->SUPER::new; ## Parse the options my ( $spec, $options ) = @_ ; $self->parse_svn_repo_spec( $spec ) unless empty $spec; $self->parse_options( $options ); return $self ; } sub parse_options { my $self = shift; $self->SUPER::parse_options( @_ ); } sub options_spec { my $self = shift; return ( $self->SUPER::options_spec, "r=s" => sub { shift; $self->rev_spec( @_ ) }, ); } sub init { my $self= shift ; $self->SUPER::init; ## Set default repo_id. $self->repo_id( "svn:" . $self->repo_server ) if empty $self->repo_id && ! empty $self->repo_server ; my $files = $self->repo_filespec ; $self->deduce_rev_root( $files ) unless defined $self->rev_root; ## Make sure the svn command is available and record its output ## for any revml that might be emitted. $self->svn( [ '--version' ], undef, \$self->{SVN_INFO} ) ; ## This does a checkout, so we'll blow up quickly if there's a problem. $self->create_svn_workspace ; my $spec = $self->repo_filespec; $spec =~ s{[\\]}{/}g; $spec =~ s{/+(\.\.\.)?\z}{}; ## svn recurses. $spec =~ s{^/*}{/}; $self->{SVN_SPEC_DIR} = $spec; $self->{SVN_URL} = $self->repo_server . $spec; } sub rev_spec { my $self = shift ; $self->{SVN_REV_SPEC} = shift if @_ ; return empty( $self->{SVN_REV_SPEC} ) ? "1:HEAD" : undef; } sub rev_spec_svn_option { my $self = shift ; return defined $self->rev_spec ? "-r" . $self->rev_spec : (), } sub denormalize_name { my $self = shift ; ( my $n = '/' . $self->SUPER::denormalize_name( @_ ) ) =~ s{/+}{/}g; return $n; } sub handle_header { my $self = shift; my ( $header ) = @_; $header->{rep_type} = 'svn'; $header->{rep_desc} = $self->{SVN_INFO}; $header->{rev_root} = $self->rev_root; $self->dest->handle_header( $header ); } sub get_source_file { my $self = shift ; my $r ; ( $r ) = @_ ; BUG "can't check out ", $r->as_string, "\n" unless $r->is_real_rev; my $wp = $self->tmp_dir( "revs", $r->source_name, $r->source_rev_id ) ; $self->mkpdir( $wp ) ; my $svn_name = $self->{SVN_URL}; $svn_name =~ s{[\\/][^\\/]*\z}{} if $self->{SVN_URL_IS_FILE}; $svn_name .= "/" . $r->source_name; ## Use SUPER:: to avoid getting the leading '/' ## svn cat seems to ignore -r and likes the "@" notation. $self->svn( [ "cat", $svn_name . "\@" . $r->source_rev_id, ], undef, $wp, ) ; return $wp; } sub parse_svn_log_output { my $self = shift ; my @cmd = ( "svn", "log", "-v", "--xml", $self->rev_spec_svn_option, $self->{SVN_URL}, ); my $cmd = join " ", shell_quote @cmd; my $p = XML::Parser->new( Handlers => { Start => sub { my $expat = shift ; my $tag = shift ; my $meth = "start_log_$tag"; $self->$meth( @_ ) if $self->can( $meth ); }, End => sub { my $expat = shift ; my $tag = shift ; $self->{SVN_LOG_TEXT} = undef; my $meth = "end_log_$tag"; $self->$meth( @_ ) if $self->can( $meth ); }, Char => sub { my $expat = shift ; ${$self->{SVN_LOG_TEXT}} .= shift if $self->{SVN_LOG_TEXT}; } }, ) ; pr "\$ $cmd"; $p->parsefile( "$cmd |" ); } sub start_log_logentry { my $self = shift; $self->{l} = {@_}; } sub start_log_msg { my $self = shift; $self->{SVN_LOG_TEXT} = \($self->{l}->{msg} = ""); } sub start_log_date { my $self = shift; $self->{SVN_LOG_TEXT} = \($self->{l}->{date} = ""); } sub start_log_author { my $self = shift; $self->{SVN_LOG_TEXT} = \($self->{l}->{author} = ""); } sub start_log_path { my $self = shift; my $p = { @_ }; if ( $p->{action} eq "A" && !empty( $p->{"copyfrom-path"} ) ) { $p->{action} = "branch"; } push @{$self->{l}->{paths}}, $p; $self->{SVN_LOG_TEXT} = \($p->{text} = ""); } my %actions = ( A => "add", D => "delete", M => "edit", branch => "branch", ); sub end_log_logentry { my $self = shift; my $l = $self->{l}; $self->{l} = undef; my $revision = $l->{revision}; my $msg = $l->{msg}; my $author = $l->{author}; my $time = $self->parse_time( $l->{date} ); my $repo_id = $self->repo_id; ## We assume the log file is in historical or reverse historical ## order (increasing rev order or decreasing rev order). We detect ## this whenever the revision number changes, then use the detected ## direction to set the "previous_id" field appropriately. if ( $revision ne $self->{SVN_INFO_REV} ) { if ( !$self->{SVN_LOG_ORDER} && !empty $self->{SVN_INFO_REV} ) { $self->{SVN_LOG_ORDER} = $revision > $self->{SVN_INFO_REV} ? "ascending" : "descending"; delete $self->{SVN_PREVIOUS_IDS} if $self->{SVN_LOG_ORDER} eq "descending"; } $self->{SVN_INFO_REV} = $revision; ## Get directory-ness $self->{SVN_TREE_INFO} = $self->get_svn_path_info( $self->{SVN_URL}, $revision, "-R", ); ## Get "type" for all files. ## ## No "--xml" available yet for this option, and this is likely to ## be faster and simpler anyway. $self->svn( [ "propget", "svn:mime-type", "-R", $self->{SVN_URL} . "\@" . $revision ], \undef, \my $output ); my $strip_leading_chars = do { my $server_root_url = $self->repo_server; $server_root_url =~ s{[\\/]+\z}{}; length $server_root_url; }; %{$self->{SVN_TYPES}} = ( map { my $line = $_; debug "svn propget: %line" if debugging; my ( $url, $value ) = split /\s+-\s+/, $_; warn( "couldn't parse propget output \"$_\"\n" ), next unless defined $value; my $fn = "/" . substr $url, $strip_leading_chars; $fn =~ s{[\\/][\\/]+}{/}g; $value eq "application/octet-stream" ? ( $fn => "binary" ) : (); } split /\r?\n+/, $output ); } ## TODO: note dir-ness by whether or not there are any children ## and (perhaps) avoid having to run svn info -R. my $info = $self->{SVN_TREE_INFO}; for my $p ( @{$l->{paths}} ) { my $fn = $p->{text}; ## svn log returns all paths. ## TODO: Apply shell-like regex filtering here. next unless 0 == index $fn, $self->{SVN_SPEC_DIR}; ## The first log entry is usually for the directory being ## scanned. Ignore any such paths. next if !$self->{SVN_URL_IS_FILE} && $fn =~ m{\A/*\Q$self->{SVN_SPEC_DIR}\E\z}; my $filebranch_id = $fn; my $mode = $self->rev_mode( $filebranch_id, $revision ); next unless $mode; my $action = $actions{$p->{action}} || "edit"; my $norm_name = $self->normalize_name( $fn ); my $this_info; if ( $action eq "delete" ) { ## We need to look at the previous revision to see ## whether this was a directory or not. ## We could store the previous SVN_TREE_INFO and look ## in that, but I don't expect directory deletions to ## be a high percentage of the actions, so I'm not ## doing that optimization right now. $this_info = $self->get_svn_path_info( "$self->{SVN_URL}/$norm_name", $revision - 1 ); } else { $this_info = $info->{$norm_name}; } next unless $this_info && $this_info->{kind} eq "file"; my $id = "$fn\@$revision"; my $branch_id = (fileparse $fn)[1]; my $previous_id; if ( $action eq "branch" ) { $previous_id = "$p->{'copyfrom-path'}\@$p->{'copyfrom-rev'}"; } elsif ( $self->{SVN_LOG_ORDER} eq "ascending" ) { ## If it's an "add", this gets undef, which is ok. $previous_id = $self->{SVN_PREVIOUS_IDS}->{$fn}; } my $r = VCP::Rev->new( id => $id, action => $action, name => $norm_name, source_name => $norm_name, source_filebranch_id => $filebranch_id, branch_id => $branch_id, source_branch_id => $branch_id, source_repo_id => $repo_id, rev_id => $revision, source_rev_id => $revision, change_id => $revision, source_change_id => $revision, time => $time, user_id => $author, previous_id => $previous_id, $action ne "branch" ? ( type => $self->{SVN_TYPES}->{$fn} || "text", ) : (), comment => $msg, ); $r->base_revify if $mode eq "base"; $self->set_last_rev_in_filebranch_previous_id( $r ) if $self->{SVN_LOG_ORDER} eq "descending"; $self->queue_rev( $r ); $self->{SVN_PREVIOUS_IDS}->{$fn} = $id if $self->{SVN_LOG_ORDER} ne "descending"; ## Note that this remembers previous_ids until we're sure ## that we don't need to remember them. } } sub get_revs_from_log_file { my $self = shift; $self->{SVN_TREE_INFO} = undef; $self->{SVN_INFO_REV} = ""; $self->{SVN_LOG_ORDER} = ""; ## will be "ascending" or "descending" $self->parse_svn_log_output; $self->{SVN_TREE_INFO} = undef; $self->{SVN_INFO_REV} = ""; $self->{SVN_LOG_ORDER} = ""; ## will be "ascending" or "descending" } sub scan_metadata { my $self = shift; $self->get_revs_from_log_file; pr "found ", $self->queued_rev_count, " rev(s)", "\n"; } =head1 LIMITATIONS Does not extract directory node metadata. Assumes binaryness is assigned with "application/octet-stream"; other svn:mime-type values are ignored (such files are assumed to be "text"). Incremental exports of tag branches (/tags/foo/...) to repositories that implement labels will not apply any labels in an incremental update to revisions that were exported in a previous update. The VCP::Filter::map module will emit warnings like: vcp: map: /path/to/file@3 not found, had labels queued: ... =head1 SEE ALSO L, L, L. =head1 AUTHOR Barrie Slaymaker =head1 COPYRIGHT Copyright (c) 2000, 2001, 2002 Perforce Software, Inc. All rights reserved. See L (C) for the terms of use. =cut 1