package VCP::Dest::revml ;

=head1 NAME

VCP::Dest::revml - Outputs versioned files to a revml file

=head1 SYNOPSIS

## revml output class:

   revml:[<output-file>]
   revml:[<output-file>] -dtd <revml.dtd>
   revml:[<output-file>] -dtd <version>

=head1 DESCRIPTION

Writes each revision it is handed to a RevML document through a
RevML::Writer.  Output goes to the file named in the repository spec, or
to STDOUT when no file name is given.  A revision's content is written in
full (plain or base64 encoded) or, when an earlier revision of the file
with a work path has been seen and the data looks like text, as a unified
diff against that earlier revision.

=head1 EXTERNAL METHODS

=over

=cut

use strict ;

use Carp ;
use Digest::MD5 ;
use Fcntl ;
use Getopt::Long ;
use MIME::Base64 ;
use RevML::Doctype ;
use RevML::Writer ;
use Symbol ;
## NOTE(review): newer perls (5.22 and later, per the UNIVERSAL docs) refuse
## to export functions from UNIVERSAL, and isa() is not called anywhere in
## this file -- confirm nothing else relies on it before removing this line.
use UNIVERSAL qw( isa ) ;
use VCP::Rev ;

use vars qw( $VERSION $debug ) ;

$VERSION = 0.1 ;

$debug = 0 ;

use base 'VCP::Dest' ;

use fields (
   'OUT_NAME',  ## The name of the output file, or '-' for stdout
   'OUT_FH',    ## The handle of the output file
   'WRITER',    ## The XML::AutoWriter instance to write with
) ;

## Size of each sysread() request made against a work file.
my $read_size = 100_000 ;

## encode_base64() turns every 57 input bytes in to one 76 character output
## line, so only whole multiples of 57 bytes may be encoded before the end
## of the data is reached.
my $base64_quantum = 57 ;


=item new

Creates a new instance.  Takes the repository spec and a reference to an
array of options.  The only option is '-dtd' (also spelled '-version'),
which takes a value that is handed to RevML::Doctype->new and overrides
the default DTD found by searching for modules matching RevML::DTD:v*.pm.

Attempts to create the output file if one is specified; dies if it can't
be opened for writing.

=cut

sub new {
   my $class = shift ;
   $class = ref $class || $class ;

   my VCP::Dest::revml $self = $class->SUPER::new ;

   my ( $spec, $args ) = @_ ;

   ## Callers that have no options may pass nothing at all; give
   ## GetOptions an empty list to chew on in that case.
   $args = [] unless defined $args ;

   my $parsed_spec = $self->parse_repo_spec( $spec ) ;

   my $file_name = $parsed_spec->{FILES} ;
   $self->{OUT_NAME} = defined $file_name && length $file_name
      ? $file_name
      : '-' ;

   if ( $self->{OUT_NAME} eq '-' ) {
      $self->{OUT_FH} = \*STDOUT ;
      ## TODO: Check OUT_FH for writability when it's set to STDOUT
   }
   else {
      $self->{OUT_FH} = Symbol::gensym ;
      ## TODO: Provide a '-f' force option
      ## Three-arg open so that odd characters in the file name can never
      ## be taken as part of the open mode.
      open( $self->{OUT_FH}, '>', $self->{OUT_NAME} )
         or die "$!: $self->{OUT_NAME}" ;
   }

   my $doctype ;

   {
      ## GetOptions works on @ARGV, so alias @ARGV to our options for the
      ## duration of this block.
      local *ARGV = $args ;
      GetOptions(
         'dtd|version=s' => sub {
            ## $_[0] is the option name, $_[1] is its value.
            $doctype = RevML::Doctype->new( $_[1] ) ;
         },
      ) or $self->usage_and_exit ;
   }

   $doctype = RevML::Doctype->new
      unless $doctype ;

   $self->writer(
      RevML::Writer->new(
         DOCTYPE => $doctype,
         OUTPUT  => $self->{OUT_FH},
      )
   ) ;

   return $self ;
}


## Formats a time (seconds since the epoch, defaulting to the current time)
## as "YYYY-MM-DD hh:mm:ssZ" in GMT.
##
## The prototype is kept so that existing calls written without parentheses
## keep parsing the way they always have.
sub _ISO8601(;$) {
   ## gmtime's first six fields are sec, min, hour, mday, mon, year;
   ## reversing them gives the order the format string wants.
   my @f = reverse( ( @_ ? gmtime( shift ) : gmtime )[0..5] ) ;
   $f[0] += 1900 ;
   $f[1] ++ ; ## Month of year needs to be 1..12
   return sprintf( "%04d-%02d-%02d %02d:%02d:%02dZ", @f ) ;
}


## Sends the string referred to by $buf to the writer $w.  Runs of
## characters outside the ranges below go through $w->characters(); every
## character in 0x00-0x08, 0x0b-0x1f or 0x7f-0xff is sent on its own via
## $w->char() with a code attribute of the form "0x%02x".  Tab (0x09) and
## line feed (0x0a) are not in those ranges and so pass through untouched.
##
## Leaves the writer in data mode 0; callers turn data mode back on.
sub _emit_characters {
   my ( $w, $buf ) = @_ ;

   $w->setDataMode( 0 ) ;

   ## Note that we don't let XML munge \r to be \n!!
   while ( $$buf =~ m{\G(?:
         (   [\x00-\x08\x0b-\x1f\x7f-\xff])
         | ([^\x00-\x08\x0b-\x1f\x7f-\xff]*)
      )}gx
   ) {
      if ( defined $1 ) {
         $w->char( "", code => sprintf( "0x%02x", ord $1 ) ) ;
      }
      else {
         $w->characters( $2 ) ;
      }
   }
}


=item handle_rev

Takes a VCP::Rev and writes it out through the writer.  When no revs have
been seen yet, the XML declaration and the header's time, rep_type,
rep_desc and rev_root are written first.

A base rev gets only its metadata and an MD5 digest of its work file.  A
delete gets no content.  Any other rev gets either its full content or a
unified diff against the previously seen rev, followed by an MD5 digest
of the new work file.

Dies if a work file can't be opened, read or seeked on, if a base rev
shows up after another rev has already been seen for the file, or if the
previous rev's work file is needed for a diff but is missing.

=cut

sub handle_rev {
   my VCP::Dest::revml $self = shift ;
   my VCP::Rev $r ;
   ( $r ) = @_ ;

   my $w = $self->writer ;

   if ( $self->none_seen ) {
      $w->setDataMode( 1 ) ;
      $w->xmlDecl ;
      my $h = $self->header ;
      ## VCP::Source::revml passes through the original date.  Other sources
      ## don't.
      $w->time(
         defined $h->{time} ? _ISO8601( $h->{time} ) : _ISO8601()
      ) ;
      $w->rep_type( $h->{rep_type} ) ;
      $w->rep_desc( $h->{rep_desc} ) ;
      $w->rev_root( $h->{rev_root} ) ;
   }

   my VCP::Rev $saw = $self->seen( $r ) ;

   ## If there's no work path for the current file, keep the previous one.
   ## This is a cheat that allows us to diff against the last known version
   ## if a file is deleted and then re-added.  Without this line, we would
   ## have to include the new version of the file.
   $self->seen( $saw ) if $saw && ! defined $r->work_path ;

   my $fn = $r->name ;

   my $is_base_rev = $r->is_base_rev ;

   die(
      "Saw '", $saw->as_string,
      "', but found a later base rev '" . $r->as_string, "'"
   ) if $saw && $is_base_rev ;

   $w->start_rev ;
   $w->name(      $fn                      ) ;
   $w->type(      $r->type                 ) ;
   $w->p4_info(   $r->p4_info              ) if defined $r->p4_info ;
   $w->cvs_info(  $r->cvs_info             ) if defined $r->cvs_info ;
   $w->rev_id(    $r->rev_id               ) ;
   $w->change_id( $r->change_id            ) if defined $r->change_id ;
   $w->time(      _ISO8601( $r->time )     )
      if ! $is_base_rev || defined $r->time ;
   $w->mod_time(  _ISO8601( $r->mod_time ) ) if defined $r->mod_time ;
   ## NOTE(review): this guard tests $r->time, not $r->user_id.  That may be
   ## deliberate (a base rev carrying user_id only alongside time) or a
   ## copy & paste slip; left as it was -- confirm against the DTD.
   $w->user_id(   $r->user_id              )
      if ! $is_base_rev || defined $r->time ;

   ## Sorted for readability & testability
   $w->label( $_ ) for sort $r->labels ;

   if ( defined $r->comment && length $r->comment ) {
      $w->start_comment ;
      my $c = $r->comment ;
      _emit_characters( $w, \$c ) ;
      $w->end_comment ;
      $w->setDataMode( 1 ) ;
   }

   my $digestion ;   ## Set when an MD5 digest of the work file is wanted
   my $cp = $r->work_path ;
   my $fh ;          ## Lexical handle on the work file, when one is opened

   if ( $is_base_rev ) {
      ## Base revs carry no content, just the digest.
      sysopen( $fh, $cp, O_RDONLY ) or die "$!: $cp\n" ;
      $digestion = 1 ;
   }
   elsif ( $r->action eq 'delete' ) {
      $w->delete() ;
      $self->delete_seen( $r ) ;
   }
   else {
      sysopen( $fh, $cp, O_RDONLY ) or die "$!: $cp\n" ;

      my $buf ;
      my $read ;
      my $has_nul ;

      ## Pass 1: scan forward until a NUL turns up or the file runs out.
      do {
         $read = sysread( $fh, $buf, $read_size ) ;
         die "$! reading $cp\n" unless defined $read ;
         $has_nul = $buf =~ tr/\x00// if $read ;
      } while $read && ! $has_nul ;

      ## Pass 2: go back to the start and pick the encoding from the first
      ## chunk alone.
      sysseek( $fh, 0, 0 ) or die "$! seeking on $cp\n" ;
      $read = sysread( $fh, $buf, $read_size ) ;
      die "$! reading $cp\n" unless defined $read ;
      $buf = '' unless $read ;

      ## Count the characters that _emit_characters() would have to escape
      ## (NUL is not counted here; it was looked for above).
      my $bin_char_count = $buf =~ tr/\x01-\x08\x0b-\x1f\x7f-\xff// ;
      my $encoding = $bin_char_count * 20 > length( $buf ) * 76/57
         ? "base64"
         : "none" ;

      if (    ! $saw
           || ! defined $saw->work_path
           || $has_nul
           || $encoding ne "none"
      ) {
         ## Full content, no delta.
         $w->start_content( encoding => $encoding ) ;

         ## Bytes read but not yet base64 encoded.  Encoding each chunk as
         ## it arrives would put '=' padding and a short line in the middle
         ## of the output whenever a chunk's length is not a multiple of
         ## $base64_quantum, so whatever doesn't fit is carried over.
         my $pending = '' ;

         while ( $read ) {
            if ( $encoding eq "none" ) {
               _emit_characters( $w, \$buf ) ;
            }
            else {
               $pending .= $buf ;
               my $whole =
                  length( $pending ) - length( $pending ) % $base64_quantum ;
               ## 4-arg substr removes what is encoded from $pending.
               $w->characters(
                  encode_base64( substr( $pending, 0, $whole, '' ) )
               ) if $whole ;
            }
            $read = sysread( $fh, $buf, $read_size ) ;
            die "$! reading $cp\n" unless defined $read ;
         }

         ## The tail end; this is the only place padding may be produced.
         $w->characters( encode_base64( $pending ) ) if length $pending ;

         $w->end_content ;
         $w->setDataMode( 1 ) ;
      }
      else {
         ## Delta from previous version
         $w->base_name( $saw->name )
            if $saw->name ne $r->name ;
         $w->base_rev_id( $saw->rev_id ) ;

         $w->start_delta( type => 'diff-u', encoding => 'none' ) ;

         my $old_cp = $saw->work_path ;

         die "vcp: no old work path for '", $saw->name, "'\n"
            unless defined $old_cp && length $old_cp ;

         die "vcp: old work path '$old_cp' not found for '", $saw->name, "'\n"
            unless -f $old_cp ;

         ## TODO: Use Algorithm::Diff.  Need to copy & paste newdiff.pl, then
         ## cut it down.
         ## TODO: Include entire contents if diff is larger than the contents.

         ## Accumulate a bunch of output so that characters can make a
         ## knowledgeable CDATA vs <& escaping decision.
         ## We use '-a' since we don't want NULs and other control chars to
         ## make diff think it's binary.
         $self->run(
            [qw( diff -a -u ), $old_cp, $cp],
            '|', sub {
               $/ = "\n" ;
               <STDIN> ; <STDIN> ; ## Throw away first two lines
               my @accum ;
               while (<STDIN>) {
                  push @accum, $_ ;
                  if ( @accum > 1000 ) {
                     print @accum ;
                     @accum = () ;
                  }
               }
               print @accum ;
               close STDOUT ;
               kill 9, $$ ; ## Avoid calling DESTROY()s
            },
            '>', sub {
               _emit_characters( $w, \$_[0] ) ;
            },
         ) ;
         $w->end_delta ;
         $w->setDataMode( 1 ) ;
      }
      $digestion = 1 ;
   }

   if ( $digestion ) {
      ## TODO: See if this should be seek or sysseek.
      sysseek $fh, 0, 0 or die "$!: $cp" ;
      my $d = Digest::MD5->new ;
      $d->addfile( $fh ) ;
      $w->digest( $d->b64digest, type => 'MD5', encoding => 'base64' ) ;
      close $fh ;
   }

   $w->end_rev ;

#   $self->seen( $r ) ;
}


=item handle_footer

Takes the footer (which is not looked at) and tells the writer to end all
tags that are still open.  Returns nothing.

=cut

sub handle_footer {
   my VCP::Dest::revml $self = shift ;
   my ( $footer ) = @_ ;

   $self->writer->endAllTags() ;

   return ;
}


=item writer

Sets the writer when passed one.  Always returns the current writer.

=cut

sub writer {
   my VCP::Dest::revml $self = shift ;
   $self->{WRITER} = shift if @_ ;
   return $self->{WRITER} ;
}

=back

=head1 COPYRIGHT

Copyright 2000, Perforce Software, Inc.  All Rights Reserved.

This module and the VCP package are licensed according to the terms given in
the file LICENSE accompanying this distribution, a copy of which is included in
L<vcp>.

=head1 AUTHOR

Barrie Slaymaker <barries@slaysys.com>

=cut

1 ;
# | Change | User | Description | Committed | |
---|---|---|---|---|---|
#68 | 4515 | Barrie Slaymaker | - VCP::*::revml supports <release_id> | ||
#67 | 4507 | Barrie Slaymaker |
- RevML: - added <action>, removed <delete>, <placeholder> and <move> - added <from_id> for clones (and eventually merge actions) - Simplified DTD (can't branch DTD based on which action any more) - VCP::Source::cvs, VCP::Filter::changesets and VCP::Dest::p4 support from_id in <action>clone</action> records - VCP::Dest::perl_data added - VCP::Rev::action() "branch" added, no more undefined action strings - "placeholder" action removed |
||
#66 | 4496 | Barrie Slaymaker | - minor POD cleanups to prevent nags when building VCP::Help | ||
#65 | 4154 | Barrie Slaymaker | - dist/vcp.exe passes almost all tests | ||
#64 | 4021 | Barrie Slaymaker |
- Remove all phashes and all base & fields pragmas - Work around SWASHGET error |
||
#63 | 4012 | Barrie Slaymaker | - Remove dependence on pseudohashes (deprecated Perl feature) | ||
#62 | 3970 | Barrie Slaymaker |
- VCP::Source handles rev queuing, uses disk to reduce RAM - Lots of other fixes |
||
#61 | 3930 | Barrie Slaymaker |
- VCP::Source::cvs and VCP::Dest::p4 handle cloning deletes - "placeholder" actions and is_placeholder_rev() deprecated in favor of is_branch_rev() and is_clone_rev(). - Misc cleanups and minor bugfixes |
||
#60 | 3908 | Barrie Slaymaker | - Debugging cleanups | ||
#59 | 3852 | Barrie Slaymaker | - VCP::Dest::revml detects empty filename BUGs | ||
#58 | 3850 | Barrie Slaymaker | - No longer stores all revs in memory | ||
#57 | 3813 | Barrie Slaymaker | - VCP::Rev::previous() is no more | ||
#56 | 3812 | Barrie Slaymaker | - VCP::Dest::* no longer need VCP::Rev->previous() | ||
#55 | 3811 | Barrie Slaymaker | - fetch_*() and get_rev() renamed get_source_file() | ||
#54 | 3805 | Barrie Slaymaker | - VCP::Revs::fetch_files() removed | ||
#53 | 3800 | Barrie Slaymaker | - <branches> removed from all code | ||
#52 | 3706 | Barrie Slaymaker | - VCP gives some indication of output progress (need more) | ||
#51 | 3532 | John Fetkovich |
changed File::Spec->rel2abs( blah, start_dir ) to start_dir_rel2abs blah everywhere. which does the same thing and is defined in VCP::Utils |
||
#50 | 3519 | John Fetkovich | fix warning | ||
#49 | 3501 | John Fetkovich | added ui_set_revml_repo_spec, and caller in the stml file | ||
#48 | 3489 | Barrie Slaymaker | - Document options emitted to .vcp files. | ||
#47 | 3460 | Barrie Slaymaker |
- Revamp Plugin/Source/Dest hierarchy to allow for regurgitating options into .vcp files |
||
#46 | 3430 | Barrie Slaymaker |
- Update the XML escaping heuristic - Add --no-encoding option to prevent base64 encoding of binary data (in case your data isn't all that binary) |
||
#45 | 3414 | Barrie Slaymaker |
- allow NUL characters to occur in text files, our diff and patch routines are NUL clean. |
||
#44 | 3380 | John Fetkovich |
bug fix: do rel2abs on file_name only AFTER checking file is not STDOUT. |
||
#43 | 3377 | John Fetkovich | bug fixes for weird input | ||
#42 | 3376 | John Fetkovich | small changes | ||
#41 | 3331 | John Fetkovich |
Small change in source revml state machine. split 'sub init' from 'sub new' in Source/revml.pm and Dest/revml.pm |
||
#40 | 3284 | John Fetkovich |
'sub new' constructor in Source and Dest p4.pm fixed so parse_p4_repo_spec only called when a $spec is provided to the constructor. parse_p4_repo_spec now also sets the repo_id. parse_repo_spec (TODO item) no longer returns a hash value of the values parsed, it only sets fields in $self. Fixed a few places where that return hash was used. |
||
#39 | 3164 | Barrie Slaymaker | Fix overzealous use of shell_quote and clean up code. | ||
#38 | 3155 | Barrie Slaymaker |
Convert to logging using VCP::Logger to reduce stdout/err spew. Simplify & speed up debugging quite a bit. Provide more verbose information in logs. Print to STDERR progress reports to keep users from wondering what's going on. Breaks test; halfway through upgrading run3() to an inline function for speed and for VCP specific features. |
||
#37 | 3133 | Barrie Slaymaker |
Make destinations call back to sources to check out files to simplify the architecture (is_metadata_only() no longer needed) and make it more optimizable (checkouts can be batched). |
||
#36 | 3115 | Barrie Slaymaker |
Move sorting function to the new VCP::Filter::sort; it's for testing and reporting only and the code was bloating VCP::Dest and limiting VCP::Rev and VCP::Dest optimizations. Breaks test suite in minor way. |
||
#35 | 3008 | John Fetkovich |
make state database files go under vcp_state in the program start directory (start_dir) instead of start_dir itself. Also escape periods (.) from the database directory as well as the characters already escaped. |
||
#34 | 2972 | Barrie Slaymaker | Interim checkin | ||
#33 | 2931 | John Fetkovich | added empty() calls | ||
#32 | 2926 | John Fetkovich |
remove --state-location switch add --db-dir and --repo-id switches build state location from concatenation of those two. |
||
#31 | 2838 | John Fetkovich | Use parse_options rather than using Getopt::Long directly. | ||
#30 | 2802 | John Fetkovich |
Added a source_repo_id to each revision, and repo_id to each Source and Dest. The repo_ids include repository type (cvs,p4,revml,vss,...) and the repo_server fields. Changed the $self->...->set() and $self->...->get() lines in VCP::Dest::* to pass in a conglomerated key value, by passing in the key as an ARRAY ref. Also various restructuring in VCP::DB.pm, VCP::DB_file.pm and VCP::DB_file::sdbm.pm related to this change. |
||
#29 | 2774 | Barrie Slaymaker | Update HeadRevDB on submit/commit/write | ||
#28 | 2768 | John Fetkovich |
Allow revml to be output in non-indented form with --no-indent option on dest::revml |
||
#27 | 2764 | John Fetkovich |
add --compress switch to dest::revml add --uncompress switch to source::revml use gzip to compress/uncompress revml files |
||
#26 | 2743 | John Fetkovich |
Add fields to vcp: source_name, source_filebranch_id, source_branch_id, source_rev_id, source_change_id 1. Alter revml.dtd to include the fields 2. Alter bin/gentrevml to emit legal RevML 3. Extend VCP::Rev to have the fields 4. Extend VCP::{Source,Dest}::revml to read/write the fields (VCP::Dest::revml should die() if VCP tries to emit illegal RevML) 5. Extend VCP::{Source,Dest}::{cvs,p4} to read the fields 7. Get all tests through t/91*.t to pass except those that rely on ch_4 labels |
||
#25 | 2245 | Barrie Slaymaker | cvs -r (re)implemented for direct reads, passes all cvs-only tests | ||
#24 | 2059 | Barrie Slaymaker | Support for branching in p4->p4 added | ||
#23 | 2051 | Barrie Slaymaker | Enable p4_branch_spec to be carried through revml->revml. | ||
#22 | 2042 | Barrie Slaymaker | Basic source::p4 branching support | ||
#21 | 2017 | Barrie Slaymaker |
Interim checkin of id=/base_version_id for revml: and branch_diagram: |
||
#20 | 2015 | Barrie Slaymaker | submit changes | ||
#19 | 2014 | Barrie Slaymaker |
Give helpful error messages if the vcp command can't read/write RevML due to a missing required module. |
||
#18 | 2009 | Barrie Slaymaker |
lots of fixes, improve core support for branches and VCP::Source::cvs now supports branches. |
||
#17 | 1998 | Barrie Slaymaker | Initial, revml and core VCP support for branches | ||
#16 | 1850 | Barrie Slaymaker | Add "emitting revml for" debugging msg | ||
#15 | 1809 | Barrie Slaymaker | VCP::Patch should ignore lineends | ||
#14 | 1367 | Barrie Slaymaker | lots of docco updates | ||
#13 | 1358 | Barrie Slaymaker | Win32 changes | ||
#12 | 1174 | Barrie Slaymaker | Add and use VCP::DiffFormat | ||
#11 | 1171 | Barrie Slaymaker | Switch to using Text::Diff | ||
#10 | 1055 | Barrie Slaymaker |
add sorting, revamp test suite, misc cleanup. Dest/revml is not portable off my system yet (need to release ...::Diff) |
||
#9 | 695 | Barrie Slaymaker |
Cleaned up support for binary files in VCP::Dest::revml and altered test suite to deal with it better. Added some thoughts to the TODO file. |
||
#8 | 628 | Barrie Slaymaker | Cleaned up POD in bin/vcp, added BSD-style license. | ||
#7 | 609 | Barrie Slaymaker |
Add a file to the test procedure that it alternately added and deleted (file is named "readd"). Fixed all destinations to handle that. |
||
#6 | 608 | Barrie Slaymaker |
Lots of changes to get vcp to install better, now up to 0.066. Many thanks to Matthew Attaway for testing & suggestions. |
||
#5 | 480 | Barrie Slaymaker |
0.06 Wed Dec 20 23:19:15 EST 2000 - bin/vcp: Added --versions, which loads all modules and checks them for a $VERSION and print the results out. This should help with diagnosing out-of-sync modules. - Added $VERSION vars to a few modules :-). Forgot to increment any $VERSION strings. - VCP::Dest::cvs: The directory "deeply" was not being `cvs add`ed on paths like "a/deeply/nested/file", assuming "deeply" had no files in it. - VCP::Dest::revml: fixed a bug that was causing files with a lot of linefeeds to be emitted in base64 instead of deltaed. This means most text files. - Various minor cleanups of diagnostics and error messages, including exposing "Can't locate Foo.pm" when a VCP::Source or VCP::Dest module depends on a module that's not installed, as reported by Jeff Anton. |
||
#4 | 478 | Barrie Slaymaker |
0.05 Mon Dec 18 07:27:53 EST 2000 - Use `p4 labels //...@label` command as per Rober Cowham's suggestion, with the '-s' flag recommended by Christopher Siewald and Amaury.FORGEOTDARC@atsm.fr. Though it's actually something like vcp: running /usr/bin/p4 -u safari -c safari -p localhost:5666 -s files //.../NtLkly //...@compiler_a3 //.../NtLkly //...@compiler_may3 and so //on //for 50 parameters to get the speed up. I use the //.../NtLkly "file" as //a separator between the lists of files in various //revisions. Hope nobody has any files named that :-). What I should do is choose a random label that doesn't occur in the labels list, I guess. - VCP::Source::revml and VCP::Dest::revml are now binary, control code, and "hibit ASCII" (I know, that's an oxymoron) clean. The <comment>, <delta>, and <content> elements now escape anything other than tab, line feed, space, or printable chars (32 <= c <= ASCII 126) using a tag like '<char code="0x09">'. The test suite tests all this. Filenames should also be escaped this way, but I didn't get to that. - The decision whether to do deltas or encode the content in base64 is now based on how many characters would need to be escaped. - We now depend on the users' diff program to have a "-a" option to force it to diff even if the files look binary to it. I need to use Diff.pm and adapt it for use on binary data. - VCP::Dest::cvs now makes sure that no two consecutive revisions of the same file have the same mod_time. VCP::Source::p4 got so fast at pulling revisions from the repositories the test suite sets up that CVS was not noticing that files had changed. - VCP::Plugin now allows you to set a list of acceptable result codes, since we now use p4 in ways that make it return non-zero result codes. - VCP::Revs now croaks if you try to add two entries of the same VCP::Rev (ie matching filename and rev_id). - The <type> tag is now limited to "text" or "binary", and is meant to pass that level of info between foreign repositories. 
- The <p4_info> on each file now carries the one line p4 description of the file so that p4->p4 transferes can pick out the more detailed info. VCP::Source::p4, VCP::Dest::p4 do this. - VCP::{Source,Dest}::{p4,cvs} now set binaryness on added files properly, I think. For p4->p4, the native p4 type is preserved. For CVS sources, seeing the keyword substitution flag 'o' or 'b' implies binaryness, for p4, seeing a filetype like qr/u?x?binary/ or qr/x?tempobj/ or "resource" implies binaryness (to non-p4 destinations). NOTE: Seeing a 'o' or 'b' in a CVS source only ends up setting the 'b' option on the destination. That should be ok for most uses, but we can make it smarter for cvs->cvs transfers if need be. |
||
#3 | 473 | Barrie Slaymaker |
0.04 Tue Dec 12 00:15:57 EST 2000 - Reorg of VCP::Source::p4 - One large filelog command is run instead of many small ones. This takes advantage of the -m option to make sure enough changes are listed. Many extra revisions of most files are probably listed, but listing and ignoring them is quicker than spawning p4 over and over. Wish p4 filelog had a revision range... - it now doesn't suck the entire filelog output in to memory, it parses it line by line as it's emitted from the `p4 filelog` - `p4 print` is now used to print a bunch of files at once, using the header line to separate one file from the next, kind of like splitting a mime-encoded message. There's a very slight chance that it will misjudge the boundary between two files if a file happens to have a line that looks very much like the header line for the next file. This is pretty unlikely and I'll fix it if it crops up. I could batch them more, right now it never puts two revisions of the same filename in the same batch, for no really good reason. Another method might be to batch 25 or 50 revs each time. - it turns out there's a problem spawning multiple p4 commands at the same time against the same p4d (p4d is 99.2, FWIW). Or at least running large `p4 files ...` while there's a large `p4 filelog` still also running. - filelog lines beginning with "... ..." are now ignored. These are notifications of copy, branch, and integrate events that we don't yet do anything with. - deleted cur() and P4_CUR - deleted P4_IS_INCREMENTAL - Made an assertion in VCP::Dest::revml::handle_rev() a little clearer - Added some ok(1) calls to 90p4.t to make it easier to figure out which child process is whining or aborting - Made the message that's printed when a subcommand emits unexpected output say "stderr" instead of "stdout". - Cleaned up documentation for VC::Plugin::work_path(). |
||
#2 | 468 | Barrie Slaymaker |
- VCP::Dest::p4 now does change number aggregation based on the comment field changing or whenever a new revision of a file with unsubmitted changes shows up on the input stream. Since revisions of files are normally sorted in time order, this should work in a number of cases. I'm sure we'll need to generalize it, perhaps with a time thresholding function. - t/90cvs.t now tests cvs->p4 replication. - VCP::Dest::p4 now doesn't try to `p4 submit` when no changes are pending. - VCP::Rev now prevents the same label from being applied twice to a revision. This was occuring because the "r_1"-style label that gets added to a target revision by VCP::Dest::p4 could duplicate a label "r_1" that happened to already be on a revision. - Added t/00rev.t, the beginnings of a test suite for VCP::Rev. - Tweaked bin/gentrevml to comment revisions with their change number instead of using a unique comment for every revision for non-p4 t/test-*-in-0.revml files. This was necessary to test cvs->p4 functionality. |
||
#1 | 467 | Barrie Slaymaker | Version 0.01, initial checkin in perforce public depot. |