#! /usr/bin/env perl =comment Copyright (c) 2019, Perforce Software, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PERFORCE SOFTWARE, INC. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. User contributed content on the Perforce Public Depot is not supported by Perforce, although it may be supported by its author. This applies to all contributions even those submitted by Perforce employees. This file originated at public.perforce.com:1666 //guest/jason_gibson/misc/triggers/trig-bindeltX/bindeltXtrig.pl =cut use 5.014; use Carp 'longmess'; use Data::Dumper; use Digest::MD5 'md5_hex'; use File::Basename; use File::Spec; use File::Temp; use Getopt::Long; use IO::File; use Pod::Usage; use autodie; use diagnostics '-traceonly'; use sigtrap qw / handler sig_log normal-signals error-signals /; use strict; use warnings; my ( $errors, %log_entry, $lfh ) = 1; my ( $op, $rev, $lbr, $db, $log, $help, $init, $eval, $ver, $name, $ndelts, $dbg, $jnl, $jnlfh ); my $file_prefix = basename( $0 ) =~ s/\.pl$//r; BEGIN { $Carp::MaxArgLen = 1024; $Carp::MaxEvalLen = 1024; $Carp::Verbose = 1; } sub uDumper { local $Data::Dumper::Useqq = 1; local $Data::Dumper::Terse = 1; local $Data::Dumper::Sortkeys = 1; local $Data::Dumper::Indent = 0; local $Data::Dumper::Quotekeys = 0; Dumper @_ } sub self_checksum { uc md5_hex join( '', IO::File->new( $0, '<' )->getlines ) =~ s/\R/\012/gr } sub mk_ver_str { join( '@', '$File: //depot/dev/jgibson/utils/trig-bindeltX/bindeltXtrig.pl $$Change: 1755088 $' =~ /\$\S+: (.*?) \$/g ) . ' MD5: ' . self_checksum } sub end { # Try to ensure that error messages from things like invalid arguments # get passed back to the end user. Without reading a little from the # server, it just gets a 'broken pipe' error. STDIN->blocking( 0 ); my $si = ; # Have to get at least a line otherwise it doesn't work. # This doesn't work. #sysread STDIN, $_, 65536; $op //= 'unset'; $rev //= 'unset'; $lbr //= 'unset'; $log_entry{ start } //= time; $log_entry{ end } = time; $log_entry{ duration } = $log_entry{ end } - $log_entry{ start }; $log_entry{ errors } = $errors; # todo: failure to open log leaves this silent? # todo: don't log for -v/-h say $lfh uDumper \ %log_entry if $lfh; exit ( $errors // 1 ) } END { end } sub sig_log { no sigtrap; $SIG{ __DIE__ } = $SIG{ __WARN__ } = undef; return if state $sigged++; $name //= $file_prefix; say STDERR $log_entry{ sig_log } = "\n$name ${\ mk_ver_str }\n\nExiting unexpectedly: \n@_\n\n${\ longmess }"; end; } #sub ERR { die "$0 line ${\ caller }, $name, $op, $rev, '$lbr': @_" } sub ERR { die "$0 line ${\ caller }: @_\n" } ################################################################################ my %rsrs = do { local $/; map { s/\s*(\S+)\s*={40}.*/$1/r } grep length, split /\n{0,1}([^\n]+={40})\n/m, }; my $ext_shell = $^O eq 'MSWin32' ? 'cmd /c' : 'sh -c'; sub check_delta_prog { `$ext_shell bsdiff 2>&1` =~ /^bsdiff: usage:/ && `$ext_shell bspatch 2>&1` =~ /^bspatch: usage:/ } sub check_sqlite_ver { my ( $maj, $min ) = `$ext_shell "sqlite3 -version" 2>&1` =~ /^(\d+)\.(\d+)/; defined( $maj ) && defined( $min ) && $maj == 3 && $min >= 18 } sub help { my $trigger = eval "qq{$rsrs{ TRIGGER_DEF }}"; die "help eval trigger error: $@" if $@; return $trigger if $_[ 0 ] && $_[ 0 ] eq 'trigger'; $trigger =~ s/=head1 //; my $help = eval "qq{$rsrs{ HELP }}"; die "help eval error: $@" if $@; $help } sub tmp_file { my $act = $_[ 0 ] ? "$_[0]-" : ''; File::Temp->new( TEMPLATE => "$file_prefix-$$-$act-XXXXX", DIR => '.', SUFFIX => ( $_[ 1 ] // '.bin' ) ); } ################################################################################ sub quote($) { "'" . $_[ 0 ] =~ s/'/''/gr . "'" } # todo: only single-line csv? sub parse_csv { [ map { [ map { s/""/"/g; s/^"|"$//gr } grep defined, /("(?:(?:"")*|[^"]++|(?1))*")|([^,]+)/g ] } map { chomp; $_ } map { ref( $_ ) ? @{ $_ } : $_ } @_ ] } sub run_sql { my ( $sql, $out, $err ) = map { tmp_file $_, '.txt' } qw / sql out err /; my $stmts = eval "qq{$rsrs{ RUN_SQL }}"; say $sql $stmts; my $o = `sqlite3 -batch -bail -csv "$db" ".read $sql"`; die "error? $o" if $o; say $jnlfh ( '-' x 80 ) . "\n@_" if ! -s( $err->filename ) && "@_" !~ $rsrs{ DDL }; { results => [ $out->getlines ], error => ( join( '', $err->getlines ) // '' ) } } sub init_db { my $rs = run_sql $rsrs{ DDL }; die "init_db error: $rs->{ error }" if $rs->{ error } } sub get_revs { my ( $lbr, $rev ) = @_; my $rs = run_sql eval "qq{$rsrs{ GET_REVS }}"; die 'get_revs error: ' . ( $@ // $rs->{ error } ) if $@ || $rs->{ error }; parse_csv $rs->{ results } } sub get_prev_rev($$) { my ( $lbr, $rev ) = @_; my $rs = run_sql eval "qq{$rsrs{ GET_PREV_REV }}"; die 'get_prev_rev error: ' . ( $@ // $rs->{ error } ) if $@ || $rs->{ error }; $rs = parse_csv $rs->{ results }; my $prev = $rs->[ ( @$rs > 1 ) ? -2 : 0 ]->[ 0 ]; ( $prev && $prev == $_[ 1 ] ) ? undef : $prev } sub get_next_rev($$) { my ( $lbr, $rev ) = @_; my $rs = run_sql eval "qq{$rsrs{ GET_NEXT_REV }}"; die 'get_next_rev error: ' . ( $@ // $rs->{ error } ) if $@ || $rs->{ error }; $rs = parse_csv $rs->{ results }; @$rs ? $rs->[ 0 ]->[ 0 ] : undef } sub get_head_rev { my $lbr = $_[ 0 ]; my $rs = run_sql eval "qq{$rsrs{ HEAD_REV }}"; die 'get_head_rev error: ' . ( $@ // $rs->{ error } ) if $@ || $rs->{ error }; ${ parse_csv $rs->{ results } }[ 0 ]->[ 0 ] } sub ndeltas { @{ get_revs @_ } - 1 } sub get_data { my $id = $_[ 0 ]; my $afh = tmp_file 'archive'; my $rs = run_sql eval "qq{$rsrs{ GET_DATA }}"; die 'get_data error: ' . ( $@ // $rs->{ error } ) if $@ || $rs->{ error }; $afh } sub make_bin($$) { my ( $lbr, $rev ) = @_; my @ids = map { $_->[ 0 ] } @{ get_revs $lbr, $rev }; my $base = get_data shift @ids; map { my $bin = tmp_file 'archive'; my $e = `bspatch "$base" "$bin" "$_" 2>&1`; die "bspatch e: $e" if $e; $base = $bin; } map { get_data $_ } @ids; $base } sub expand_rev { my ( $lbr, $rev ) = @_; my $full = make_bin $lbr, $rev; my $rs = run_sql eval "qq{$rsrs{ EXPAND_REV }}"; die 'expand_rev error: ' . ( $@ // $rs->{ error } ) if $@ || $rs->{ error }; } sub make_delta { my ( $lbr, $rev, $newfile ) = @_; my ( $oldfile, $patchfile ) = ( make_bin( $lbr, $rev ), tmp_file 'delta' ); my $e = `bsdiff "$oldfile" "$newfile" "$patchfile" 2>&1`; die "bspatch e: $e" if $e; $patchfile } # todo: gzip diffs? sub deltify_rev { my ( $lbr, $rev ) = @_; # todo: necessary to check this? my $prev = get_prev_rev $lbr, $rev; return if ! $prev; my $patch = make_delta $lbr, $prev, make_bin $lbr, $rev; my $rs = run_sql eval "qq{$rsrs{ DELTIFY_REV }}"; die 'deltify_rev error: ' . ( $@ // $rs->{ error } ) if $@ || $rs->{ error }; } sub add_rev { my ( $lbr, $rev, $afh ) = @_; my $rs = run_sql eval "qq{$rsrs{ ADD_REV }}"; die 'add_rev error: ' . ( $@ // $rs->{ error } ) if $@ || $rs->{ error }; } sub rev_is_full { my ( $lbr, $rev ) = @_; my $rs = run_sql eval "qq{$rsrs{ REV_IS_FULL }}"; die 'rev_is_full error: ' . ( $@ // $rs->{ error } ) if $@ || $rs->{ error }; $rs = parse_csv $rs->{ results }; $rs->[ 0 ]->[ 0 ] } sub del_rev { my ( $lbr, $rev ) = @_; my $next_rev = get_next_rev $lbr, $rev; expand_rev $lbr, $next_rev if $next_rev && ! rev_is_full $lbr, $next_rev; my $rs = run_sql eval "qq{$rsrs{ DEL_REV }}"; die 'del_rev error: ' . ( $@ // $rs->{ error } ) if $@ || $rs->{ error }; } ################################################################################ sub xfer { my ( $rfh, $wfh, $nread, $nwrite, $data ) = @_; do { $nread = sysread $rfh, $data, 32_768; die "error reading archive from $rfh: $!" if ! defined $nread; $nwrite = syswrite $wfh, $data; die "error writing archive from $wfh: $!" if ! defined $nwrite || $nwrite != length $data; } while $nread; # todo: need another write here if it was short last time? $wfh } ################################################################################ $SIG{ __DIE__ } = $SIG{ __WARN__ } = \ &sig_log; $log_entry{ PID } = $$; $log_entry{ start } = time; $log_entry{ OARGV } = join ' | ', @ARGV; GetOptions 'op=s' => \ $op , 'rev=s' => \ $rev, 'lbr=s' => \ $lbr, 'log=s' => \ $log, 'db=s' => \ $db , 'help:s' => \ $help, 'init' => \ $init, 'eval=s' => \ $eval, 'ver' => \ $ver, 'name=s' => \ $name, 'ndeltas=i' => \ $ndelts, 'debug=i' => \ $dbg, 'jnl=s' => \ $jnl, or pod2usage -exitval => 1, -input => IO::File->new( \ help, '<' ), -verbose => 0; exit pod2usage -exitval => ( $errors = 0 ), -verbose => 2, -noperldoc => 1, -input => IO::File->new( \ help( $help ), '<' ) if defined $help; $log //= "$file_prefix.log"; $db //= "$file_prefix.db"; $jnl //= "$file_prefix.jnl"; $name //= "trigger_$file_prefix"; $ndelts //= 10; $lfh = IO::File->new( $log, '>>' ) or die "Unable to open log '$log': $!"; $lfh->autoflush( 1 ); $jnlfh = IO::File->new( $jnl, '>>' ) or die "Unable to open log '$log': $!"; $jnlfh->autoflush( 1 ); $log_entry{ name } = $name; $log_entry{ version } = mk_ver_str; $log_entry{ checksum } = self_checksum; $log_entry{ op } = $op; $log_entry{ rev } = $rev; $log_entry{ lbr } = $lbr; ERR 'bsdiff/bspatch missing' unless check_delta_prog; ERR 'sqlite3 missing or not 3.18+' unless check_sqlite_ver; $log_entry{ sqlver } = `sqlite3 -version 2>&1`; init_db, exit if $init; exit ( ( eval $eval ) // 1 ) if $eval; exit pod2usage -message => 'Must Specify op/rev/lbr', -exitval => 1, -input => IO::File->new( \ help, '<' ) unless $op && $lbr && defined $rev; die 'unsupported op!' if $op ne 'write' && $op ne 'read' && $op ne 'delete'; die "Unkown rev format: $rev" if $rev !~ /^\d+\.\d+$/; $rev =~ s/^1\.//; # 1.2 -> 2. rev is always leading with '1.' init_db if ! -e $db; binmode STDOUT; binmode STDIN; goto uc $op; ################################################################################ WRITE: # todo: collapse these into a single get_revs_n query? my $head = get_head_rev( $lbr ) // 0; my $prev = get_prev_rev( $lbr, $head ) // 0; del_rev $lbr, $rev if $head == $rev; # Clean up a failed submit. deltify_rev $lbr, $head if $prev && rev_is_full( $lbr, $head ) && $ndelts > ndeltas $lbr, $prev; add_rev $lbr, $rev, xfer \ *STDIN, tmp_file 'write'; $errors = 0; goto DONE; READ: xfer make_bin( $lbr, $rev ), \ *STDOUT; $errors = 0; goto DONE; DELETE: del_rev $lbr, $rev; $errors = 0; DONE: # The END block now writes the log. __DATA__ HELP ======================================== =pod =encoding UTF-8 =head1 NAME ${\ basename $0 }: binary delta archive trigger =head1 VERSION ${\mk_ver_str} =head1 SYNOPSIS Perforce Server archive trigger to store binary files in delta format to minimize disk space usage. Uses the F program to perform the diffing. =head1 OPTIONS =over =item --op=\%op% Server-supplied operation (write/read/delete a revision). =item --rev=\%rev% Server-supplied file revision to work with. =item --lbr=\%quote%\%lbr%\%quote% Server-supplied filesystem path of the file. =item --log=file.log File name to log trigger actions to. =item --db=file.db Database file name to use to store trigger data. =back =head1 NOTES The trigger's filetype should be set as text+kx so it can report the correct version string. If retyping existing revs to use this trigger, it's best to start at #head and work backwards since ... =head1 USAGE $trigger p4 edit -t +X build.exe #p4 retype -l -t +X build.txt TRIGGER_DEF ======================================== =head1 Triggers: bin_delta archive //... "perl ${\ basename $0 } --ndeltas=10 --op=\%op% --rev=\%rev% --lbr=\%quote%\%file%\%quote%" RUN_SQL ======================================== .log $err .output $out --PRAGMA synchronous = OFF; PRAGMA foreign_keys = ON; BEGIN TRANSACTION; @_ COMMIT; DDL ======================================== -- PRAGMA journal_mode = WAL; PRAGMA page_size = 32768; CREATE TABLE IF NOT EXISTS revs ( id INTEGER PRIMARY KEY NOT NULL, lbr TEXT NOT NULL, rev INTEGER NOT NULL CHECK ( rev > 0 ), src INTEGER NOT NULL CHECK ( src > 0 ), UNIQUE( lbr, rev ) ); CREATE INDEX IF NOT EXISTS rr_revs on revs ( rev desc, rev asc ); CREATE INDEX IF NOT EXISTS dr_revs on revs ( lbr, rev desc ); CREATE TABLE IF NOT EXISTS data ( id INTEGER PRIMARY KEY NOT NULL REFERENCES revs ( id ) ON DELETE CASCADE, data BLOB NOT NULL, UNIQUE ( id, data ) ); -- Prevent creating a delta of a full revision by accident. CREATE TRIGGER IF NOT EXISTS update_data BEFORE UPDATE ON data BEGIN SELECT CASE WHEN ( SELECT id FROM revs WHERE id = NEW.id AND src = rev ) AND length( NEW.data ) < length( OLD.data ) AND ( SELECT id FROM revs WHERE id = OLD.id AND rev = src ) THEN RAISE( ROLLBACK, 'update_data: shrinking full rev!' ) END; END; -- Prevent deleting a rev if rev+1 is a delta. -- todo: create a view of ancestor/child revs for a rev? CREATE TRIGGER IF NOT EXISTS delete_rev_delta BEFORE DELETE ON revs BEGIN SELECT CASE WHEN ( SELECT id FROM revs WHERE OLD.lbr = lbr AND rev <> src AND rev = ( SELECT rev FROM revs WHERE rev > OLD.rev AND OLD.lbr = lbr ORDER BY REV ASC LIMIT 1 ) ) THEN RAISE( ROLLBACK, 'delete_rev1: deleting `rev` where `rev`+1 is a delta!' ) END; END; -- Prevent deleting rows from data when there exists a revs entry. -- The foreign key cascade delete on revs should be the only thing -- deleting from data. -- todo: create a view of ancestor/child revs for a rev? CREATE TRIGGER IF NOT EXISTS delete_data BEFORE DELETE ON data BEGIN SELECT CASE WHEN ( SELECT id FROM revs WHERE id = OLD.id ) THEN RAISE( ROLLBACK, 'delete_data: delete from data with existing rev!' ) END; END; -- Prevent updating revs when the src rev does not exist. CREATE TRIGGER IF NOT EXISTS update_rev BEFORE UPDATE ON revs BEGIN SELECT CASE WHEN NEW.rev <> NEW.src AND NOT ( SELECT count( id ) FROM revs WHERE lbr = OLD.lbr AND rev = NEW.src ) THEN RAISE( ROLLBACK, 'update_rev: new `src` revision does not exist!' ) END; SELECT CASE WHEN NEW.rev = NEW.src AND OLD.rev = OLD.src THEN RAISE( ROLLBACK, 'update_rev: old/new `src` same?!' ) END; SELECT CASE WHEN ( select OLD.lbr <> NEW.lbr ) THEN RAISE( ROLLBACK, 'update_rev: tried to change the `lbr`!' ) END; END; GET_REVS ======================================== SELECT id, rev, src FROM revs WHERE rev <= $rev AND rev >= ( SELECT rev FROM revs WHERE lbr = ${\ quote $lbr } AND rev <= $rev AND rev = src ORDER BY rev DESC LIMIT 1 ) ORDER BY rev ASC; GET_PREV_REV ======================================== SELECT rev FROM revs WHERE lbr = ${\ quote $lbr } AND rev < $rev ORDER BY rev DESC LIMIT 1; GET_NEXT_REV ======================================== SELECT rev FROM revs WHERE lbr = ${\ quote $lbr } AND rev > $rev ORDER BY rev ASC LIMIT 1; GET_DATA ======================================== SELECT writefile( ${\ quote $afh }, data ) FROM data WHERE id = $id; DELTIFY_REV ======================================== UPDATE revs SET src = $prev WHERE lbr = ${\ quote $lbr } AND rev = $rev; UPDATE data SET data = readfile( ${\ quote $patch } ) WHERE id = ( SELECT id FROM revs WHERE lbr = ${\ quote $lbr } AND rev = $rev ); EXPAND_REV ======================================== UPDATE revs SET src = $rev WHERE lbr = ${\ quote $lbr } AND rev = $rev; UPDATE data SET data = readfile( ${\ quote $full } ) WHERE id = ( SELECT id FROM revs WHERE lbr = ${\ quote $lbr } AND rev = $rev ); ADD_REV ======================================== INSERT INTO revs ( lbr, rev, src ) VALUES ( ${\ quote $lbr }, $rev, $rev ); INSERT INTO data ( id, data ) VALUES ( last_insert_rowid(), readfile( ${\ quote $afh } ) ); DEL_REV ======================================== DELETE FROM revs WHERE lbr = ${\ quote $lbr } AND rev = $rev; REV_IS_FULL ======================================== SELECT src = id FROM revs where lbr = ${\ quote $lbr } AND rev = $rev; FULL_REVS ======================================== SELECT rev FROM revs WHERE lbr = ${\quote $lbr} AND rev = src ORDER BY rev ASC; HEAD_REV ======================================== SELECT rev FROM revs WHERE lbr = ${\quote $lbr} ORDER BY rev DESC LIMIT 1;