sdbm.pm #15

package VCP::DB_File::sdbm;

=head1 NAME

VCP::DB_File::sdbm - Subclass providing SDBM_File storage

=head1 SYNOPSIS

    use VCP::DB_File;
    VCP::DB_File->new;

=head1 DESCRIPTION

To write your own DB_File filetype, copy this file and alter it.  Then
ask us to add an option to the .vcp file parsing to enable it.

=over

=for test_script t/01db_file_sdbm.t

=cut

$VERSION = 1 ;

@ISA = qw( VCP::DB_File );

use strict ;

use VCP::Debug qw( :debug );
use Fcntl;
use File::Spec;
use SDBM_File;
use VCP::DB_File;
use VCP::Debug qw( :debug );
use VCP::Logger qw( lg BUG );

#use base qw( VCP::DB_File );

#use fields (
#   'Hash',  ## The hash we tie
#);

sub db_file {
   my $self = shift;
   return File::Spec->catfile(
      $self->store_loc,
      "db"
   );
}


sub close_db {
   my $self = shift;

   return unless $self->{Hash};

   $self->SUPER::close_db;

   $self->{Hash} = undef;

   close $self->{AbbreviationsFH};
   $self->{AbbreviationsFH} = undef;
}


sub delete_db {
   my $self = shift;

   my $store_files_pattern = $self->store_loc . "/*";

   my $has_store_files = -e $self->store_loc;
   if ( $has_store_files ) {
      require File::Glob;
      my @store_files = File::Glob::glob( $store_files_pattern );
      $has_store_files &&= @store_files;
   }

   return
      unless $has_store_files;

   $self->SUPER::delete_db;
   $self->rmdir_store_loc unless $ENV{VCPNODELETE};
}


sub open_abbreviations_file {
   my $self = shift;
   my ( $fn ) = @_;

   local *ABBREVIATIONS;
   $self->{CurrentAbbreviation} = 0;
   my $abbrev_fn = "${fn}_abbreviations.txt";
   open ABBREVIATIONS, "+>> $abbrev_fn"
      or die "$!: $abbrev_fn";
   seek ABBREVIATIONS, 0, 0;
   while (<ABBREVIATIONS>) {
      chomp;
       my ( $abbrev, $key ) = split /\s+/, $_, 2;
       $self->{KeysByAbbreviation}->{$abbrev} = $key;
       $self->{AbbreviationsByKey}->{$key} = $abbrev;
       ( $self->{CurrentAbbreviation} ) = $abbrev =~ /(\d+)/;;
   }
   $self->{AbbreviationsFH} = *ABBREVIATIONS{IO};
}


sub open_db {
   my $self = shift;

   $self->SUPER::open_db;
   $self->mkdir_store_loc;

   $self->{Hash} = {};

   my $fn = $self->db_file;

   tie %{$self->{Hash}}, "SDBM_File", $fn, O_RDWR|O_CREAT, 0660
      or die "$! while opening DB_File SDBM file '$fn'";

   $self->open_abbreviations_file( $fn );
}


sub open_existing_db {
   my $self = shift;

   $self->SUPER::open_db;
   $self->mkdir_store_loc;

   $self->{Hash} = {};

   my $fn = $self->db_file;

   tie %{$self->{Hash}}, "SDBM_File", $fn, O_RDWR, 0
      or die "$! while opening DB_File SDBM file '$fn'";

   $self->open_abbreviations_file( $fn );
}


sub raw_set { ## so big_records.pm can call us with prepacked stuff
   my $self = shift;
   my ( $key, $value ) = @_;

   if ( exists $self->{AbbreviationsByKey}->{$key} ) {
       $key = $self->{AbbreviationsByKey}->{$key};
   } elsif ( length( $key ) + length( $value )  > 20 ) {
      ## Work around SBDM's 1008 byte record limit, assuming that
      ## our values are smaller than 990 bytes or so.
      my $abbrev = sprintf( ";;;;;;;;ABBREV_%010d", ++$self->{CurrentAbbreviation} );
	 ## Semicolons are used to join fields in a key, here we
	 ## assume no keys will be composed of a bunch of empty
	 ## fields and a field like "ABBREV_0000190"
      $self->{AbbreviationsByKey}->{$key} = $abbrev;
      $self->{KeysByAbbreviations}->{$abbrev} = $key;
      my $fh = $self->{AbbreviationsFH};
      print $fh "$abbrev $key\n";
      lg "Abbreviating \"$key\" as \"$abbrev\"";
      $key = $abbrev;
   }
   $self->{Hash}->{$key} = $value;
}


sub set {
   my $self = shift;
   my $key_parts = shift;
   BUG "key must be an ARRAY reference"
      unless ref $key_parts eq "ARRAY";

   debug "setting ",
      ref $self, " ",
      join( ",", @$key_parts ), " => ",
      join( ",", @_ )
      if debugging;

   $self->raw_set( 
      $self->pack_values( @$key_parts ),
      $self->pack_values( @_ )
   );
}


sub raw_get {
   my $self = shift;
   my $key = shift;
   if ( exists $self->{AbbreviationsByKey}->{$key} ) {
       $key = $self->{AbbreviationsByKey}->{$key};
   }
   $self->{Hash}->{$key};
}


sub get {
   my $self = shift;
   my $key_parts = shift;
   BUG "key must be an ARRAY reference"
      unless ref $key_parts eq "ARRAY";
   BUG "extra args found"
      if @_;
   BUG "called in scalar context"
      if defined wantarray && !wantarray;

   my $key = $self->pack_values( @$key_parts );

   my $v = $self->raw_get( $key );

   return unless defined $v;

   $self->unpack_values( $v );
}


sub exists {
   my $self = shift;
   my $key_parts = shift;
   BUG "key must be an ARRAY reference"
      unless ref $key_parts eq "ARRAY";

   my $key = $self->pack_values( @$key_parts );

   if ( exists $self->{AbbreviationsByKey}->{$key} ) {
       $key = $self->{AbbreviationsByKey}->{$key};
   }

   return $self->{Hash}->{$key} ? 1 : 0;
}


sub keys {
   my $self = shift;

   map [ $self->unpack_values( $_ ) ], keys %{$self->{Hash}};
}

=item dump

   $db->dump( \*STDOUT );
   my $s = $db->dump;
   my @l = $db->dump;

Dumps keys and values from a DB, in lexically sorted key order.
If a filehandle reference is provided, prints to that filehandle.
Otherwise, returns a string or array containing the entire dump,
depending on context.


=cut

sub dump {
   my $self = shift;
   my $fh = @_ ? shift : undef;

   my( @keys, %vals );
   my @w;

   while ( my ( $k, $v ) = each %{$self->{Hash}} ) {
      if ( exists $self->{KeysByAbbreviation}->{$k} ) {
         $k = $self->{KeysByAbbreviation}->{$k};
      }

      my @key = $self->unpack_values( $k );

      for ( my $i = 0; $i <= $#key; ++$i ) {
         $w[$i] = length $key[$i]
            if ! defined $w[$i] || length $key[$i] > $w[$i];
      }

      push @keys, $k;
      $vals{$k} = [ $self->unpack_values( $v ) ];
   }

   ## This does not take file separators in to account, but that's ok
   ## for a debugging tool and the ids that are used as key values
   ## are supposed to be opaque anyway
   @keys = sort @keys;

   # build format string
   my $f = join( " ", map "%-${w[$_]}s", 0..$#w ) . " => %s\n";

   my @lines;
   while ( @keys ) {
      my $k = shift @keys;

      my @v = map { "'$_'" } @{$vals{$k}};

      my $s = sprintf $f,
         $self->unpack_values( $k ),
         @v == 1 ? $v[0] : join join( ",", @v ), "(", ")";

      if( defined $fh ) {
         print $fh $s;
      }
      else {
         push @lines, $s;
      }
   }

   unless( defined $fh ) {
      if( wantarray ) {
         chomp @lines;
         return @lines;
      }
      return join "", @lines;
   }
}

=back

=head1 LIMITATIONS

There is no way (yet) of telling the mapper to continue processing the
rules list.  We could implement labels like C< <<I<label>>> > to be
allowed before pattern expressions (but not between pattern and result),
and we could then impelement C< <<goto I<label>>> >.  And a C< <<next>>
> could be used to fall through to the next label.  All of which is
wonderful, but I want to gain some real world experience with the
current system and find a use case for gotos and fallthroughs before I
implement them.  This comment is here to solicit feedback :).

=head1 AUTHOR

Barrie Slaymaker <barries@slaysys.com>

=head1 COPYRIGHT

Copyright (c) 2000, 2001, 2002 Perforce Software, Inc.
All rights reserved.

See L<VCP::License|VCP::License> (C<vcp help license>) for the terms of use.

=cut

1

#	Change	User	Description
#18	5347	Barrie Slaymaker	- Undo artificially low abbreviation limit of 20 chars
#17	5088	Barrie Slaymaker	- sbdm::keys() works with workaround
#16	5085	Barrie Slaymaker	- Prevent $_ from getting clobbered, esp. in sdbm.pm
#15	4970	Barrie Slaymaker	- Allow sdbm files to handle large keys.
#14	4021	Barrie Slaymaker	- Remove all phashes and all base & fields pragmas - Work around SWASHGET error
#13	3772	Barrie Slaymaker	- VCP::DB_File::sdbm supports lower level access to packed values to allow VCP::DB_File::big_records.pm to subclass it. - VCP::DB_File::sdbm obeys VCPNODELETE
#12	3433	Barrie Slaymaker	- Merge in new VSS code.
#11	3155	Barrie Slaymaker	Convert to logging using VCP::Logger to reduce stdout/err spew. Simplify & speed up debugging quite a bit. Provide more verbose information in logs. Print to STDERR progress reports to keep users from wondering what's going on. Breaks test; halfway through upgrading run3() to an inline function for speed and for VCP specific features.
#10	3080	Barrie Slaymaker	Add some tracing output
#9	2959	John Fetkovich	added dump method to lib/VCP/DB_File/sdbm.pm to dump keys => values from a sdbm file. removed similar code from bin/dump_head_revs, bin/dump_rev_map and bin/dump_main_branch_id and called this method instead. also made parse_files_and_revids_from_head_revs_db sub in TestUtils to use in test suites instead of parse_files_and_revids_from_p4_files et. al.
#8	2870	Barrie Slaymaker	Fix sort, un/packing empty value problems
#7	2865	Barrie Slaymaker	Improve error message when non-ARRAY ref passed as a key.
#6	2807	Barrie Slaymaker	Clean up debugging messages
#5	2802	John Fetkovich	Added a source_repo_id to each revision, and repo_id to each Source and Dest. The repo_ids include repository type (cvs,p4,revml,vss,...) and the repo_server fields. Changed the $self->...->set() and $self->...->get() lines in VCP::Dest::* to pass in a conglomerated key value, by passing in the key as an ARRAY ref. Also various restructuring in VCP::DB.pm, VCP::DB_file.pm and VCP::DB_file::sdbm.pm related to this change.
#4	2786	Barrie Slaymaker	Differentiate RevMapDB (which confesses when an undefined value is returned) from HeadRevsDB (which doesn't).
#3	2785	Barrie Slaymaker	Generate a stack trace when an uknown key is looked up in an sdbm file.
#2	2769	Barrie Slaymaker	Suppress extraneous stale db warning, demote the stale state db detection to RevMapDB, allow the head revs db to be dumped.
#1	2723	Barrie Slaymaker	Finish generalizing DB_File, implement HeadRevsDB