#!/bin/perl # ====================================================================== # oblit_check.pl # # This script parses the output of an obliterate command (done without -y), and does several things: # it determines how many files have more than one lazy copy, and lists them with space statistics # it provides information on how much space would be freed, or used if lazy copy files obliterated or not. # it determines how much space would be freed by other revisions of files with some mult copy revisions # it writes a client spec that can be used to do the actual obliterate, with exclusion lines for files with # more than one lazy copy # # usage: oblit_check.pl file1 # file1 the output of p4 obliterate (no -y) # # Note: Handling of lazy copies in obliterates changes in 2005.1, this script is for releases before that # # $Id: //esi/perforce/scripts/tools/oblit_check.pl#7 $ # $Author: sstevens $ # $Date: 2005/04/12 $ # ====================================================================== # Get system modules. use strict; # this path is needed for the space checks my $Root = "/perforce/1780/root"; # this is the name of the client spec and file that will be created my $client_spec_name = "sstevens_oblit3"; # edit this to your name, it is the owner field of the client spec my $owner = "sstevens"; # list oblit paths here, they are used to determine if lazy copies are undone to areas also to be obliterated # be sure to leave off trailing /'s # for example: my @oblit_paths = ("//releases/test1", "//releases/test2"); my @oblit_paths = (); if($#ARGV < 0) { print "usage: oblit_check.pl file1 file - the output of p4 obliterate (no -y) \n This script will list the files with mulitple references, so pointless to obliterate, in a format for exclusions in a file named after $client_spec_name, and report on space statistics\n"; exit 1; } my $File1 = $ARGV[0]; my %branched; # hash of files mentioned in oblit output file as having lazy copies or being obliterated my %cleared; # files that have no branches, will save space by being obliterated my %spec_line; # files that should go in client spec as exclusions if(!open (FILE1, "< $File1")) { print "$File1: can't open, exiting.\n"; exit 1; } print "Searching $File1 for multiple lazy copy undos...\n"; printf ("%s", `date`); print "Sizes are in bytes\n"; print "Paths to be obliterated are:\n"; foreach my $opath (@oblit_paths) { print "\t$opath\n"; } print "\n"; # put each source file (to be obliterated) into a hash, count times it is hit my $all_count = 0; my $dup_count_total = 0; my @lines = ; close(FILE1); LINE: foreach my $line (@lines) { if($line =~ /^(.*) - copy from \/(.*)$/) { my $dest = $1; my $tsrc = $2; # put src name in #xx format (like obliterate output should do) $tsrc =~ /^(.*) 1.(.*)$/; my $src = $1 . "#" . $2; my $name = $1; # if it is copying to a path to also be purged, don't count as a hit foreach my $path (@oblit_paths) { if($dest =~ /$path/) { next LINE; } } # push dest into array of values for this file's hash key if(!exists($branched{$src})) { $all_count++; } push ( @{$branched{$src}}, $dest); } elsif($line =~ /^\/(.*) - purged/) { my $file = $1; # if it's to be purged but had no lazy copies, put it in the list of files safe to oblit if(!exists($branched{$file})) { $cleared{$file} = 0; $all_count++; } # more than one lazy copy means it will cost space to oblit it # one lazy copy means no space savings, but still oblit it elsif ($#{$branched{$file}} > 0) { $file =~ /(.*)#.*$/; $spec_line{$1} = 1; $dup_count_total++; } } } close(FILE1); my $tot_size = 0; # bytes reclaimed if no files with more than one copy obliterated my $extra_size = 0; # bytes lost if all lazy copies undone my $extra_reclaim_size = 0; # bytes reclaimed by obliterating other revs of files that have a rev which > 1 copy # open up a file to make a client spec from for obliteration, add exclusion lines for multiple link files if(!open (CLIENT_FILE, "> $client_spec_name")) { print "$client_spec_name: can't open, exiting.\n"; exit 1; } print CLIENT_FILE " Client: $client_spec_name Owner: $owner Description: Created by $owner for offline obliterate Root: /user/$owner/workspaces/oblit Options: noallwrite noclobber nocompress unlocked nomodtime normdir LineEnd: local View:\n"; foreach my $Path (@oblit_paths) { $Path =~ /\/(.*)/; printf CLIENT_FILE ("\t\"%s...\" \"/\/%s%s...\"\n", $Path, $client_spec_name, $1); } foreach my $File (keys %spec_line) { printf CLIENT_FILE ("\t\"-\/%s\" \"/\/%s%s\"\n", $File, $client_spec_name, $File); } close(CLIENT_FILE); # sort on greatest number of branches and print foreach my $File (sort { ($#{$branched{$b}}+1) <=> ($#{$branched{$a}}+1) } keys %branched) { # count up the bytes of revs that can safely be obliterated with no space loss; # put together a real filename $File =~ /(.*)#(.*)$/; my $FileSize = 0; my $fpath = $1; my $fver = $2; my $pname = $Root . $fpath . ",d/1." . $fver . ".gz"; if(-e $pname) { $FileSize = (stat($pname))[7]; } elsif ((-e ($pname = $Root . $fpath . ",v")) || (-e ($pname = $Root . $fpath . ",t"))) { $FileSize = (stat($pname))[7]; } elsif (-e ($pname = $Root . $fpath . ",d/1." . $fver)) { $FileSize = (stat($pname))[7]; } # there are other types, but amount of space used by them is insignificant (I think) # no reason to mention files with only one copy, they can be obliterated # but will save no space if ($#{$branched{$File}} > 0) { my $tot_gain = $FileSize * ($#{$branched{$File}}); my $copies = $#{$branched{$File}} + 1; print "$copies copies, $FileSize * $#{$branched{$File}} = $tot_gain. $File\n"; $extra_size += $tot_gain; # print out the where each copy would be made to foreach my $match (@{$branched{$File}}) { print "\t$match\n"; } } } foreach my $File (keys %cleared) { # count up the actual amount of stuff that can safely be obliterated; # put together a real filename $File =~ /(.*)#(.*)$/; my $FileSize = 0; my $fpath = $1; my $fver = $2; my $pname = $Root . $fpath . ",d/1." . $fver . ".gz"; if(-e $pname) { $FileSize = (stat($pname))[7]; } elsif ((-e ($pname = $Root . $fpath . ",v")) || (-e ($pname = $Root . $fpath . ",t"))) { $FileSize = (stat($pname))[7]; } elsif (-e ($pname = $Root . $fpath . ",d/1." . $fver)) { $FileSize = (stat($pname))[7]; } # there are other types, but amount of space used by them is insignificant (I think) # no lazy copies, count up space to be reclaimed # all revs of files with more than one multiple link version will be excluded from total if(!exists($spec_line{$fpath})) { $tot_size += $FileSize; } else { $extra_reclaim_size += $FileSize; } } print "$dup_count_total files have more than one lazy copy, out of $all_count files\n"; printf "$tot_size bytes (%.2f GB) can be cleaned out with no lazy copy space gain, using client $client_spec_name\n", $tot_size/1000000000.0; printf "$extra_size bytes (%.2f GB) would be added as duplicate files, if files with duplicates obliterated.\n", $extra_size/1000000000.0; printf "$extra_reclaim_size bytes (%.2f GB) of other file revs would be deleted, if duplicates obliterated.\n", $extra_reclaim_size/1000000000.0; printf " so, if dups obliterated, net change in storage would be %s (%.2f GB)\n", $tot_size - $extra_size + $extra_reclaim_size, ($tot_size - $extra_size + $extra_reclaim_size)/1000000000.0; print "client spec file with exclusion lines for dup files is $client_spec_name\n"; exit (0);