#!C:/Programs/SDK/Perl64/bin/perl.exe
# includeCleaner.pl
# - a script to assist in cleaning up c/c++ includes
#   identifies where an include is in both the .h and .cpp files
#   as well as cases when a header is double-included in the same file.
# © 2014 Gabriel Weiss
# gweiss@perforce.com
#
# Usage: includeCleaner.pl [source-root]
#
# Output (one line per finding, on STDOUT):
#   header already included - <file> <include>   the include also appears in the
#                                                paired file (same directory, same
#                                                name, other extension)
#   duplicate header - <file> <include>          the include appears more than
#                                                once in <file>
# Exit status is 0 on success and 1 when the source root is not a directory.

use strict;
use warnings;
use File::Basename;
use File::Find;

# Pass in root folder of source tree or put in default path for ease of repeated use.
# The first command-line argument is $ARGV[0]; the script name lives in $0.
my $startDir = $ARGV[0];
if (!defined($startDir) || $startDir eq "") {
    $startDir = 'C:\perforce\gweiss_main_stream\src';
}
if (!-d $startDir) {
    print STDERR "You need to pass the path to the source root folder as the first argument.\n";
    exit 1;
}

# Gather all our filenames recursively
my @fileNames;
find(\&gatherFiles, $startDir);

# This could be used to catch any type of duplicate text. For our purposes I wanted
# to simply clean up our includes. The $includeText variable could be changed to
# whatever you want to be looking at for cleanup.
my $includeText = '#include';

# The cross-file check below compares each file with the one processed just
# before it, so a .cpp and its matching .h have to be neighbours in the list.
# File::Find makes no promise about ordering, so sort explicitly: first by the
# pairing key (directory + name without extension), then by full path.
my %pairKeyOf;
foreach my $path (@fileNames) {
    $pairKeyOf{$path} = pairingKey($path);
}
my @orderedFiles = sort { $pairKeyOf{$a} cmp $pairKeyOf{$b} or $a cmp $b } @fileNames;

my $previousKey;            # pairing key of the file handled in the previous iteration
my %previousIncludes = ();  # include targets found in that previous file

foreach my $fullPath (@orderedFiles) {
    my $currentKey   = $pairKeyOf{$fullPath};
    my @includeLines = readIncludes($fullPath, $includeText);

    # Same key as the previous file means we are looking at the counterpart of
    # the file we just parsed (e.g. foo.h right after foo.cpp). Any include that
    # is also in the previous file's hash is reported.
    if (defined($previousKey) && $currentKey eq $previousKey) {
        foreach my $include (@includeLines) {
            if (exists($previousIncludes{$include})) {
                print "header already included - ".$fullPath." ".$include."\n";
            }
        }
    }

    # Now look for duplicates inside the current file while building the hash
    # that the next iteration will compare against.
    my %seenIncludes = ();
    foreach my $include (@includeLines) {
        if (exists($seenIncludes{$include})) {
            print "duplicate header - ".$fullPath." ".$include."\n";
        }
        else {
            $seenIncludes{$include} = "1";
        }
    }

    $previousKey      = $currentKey;
    %previousIncludes = %seenIncludes;
}

exit 0;

# File::Find callback. Records the full path of every regular file whose name
# ends in .h or .cpp into @fileNames.
# Tweaked to only apply to .h and .cpp, but could be adjusted to handle any files
sub gatherFiles {
    my $file = $File::Find::name;
    # File::Find has chdir'ed into the directory, so $_ (the bare name) is what
    # the -f test must use. The alternation is grouped so that the dot and the
    # end anchor apply to both extensions.
    if (-f $_ && $file =~ /\.(?:h|cpp)\z/) {
        push @fileNames, $file;
    }
}

# Returns the key used to pair a source file with its header: the directory
# part of $path followed by the file name with its last extension removed.
sub pairingKey {
    my ($path) = @_;
    my ($stem, $dir) = fileparse($path, qr/\.[^.]*/);
    return $dir.$stem;
}

# Returns, in file order and including repeats, the text following $directive
# on every line of $path that starts with $directive. Leading whitespace before
# the target and trailing whitespace (including a stray CR) are stripped so the
# same include always yields the same string. Dies if the file cannot be opened.
sub readIncludes {
    my ($path, $directive) = @_;
    open(my $fh, "<", $path) or die "Couldn't open the file: ".$path." (".$!.")\n";
    my @found;
    while (my $line = <$fh>) {
        # \Q...\E keeps any regex metacharacters in the directive literal.
        if ($line =~ /^\Q$directive\E[ \t]+(\S.*?)\s*$/) {
            push @found, $1;
        }
    }
    close $fh;
    return @found;
}