viz_data_gen.rb #3

  • //
  • guest/
  • matt_attaway/
  • vdg/
  • viz_data_gen.rb
  • View
  • Commits
  • Open Download .zip Download (14 KB)
#!/usr/bin/env ruby
#
# Copyright (c) Matthew Attaway, Perforce Software Inc, 2013. All rights reserved
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1.  Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#
# 2.  Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PERFORCE
# SOFTWARE, INC. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR 
# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF 
# THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
# DAMAGE.
#
# User contributed content on the Perforce Public Depot is not supported by Perforce, although it may be supported by its author. 
# This applies to all contributions even those submitted by Perforce employees.
#
# = Synopsis
# 
# viz_data_gen: builds the event file from a Perforce server to feed into the code_swarm or Gource version control visualizers
#
# = Usage
# 
# viz_data_gen -p <port> -u <user> -s <startingChange> -e <endingChange> -d <depotpath> -C <config file> -V <visualizer> --verbose(-v) --condense(-c)
#
#Configuration File Syntax
#
#The config file uses the syntax <Keyword>=<Value>
#
#Keywords:
#	DepotPath - a path to look for changes on. There can be any number of DepotPath entries
#	IgnorePath - changes on this path will be excluded from the results. There can be any number of IgnorePath entries.
#	IgnoreUser - changes and jobs from this user will be discarded
#	IgnoreExtension - files that match the extension will be excluded
#	User - the user to connect to the server with
#	Port - the server to connect to
#	Verbose - boolean. Shows generator progress.
#	Condense - collapses all activity of branched files into one file. This is good for cutting down on the amount of files code_swarm has to track.
#	StartingChange - the change to start generating from
#	EndingChange - the change to stop generating at
#	CalculateWeights - boolean. Looks for the number of lines changed per file revision and weights the file appropriately.
#	IncludeJobs	- boolean. Adds events for each modified job during the specified span of changes
#	JobModifiedDateField - the name of the field used to track the last modification date of a job
#	JobModifiedByField - the name of the field used to track who made the last change to a job
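#	SpecDepotName - the name of the spec depot. Don't include any '/', just list the name
#	JobNameField - the name of the job field that holds the job name. Defaults to Job
#	Visualizer - the visualizer to generate data for: code_swarm (the default) or gource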

require 'rubygems'
require 'P4'
require 'getoptlong'
require 'rdoc'
require 'time'
require 'iconv' unless String.method_defined?(:encode)

# from http://jeffgardner.org/2011/08/04/rails-string-to-boolean-method/, with modifications
class String
  # Interprets the string as a boolean flag.
  #
  # Matching is case-insensitive and ignores surrounding whitespace, so a
  # value taken straight from a config line (trailing newline included) is
  # accepted as-is.
  #
  # Returns true for "true", "t", "yes", "y", "on" or "1", and false for
  # "false", "f", "no", "n", "off" or "0".
  # Raises ArgumentError for any other text.
  def to_bool
    # Both ends are anchored: matching only on the suffix would accept any
    # word that merely ends in a token (e.g. "not" as true, "on" as false).
    return true  if self =~ /\A\s*(true|t|yes|y|on|1)\s*\z/i
    return false if self =~ /\A\s*(false|f|no|n|off|0)\s*\z/i
    raise ArgumentError.new("invalid value for Boolean: \"#{self}\"")
  end
end

# translate invalid utf-8 sequences.
# solution from: http://stackoverflow.com/a/8873922/502497
# text - the string to clean up.
#
# Returns a copy of text in which byte sequences that are not valid UTF-8
# have been replaced (String#encode with :invalid => :replace). On a Ruby
# without String#encode the text is instead run through an Iconv
# UTF-8 -> UTF-8//IGNORE conversion.
def StripInvalidUTF8( text )
	unless String.method_defined?(:encode)
		converter = Iconv.new('UTF-8', 'UTF-8//IGNORE')
		return converter.iconv(text)
	end

	text.encode('UTF-8', 'UTF-8', :invalid => :replace)
end

#
# suck the data out of the config file
#
# Each line of the file has the form <Keyword>=<Value>. A recognized keyword
# updates the matching global setting ($depotPaths, $user, $verbose, ...).
# Lines without an '=' and lines with an unknown keyword are ignored.
#
# configFile - path of the config file to read.
#
# Raises the usual Errno error if the file cannot be opened, and
# ArgumentError (from String#to_bool) for an unrecognized boolean value.
def ParseConfigFile( configFile )
	File.foreach( configFile ) do | line |
		# Split on the first '=' only, so a value may itself contain '='.
		keyword, value = line.split( '=', 2 )

		# No '=' on the line at all: nothing to assign.
		next if value.nil?

		value = value.chomp

		case keyword
			when 'DepotPath'
				$depotPaths.push( value )
			when 'IgnorePath'
				$ignorePaths.push( value )
			when 'IgnoreUser'
				$ignoreUsers[ value ] = 1
			when 'IgnoreExtension'
				$ignoreExtensions[ value ] = 1
			when 'User'
				$user = value
			when 'Port'
				$port = value
			when 'Verbose'
				$verbose = value.to_bool
			when 'Condense'
				$condense = value.to_bool
			when 'StartingChange'
				$startingChange = value
			when 'EndingChange'
				$endingChange = value
			when 'CalculateWeight', 'CalculateWeights'
				# Both the singular and the plural spelling are accepted.
				$calculateWeight = value.to_bool
			when 'IncludeJobs'
				$includeJobs = value.to_bool
			when 'JobModifiedDateField'
				$jobModifiedDateField = value
			when 'JobModifiedByField'
				$jobModifiedByField = value
			when 'JobNameField'
				$jobNameField = value
			when 'SpecDepotName'
				$specDepotName = value
			when 'Visualizer'
				$visualizer = value
		end
	end
end


#
# get the total number of lines changed for each file
#
# strs - array of strings; the caller passes the untagged output of
#        `p4 describe -ds` for one change.
#
# Returns a Hash mapping a file path (the text between "==== " and the last
# '#' of a header line) to the line total computed from the diff summary
# that follows that header.
def GetLinesChanged( strs )
	fileDict = {}
	file = ""

	strs.each do |s|
		str = StripInvalidUTF8( s )

		# A "==== <path>#<rev> ..." header names the file the next summary
		# belongs to. Taking the path with a capture (rather than cutting a
		# fixed number of leading characters) keeps both slashes of
		# "//depot/..." whether or not the header starts with a newline, and
		# a header without '#' is skipped instead of raising.
		header = str[ /^==== (.*)#/, 1 ]
		file = header if header

		# Only a diff summary carries counts. The alternation is grouped so
		# the whole "<kind> N chunks ... lines" shape is required; otherwise
		# any text that merely contains "deleted" would be treated as a
		# summary and overwrite a real weight.
		next if str !~ /^(?:add|deleted|changed)\s\d+\schunks\s[\s\d\/]+\slines$/
		next if str =~ /^Change|job|\/\//

		args = str.split ' '

		# Word positions presumably correspond to a three-line summary:
		#   add N chunks A lines / deleted N chunks D lines /
		#   changed N chunks X / Y lines
		# giving A at 3, D at 8 and X, Y at 13 and 15 -- TODO confirm against
		# real `p4 describe -ds` output. Missing words count as 0.
		total  = args[3].to_i
		total += args[8].to_i
		total += [ args[13].to_i, args[15].to_i ].max

		fileDict[file] = total
	end

	return fileDict
end

# Writes the opening lines of the event file to output.
#
# Only the code_swarm format has a header (the XML declaration and the
# opening <file_events> tag); for any other $visualizer nothing is written.
def PrintHeader(output)
	return unless $visualizer == 'code_swarm'

	output.puts "<?xml version=\"1.0\"?>\n<file_events>\n"
end

# Writes the closing line of the event file to output.
#
# Only the code_swarm format has a footer (the closing </file_events> tag);
# for any other $visualizer nothing is written.
def PrintFooter(output)
    return unless $visualizer == 'code_swarm'

    output.puts "</file_events>\n"
end

#
# turn a job into code_swarm event
#
# output - IO-like object the event line is written to with puts.
# job    - the text of one job spec, one "Field:<TAB>value" per line.
#
# The job name, modification date and modifying user are read from the
# fields named by $jobNameField, $jobModifiedDateField and
# $jobModifiedByField. Nothing is written when the visualizer is gource,
# when the user is in $ignoreUsers, or when the job looks like a
# placeholder. A job that cannot be parsed is skipped rather than aborting
# the run; the reason is reported on STDERR when $verbose is set.
def PrintJob( output, job )

	# no decent way to show jobs with gource; skip 'em
	return if $visualizer == 'gource'

	file = ""
	date = Time.new
	weight = 1
	author = ""

	job = StripInvalidUTF8( job )

	job.each_line do |j|
		# comment lines of the spec carry no field
		next if j =~ /^#/

		args = j.split( ":\t" )

		case args[0]
			when $jobNameField
				file = args[1].chomp
			when $jobModifiedDateField
				date = Time.parse( args[1] )
			when $jobModifiedByField
				author = args[1].chomp
		end
	end

	return if $ignoreUsers.has_key?( author )

	# These comparisons look at the raw field values, so they have to run
	# before the quotes are turned into &quot; below.
	return if author == "\"\"" || file == "\"\"" || file == "new.job"

	# Both values end up inside XML attributes, so escape both.
	xmlEntities = { "&" => "&amp;", "<" => "&lt;", ">" => "&gt;", "'" => "&apos;", "\"" => "&quot;" }
	file   = file.gsub( /[&<>'"]/ ) { |ch| xmlEntities[ch] }
	author = author.gsub( /[&<>'"]/ ) { |ch| xmlEntities[ch] }

	output.puts  "<event date=\"" + date.to_i.to_s + "000" + "\" filename=\"" + file + ".job" + "\" author=\"" + author + "\" weight=\"" + weight.to_s + "\"/>\n"

rescue StandardError => e
	# Best effort: one malformed job must not stop the whole export.
	STDERR.puts "Skipping job that could not be processed: " + e.message if $verbose
	return

end

# Writes one file event to output in the format of the selected visualizer
# ($visualizer); any other visualizer value writes nothing.
#
# output - IO-like object the line is written to with puts.
# cTime  - time of the change; "000" is appended for code_swarm
#          (presumably turning seconds into milliseconds).
# path   - path of the file the event is reported against.
# cUser  - user who made the change.
# weight - weight of the event (code_swarm only).
# action - Perforce file action; for gource 'add' becomes A, 'delete'
#          becomes D and everything else M.
def PrintChange(output, cTime, path, cUser, weight, action)
    case $visualizer
        when 'code_swarm'
            # path and user are written into XML attributes, so all five
            # XML special characters are escaped, not just '&'.
            xmlEntities = { "&" => "&amp;", "<" => "&lt;", ">" => "&gt;", "'" => "&apos;", "\"" => "&quot;" }
            escape = lambda { |text| text.to_s.gsub( /[&<>'"]/ ) { |ch| xmlEntities[ch] } }

            output.puts "<event date=\"" + cTime.to_s + "000" + "\" filename=\"" + escape.call( path ) + "\" author=\"" + escape.call( cUser ) + "\" weight=\"" + weight.to_s + "\"/>\n"
        when 'gource'
            shortAction = 'M'
            if( action == 'add' )
                shortAction = 'A'
            elsif( action == 'delete' )
                shortAction = 'D'
            end
            output.puts "#{cTime}|#{cUser}|#{shortAction}|#{path}"
    end
end

#
# main
#
begin
	progName = "viz_data_gen"
	usage = "Usage: " + progName + " -p <port> -u <user> -s <startingChange> -e <endingChange> -d <depotpath> -C <config file> -V <code_swarm|gource> --verbose(-v) --condense(-c)"

	# Connection settings. An empty value is never assigned to the P4
	# object below, so its own setting stays in effect.
	$port = ""
	$user = ""

	# Range of changes to export. An ending change of 0 means "no upper bound".
	$startingChange = "1"
	$endingChange = 0

	$depotPaths  = Array.new
	$ignorePaths = Array.new
	$ignoreUsers = {}
	$ignoreExtensions = {}

	$verbose = false
	$condense = false
	$calculateWeight = false
	$includeJobs = false

	$jobModifiedDateField = "Date"
	$jobModifiedByField   = ""
	$jobNameField         = "Job"
	$specDepotName        = ""

	$visualizer = 'code_swarm'

	configFile = ""

	# get the command line options if any, overriding the defaults
	opts = GetoptLong.new(
		  [ '--help', '-h', GetoptLong::NO_ARGUMENT ],
		  [ '--user', '-u', GetoptLong::REQUIRED_ARGUMENT ],
		  [ '--port', '-p', GetoptLong::REQUIRED_ARGUMENT ],
		  [ '--verbose', '-v', GetoptLong::NO_ARGUMENT ],
		  [ '--condense', '-c', GetoptLong::NO_ARGUMENT ],
		  [ '--startingChange', '-s', GetoptLong::REQUIRED_ARGUMENT ],
		  [ '--endingChange', '-e', GetoptLong::REQUIRED_ARGUMENT ],
		  [ '--depotPath', '-d', GetoptLong::REQUIRED_ARGUMENT ],
		  [ '--configFile', '-C', GetoptLong::REQUIRED_ARGUMENT ],
		  [ '--visualizer', '-V', GetoptLong::REQUIRED_ARGUMENT ]
		)

	# Collect the options first so the config file can be parsed before any
	# of them is applied.
	optDict = {}
	opts.each{ | opt, arg |
		optDict[ opt ] = arg
	}

	# parse the config file if any. args provided on the command line will override config file values
	if( optDict.has_key?( "--configFile" ) )
		ParseConfigFile( optDict["--configFile"] )
	end

	optDict.each do |opt, arg|
		case opt
			when '--help'
				# tell the user how to call the tool instead of exiting silently
				puts usage
				exit
			when '--verbose'
				$verbose = true
			when '--condense'
				$condense = true
			when '--user'
				$user = arg
			when '--port'
				$port = arg
			when '--startingChange'
				$startingChange = arg
			when '--endingChange'
				$endingChange = arg
			when '--depotPath'
				# a path given on the command line replaces the config file paths
				$depotPaths.clear
				$depotPaths.push( arg )
			when '--visualizer'
				$visualizer = arg
		end
	end

	# make sure we have the settings we need if the user asked to include jobs
	if( $includeJobs && ( $jobModifiedDateField == "" || $jobModifiedByField == "" ) )
		STDERR.puts "JobModifiedDateField and JobModifiedByField must be set to include job changes."
		exit
	end

	if( $visualizer != 'code_swarm' && $visualizer != 'gource')
		STDERR.puts 'Visualizer must be set to either code_swarm or gource'
		exit
	end

	if( $visualizer == 'gource' && !$condense )
		puts "Condensed history is automatically enabled for Gource."
		$condense = true
	end

	if( $visualizer == 'gource' && $includeJobs )
		puts "Job activity is not supported for Gource. Disabling."
		$includeJobs = false
	end

	if( $visualizer == 'gource' && $calculateWeight )
		puts 'Weight calculations are not supported in gource. Disabling.'
		$calculateWeight = false
	end

	# Job export needs the spec depot. Check before the output file is
	# opened so a failed run does not leave a truncated file behind.
	if( $includeJobs && $visualizer == 'code_swarm' && $specDepotName == "" )
		$stderr.puts "Please specify a spec depot in the config file using the SpecDepotName variable"
		exit
	end

	# open target file
	filename = 'perforce.xml'

	if( $visualizer == 'gource' )
		filename = 'perforce.gource'
	end

	output = File.new(filename, "w")

	# print header
	PrintHeader(output)

	# set us up the Perforce
	p4 = P4.new()
	p4.prog = progName

	if( $port != "" )
		p4.port = $port
	end

	if( $user != "" )
		p4.user = $user
	end

	p4.connect()

	# fetch job data
	if( $includeJobs && $visualizer == 'code_swarm')

		# get bounding dates
		sd = p4.run_describe( $startingChange )
		d = Time.at( sd[0]["time"].to_i )
		startingDate =  d.year.to_s + "/" + d.month.to_s + "/" + d.day.to_s

		endingDate = ""
		if( $endingChange != 0 )
			ed = p4.run_describe( $endingChange )
			d = Time.at( ed[0]["time"].to_i )
			endingDate =  d.year.to_s + "/" + d.month.to_s + "/" + d.day.to_s
		else
			endingDate = "now"
		end

		# every revision of every job spec stored in the spec depot within the date range
		js = p4.run_files( "-a", "//" + $specDepotName + "/job/...@" + startingDate + "," + endingDate )

		# the job specs are printed untagged so PrintJob receives plain text
		p4.tagged = false
		js.each do |j|
			if ( $verbose )
				puts "Processing job " + j["depotFile"] + "#" + j["rev"]
			end

			p4.exception_level =  P4::RAISE_NONE
			job = p4.run_print( "-q", j["depotFile"] + "#" + j["rev"] )

			if ( job.length != 0 )
				PrintJob( output, job.join( "\n" ) )
			end
		end
		p4.tagged = true
	end

	# build the changes command, and get the changes
	$depotPaths.each_index do | i |
		$depotPaths[i] += "@>" + $startingChange

		if( $endingChange != 0 )
			$depotPaths[i] += "," + $endingChange
		end
	end

	# keyed by change number so a change seen on several paths is kept once
	changeDict = {}

	$depotPaths.each do | dp |
		cs = p4.run_changes( dp )

		cs.each do | c |
			if( !changeDict.has_key?( c["change"].to_i ) && !$ignoreUsers.has_key?(c["user"].chomp) )
				changeDict[c["change"].to_i] = 1
			end
		end
	end

	changes = changeDict.keys.sort

	# delete changes that are in the ignore paths
	$ignorePaths.each do | ip |
		cs = p4.run_changes( ip )

		cs.each do | c |
			changes.delete( c["change"].to_i )
		end
	end

	# condense bookkeeping: maps a branched path to the path its events are reported under
	files = {}

	# run through each change to get the pertinent info
	changes.each do | change |
		weightDict = {}
		p4.exception_level =  P4::RAISE_NONE
		result = p4.run_describe( change )

		if( $calculateWeight )
			# the diff summary is parsed as text, so fetch it untagged
			p4.tagged = false
			diffs = p4.run_describe( "-ds", change )
			weightDict = GetLinesChanged( diffs )
			p4.tagged = true
		end

		if( result.length == 0 )
			next
		end

		cUser = result[0]["user"]
		cTime = result[0]["time"]

		# no depotFile entry in the describe result: nothing to report for this change
		if( result[0]["depotFile"] == nil )
			next
		end

		if ( $verbose )
			print "Processing change " + change.to_s + "...\n"
		end

		for i in 0..result[0]["depotFile"].length-1 do

		  path   = result[0]["depotFile"][i]
		  action = result[0]["action"][i]
		  rev    = result[0]["rev"][i]

		  # drop byte sequences that are not valid UTF-8 from the path
		  if String.method_defined?(:encode)
			path.encode!('UTF-16', 'UTF-8', :invalid => :replace, :replace => '')
			path.encode!('UTF-8', 'UTF-16')
		  else
			ic = Iconv.new('UTF-8', 'UTF-8//IGNORE')
			path = ic.iconv(path)
		  end

		  # $1 is everything from the last '.' of the path onwards
		  path =~ /.*(\..*)/
		  if( $ignoreExtensions.has_key?( $1 ) )
			next
		  end


		  if ( $condense )

			if ( action == "branch" )
				# remember where the branch came from; the branch itself produces no event
				pathrev = path + '#' + rev
				log = p4.run_filelog( "-m1", pathrev )
				if ( log.length < 1 || log[0].revisions.length < 1 )
						next
				end
				for j in 0..log[0].revisions[0].integrations.length-1 do
					integ = log[0].revisions[0].integrations[j]
					if ( integ.how != "branch from" )
						next
					end
					# a source that is itself mapped resolves to the path it is mapped to
					if ( files.has_key?(integ.file) )
						files[path] = files[integ.file]
						break
					end
					files[path] = integ.file
				end
				next
			end

			if ( action == "integrate" )
				next
			end

			# report activity on a branched file against the mapped path
			if ( files.has_key?(path) )
				path = files[path]
			end

		  end # condense

		  weight = 1
		  if( weightDict.has_key?( result[0]["depotFile"][i] ) )
			weight = weightDict[result[0]["depotFile"][i]]
		  end

		  PrintChange(output, cTime, path, cUser, weight, action)
		end

	end

	# print footer
	PrintFooter(output)

	# release the server connection and flush the event file
	p4.disconnect
	output.close
end
# Change User Description Committed
#3 8566 drakino Converting path between UTF-8 to UTF-16 to scan for and replace any bad UTF byte sequences.
 Resolves possible issue where a bad character in a filename would result in an exception "invalid byte sequence in UTF-8 (ArgumentError)" being thrown.

Suggested fix from http://stackoverflow.com/questions/2982677/ruby-1-9-invalid-byte-sequence-in-utf-8

#review @matt_attaway
#2 8399 Matt Attaway Add support for Gource friendly output

With this change adding 'Visualizer=gource' to the config file
will shockingly enough enable output for the quite excellent
gource version control visualizer. To get a copy of gource visit:
http://code.google.com/p/gource/
#1 8372 Matt Attaway Add tool to generate data for the code_swarm version control visualizer.

This tool generates the necessary data for code_swarm to display the evolution
of your source code. It's unique in that it has options for compressing variants
into one node, excluding paths and file extensions, and including jobs that are
filed and fixed so that your QA people aren't left out of the fun.

All details are in the ruby script. More info on code_swarm can be found at
http://www.michaelogawa.com/code_swarm/

Thanks to @sam_stafford for the variant folding code.