#!/bin/bash
#==============================================================================
# Copyright and license info is available in the LICENSE file included with
# the Server Deployment Package (SDP), and also available online:
# https://swarm.workshop.perforce.com/projects/perforce-software-sdp/view/main/LICENSE
#------------------------------------------------------------------------------

#==============================================================================
# Declarations and Environment

# Version ID Block. Relies on +k filetype modifier.
#------------------------------------------------------------------------------
# The keyword-expanded $Id$/$Change$ string below is parsed into:
#   VersionStream - the second path component of the depot path
#                   (e.g. "dev_c2s" from //p4-sdp/dev_c2s/...).
#   VersionCL     - the changelist number following the last ": ".
#   Version       - "<VersionStream>.<VersionCL>", upper-cased unless the
#                   stream name starts with "r".
# shellcheck disable=SC2016
declare VersionID='$Id: //p4-sdp/dev_c2s/Unsupported/Maintenance/EvilTwinDetector.sh#2 $ $Change: 31472 $'
declare VersionStream=${VersionID#*//}; VersionStream=${VersionStream#*/}; VersionStream=${VersionStream%%/*};
declare VersionCL=${VersionID##*: }; VersionCL=${VersionCL%% *}
declare Version=${VersionStream}.${VersionCL}
[[ "$VersionStream" == r* ]] || Version="${Version^^}"

# Locations of the P4U scripts and libraries; values already present in the
# environment take precedence over the defaults.
export P4U_HOME=${P4U_HOME:-/p4/common/bin}
export P4U_LIB=${P4U_LIB:-/p4/common/lib}
export P4U_ENV=$P4U_LIB/p4u_env.sh
# 'Unset' is a placeholder; the real log path is chosen after command line
# processing unless '-L' supplies one.
export P4U_LOG=Unset
export VERBOSITY=${VERBOSITY:-3}

# Environment isolation.  For stability and security reasons, put $P4U_HOME
# at the front of PATH so the known-good scripts there are found first.
# ~/bin and the current directory are appended after the inherited PATH.
export PATH=$P4U_HOME:$PATH:~/bin:.

# The environment file must be readable before anything else is loaded.
[[ -r "$P4U_ENV" ]] || {
   echo -e "\nError: Cannot load environment from: $P4U_ENV\n\n"
   exit 1
}

# Space-delimited list of bash libraries to source, in order.
declare BASH_LIBS=$P4U_ENV
BASH_LIBS+=" $P4U_LIB/libcore.sh"
BASH_LIBS+=" $P4U_LIB/libp4u.sh"

# $BASH_LIBS is intentionally unquoted so that it splits into one path per
# iteration; abort on the first library that fails to load.
for bash_lib in $BASH_LIBS; do
   source $bash_lib ||\
      { echo -e "\nFATAL: Failed to load bash lib [$bash_lib]. Aborting.\n"; exit 1; }
done

# Script defaults; the -si, -f, -w and -v<n> options override these.
declare -i SilentMode=0
declare ReportOnly=1
declare WorkingDir="/offline"
# Set by the main program to "$WorkingDir/etd".
declare WorkspaceStorage=
# NOTE(review): this resets VERBOSITY to 3 even if a different value was
# inherited from the environment by the earlier export -- confirm intended.
export VERBOSITY=3

#==============================================================================
# Local Functions

#------------------------------------------------------------------------------
# Function: find_evil_twins_between_streams
#
# Find evil twins between a stream and its parent.  Unless running in
# report-only mode, also connect each pair with a baseless integration that
# is resolved with '-ay' and submitted.
#
# Input:
# $1 - source stream path, e.g. //fgs/dev (required).
#
# Globals read:
#   P4USER, THISSCRIPT, WorkspaceStorage, TmpFile, TmpFile2, ReportOnly,
#   VERBOSITY, H2.
#
# Globals written:
#   EvilTwinCount is increased by the number of evil twins found.
#   P4CLIENT and P4CONFIG are exported, and the current directory is changed
#   to the temporary workspace root.
#
# Returns:
#   0 when processing completes.  Failures are reported with the library
#   function bail (presumably terminates the script -- defined elsewhere).
#------------------------------------------------------------------------------
function find_evil_twins_between_streams ()
{
   declare stream=${1:-Unset}
   declare parent=
   declare changelist=
   declare clientName=
   declare clientRoot=
   declare integData=
   declare integPaths=
   declare sourceFile=
   declare targetFile=
   declare -a integCmd=()
   declare -i count=0

   # Validate usage before asking the server anything about the stream.
   [[ $stream == Unset ]] && bail "find_evil_twins_between_streams(): BAD USAGE."

   parent=$(p4 -ztag -F %Parent% stream -o "$stream")
   [[ -n "$parent" ]] || bail "Could not determine the parent stream of [$stream]."

   msg "${H2}\nSearching for evil twins between [$stream] and its parent [$parent]:"

   # One temporary workspace per stream/parent pair; slashes in the stream
   # paths are replaced with underscores to form the client name.
   clientName="tmp.auto.etd.$P4USER.${stream//\//_}.TO.${parent//\//_}"
   clientRoot="$WorkspaceStorage/${clientName#tmp.auto.}"
   export P4CLIENT="$clientName"

   echo -e "Client: $P4CLIENT\n\nOwner: $P4USER\n\nDescription:\n\tUsed by $THISSCRIPT\n\nRoot: $clientRoot\n\nStream: $parent\n\n" > "$TmpFile"

   msg "Using generated temporary workspace $P4CLIENT."
   p4 -s client -i < "$TmpFile" || \
   bail "Failed to create/update temp client using this spec:\n$(cat "$TmpFile")\n"

   if [[ ! -d "$clientRoot" ]]; then
      run "mkdir -p $clientRoot" ||\
         bail "Failed to create workspace root dir."
   fi

   cd "$clientRoot" ||\
      bail "Failed to cd to $clientRoot."

   vmsg "Operating in: $PWD"

   # Keep this script's bookkeeping files out of the workspace's view.
   echo -e ".p4ignore*\n.p4config*\nEvilTwin.notes" >> .p4ignore
   echo -e "P4CLIENT=$clientName\nP4USER=$P4USER\nP4IGNORE=.p4ignore\n" > .p4config
   export P4CONFIG=.p4config

   run "p4 opened -m 1" \
      "Checking for opened files in workspace $P4CLIENT." 0 0 \
      "not opened on this client" ||\
      bail "Failed to verify that no files are opened in workspace $P4CLIENT."

   vmsg "Verified: No files opened in workspace $P4CLIENT."

   run "p4 -s client -f -s -S $parent" \
      "Switching workspace to target/parent stream [$parent]."

   run "p4 -s sync -q" \
      "Syncing workspace."

   # Detection: an integrate preview reports each baseless pair with a
   # "... without -i flag" message.
   integCmd=(p4 -s integ -Ro -n -S "$stream")

   msg "Checking for evil twins with: ${integCmd[*]} | grep 'without -i flag'"
   echo "${integCmd[*]} | grep 'without -i flag'" >> EvilTwin.notes
   "${integCmd[@]}" | grep 'without -i flag' > "$TmpFile" 2>&1

   if [[ -s "$TmpFile" ]]; then
      # Reading from stdin keeps the file name (and any padding some wc
      # implementations emit before it) out of the result.
      count=$(wc -l < "$TmpFile")
      msg "Found $count evil twins."
      if [[ $VERBOSITY -gt 4 ]]; then
         msg "Contents of $TmpFile:"
         cat "$TmpFile"
      fi
      EvilTwinCount=$((EvilTwinCount+count))
   fi

   # Start with an empty (but existing) pair list so the verbose dump below
   # never tries to read a missing file.
   : > "$TmpFile2"

   # Reduce each message line to "<source>__TO__<target>".
   while read -r integData; do
      targetFile=${integData%% - *}
      targetFile=${targetFile#info: }
      sourceFile=${integData##*t integrate from }
      sourceFile=${sourceFile% without -i flag}
      echo "${sourceFile}__TO__${targetFile}" >> "$TmpFile2"
   done < "$TmpFile"

   if [[ $VERBOSITY -gt 4 ]]; then
      msg "Contents of $TmpFile2:"
      cat "$TmpFile2"
   fi

   if [[ ! -s "$TmpFile2" ]]; then
      msg "No evil twins detected between $stream and $parent."
      return 0
   fi

   integCmd=(p4 -s integ -i)

   # If we're fixing rather than merely reporting, generate a pending changelist number.
   if [[ $ReportOnly -eq 0 ]]; then
      changelist=$(echo -e "Change: new\n\nClient: $P4CLIENT\n\nUser: $P4USER\n\nStatus: new\n\nDescription:\n\tFixing evil twins between $stream and $parent.\n\n" | p4 change -i | awk '{print $2}')
      if [[ $changelist =~ ^[0-9]+$ ]]; then
         integCmd+=(-c "$changelist")
      else
         bail "Failed to generate a pending changelist."
      fi
   fi

   # In report-only mode the baseless integrations are previewed, not opened.
   [[ $ReportOnly -eq 1 ]] && integCmd+=(-n)

   msg "Fixing evil twins between $stream and $parent."
   while read -r integPaths; do
      sourceFile=${integPaths%%__TO__*}
      targetFile=${integPaths##*__TO__}
      echo "${integCmd[*]} $sourceFile $targetFile"
      echo "${integCmd[*]} \"$sourceFile\" \"$targetFile\"" >> EvilTwin.notes
      "${integCmd[@]}" "$sourceFile" "$targetFile" ||\
         bail "This integ command failed: ${integCmd[*]} $integPaths $sourceFile $targetFile"
   done < "$TmpFile2"

   if [[ $ReportOnly -eq 0 ]]; then
      # Accept the target content; only the integration record is wanted.
      run "p4 -s resolve -ay" "Resolving accepting target." ||\
         bail "Resolve failed!"

      run "p4 -s submit -c $changelist" "Submitting change $changelist." ||\
         bail "Failed to submit change $changelist."
   fi

   return 0
}

#------------------------------------------------------------------------------
# Function: terminate
#
# Exit/signal handler installed with 'trap'.  Runs the library cleanup and
# log shutdown routines, then exits with $OverallReturnStatus.
#------------------------------------------------------------------------------
function terminate
{
   # Clear the traps first so the 'exit' below does not re-enter this handler.
   trap - EXIT SIGINT SIGTERM

   # Library cleanup routine (presumably removes the files listed in GARBAGE).
   cleanTrash

   vvmsg "$THISSCRIPT: EXITCODE: $OverallReturnStatus"

   # Close the log, unless logging was disabled with '-L off'.
   if [[ "${P4U_LOG}" != off ]]; then
      stoplog
   fi

   # Traps are gone, so this is a plain exit.
   exit $OverallReturnStatus
}

#------------------------------------------------------------------------------
# Function: usage (required function)
#
# Print the usage synopsis, optionally followed by the full manual text, and
# then exit with status 1.
#
# Input:
# $1 - style, either -h (for short form) or -man (for man-page like format).
#      Defaults to -h.
#
# Globals read: THISSCRIPT, Version, P4CBIN.
#------------------------------------------------------------------------------
function usage
{
   declare style=${1:--h}

   # tag::includeManual[]
   echo "USAGE for $THISSCRIPT version $Version:

$THISSCRIPT [-i <instance>] { -d <stream_depot> | -s //source/stream } [-w <work_dir>] [-f] [-L <log>] [-si] [-v<n>] [-D]

or

$THISSCRIPT [-h|-man|-V]
"
   if [[ $style == -man ]]; then
      echo -e "
DESCRIPTION:
	Detect and optionally fix \"evil twins\" in a given stream depot.

	The term \"evil twin\" has origins in the ClearCase version control
	system, the first to have truly sophisticated branching and merging
	capability.  With this sophistication came complexity, and an evil
	twin is one such complexity.  An evil twin can occur in any
	version control system with sophisticated branching and merging
	(Perforce, Git, ClearCase, AccuRev, etc.).

	In Perforce, an evil twin occurs when two files with the same name
	relative to the root of a stream are created in both streams with
	'p4 add', rather than the preferred workflow of creating src/foo.c
	in one stream and branching it into another.  When a file is created
	with 'p4 add' twice rather than the preferred 'add then branch' flow,
	there is no integration history connecting the files.  This creates
	a problem when merging across streams, as there is no 'base' or
	common ancestor that is needed to calculate merge results.

	In ClearCase, this was a bad situation indeed - one such file needed
	to be designated as the \"evil\" twin, and that file and its history
	would need to be obliterated ('rmelem' in ClearCase parlance). In
	Perforce, the situation is not nearly so bad.  Rather than designating
	one of the twins as evil, we simply do a 'baseless integration' and
	establish a family history, after which point the two twins can have
	a happy family reunion.

	Note that many ClearCase sites deployed an Evil Twin prevention
	trigger that could prevent evil twins from occurring in the first
	place, and deploying such a trigger is a best practice in
	ClearCase environments.  There is a similar trigger for Perforce,
	but it is not commonly deployed, since evil twins aren't nearly
	as problematic in Perforce.  They don't occur often.

	At a high level:
	* Evil twin detection means doing an integrate preview, and checking
	for the \"can't integrate without -i flag\" warning.  The -i flag is
	for baseless integrations, essentially evil twins.
	* Evil twin correction means doing an integrate with the -i flag on
	the individual files, and then doing a 'p4 resolve -ay' (accept
	\"yours,\" i.e. accept target), and submitting. That creates an
	integration record, visible as a merge arrow in P4V.

	This solution using 'p4 resolve -ay' is the best choice for mass
	detection and resolution of evil twins.  The assumption here is that
	the contents are already correct, and you only want to draw merge
	arrows and avoid changing content in the processing.

	For evil twins that occur organically in natural development, e.g.
	due to developer error or miscoordination among developers working
	in different streams, you might choose to resolve with -ay or -at
	(accept \"theirs,\" i.e. accept source).  Natural evil twins are
	uncommon and occur typically only for individual files.  (And if they
	are common, that could be a sign of a larger process/communication
	issue among teams).

	=== WARNING WARNING WARNING ===
	This script may require a large amount of temporary storage, as it
	creates a large number of stream workspaces and does a sync in them.
	See the '-w' flag.

OPTIONS:
 -i <instance>
	Specify the SDP instance name.

	This is required unless the SDP environment has previously been loaded
	in the current shell, i.e. by sourcing the $P4CBIN/p4_vars
	file and specifying the instance, e.g. :

	source $P4CBIN/p4_vars 1

 -d <stream_depot>
	Specify a stream depot to process, e.g. -d fgs to process the //fgs stream depot.

	This argument is required unless '-s //source/stream' is specified,
	in which case the stream depot is inferred from the stream path.

 -s //source/stream
	Specify a particular source stream to search.  The specified stream
	and its parent will be checked for evil twins.

 -w <work_dir>
	Specify a working directory, used for temporary workspace storage,
	etc. The default is \"/offline\".

	=== WARNING WARNING WARNING ===
	This script may require a large amount of temporary storage, as it
	creates a large number of stream workspaces and does a sync in them.

	This flag controls the scratch storage area used by this script.

	The workspace storage directory will be in an \"etd\" directory
	below the specified <work_dir>.  So by default, workspace root
	directories will appear one directory level below \"/offline/etd\".
	This script will attempt to create the \"etd\" directory under
	specified <work_dir> if it does not already exist, and abort
	if it cannot be created.


 -f	Fix evil twins with forced integrates doing baseless merges and
	resolving with '-ay', effectively drawing merge arrows to establish
	a branching relationship, but not affecting target file content.

	By default, a report shows the commands that would be run, but they
	are not executed.

 -v<n>	Set verbosity 1-5 (-v1 = quiet, -v5 = highest).

 -L <log>
	Specify the path to a log file, or the special value 'off' to disable
	logging.  All output (stdout and stderr) is captured in the log.

	NOTE: This script is self-logging.  That is, output displayed on the screen
	is simultaneously captured in the log file.  Do not run this script with
	redirection operators like '> log' or '2>&1', and do not use 'tee.'

 -si	Operate silently.  All output (stdout and stderr) is redirected to the log
	only; no output appears on the terminal.  This cannot be used with
	'-L off'.

 -D	Set extreme debugging verbosity.

HELP OPTIONS:
 -h	Display short help message
 -man	Display man-style help message
 -V	Display version info for this script and its libraries.

EXAMPLES:
	First, setup environment:
	cd $P4CBIN
	source ./p4_vars 1

	Then run as per the following examples, suitable for a long-running script.

	Example 1: Search for all Evil Twins in the fgs depot:
	nohup ./$THISSCRIPT -i 1 -w /big/scratch -d fgs < /dev/null > /tmp/etd.log 2>&1 &
	tail -f \$(ls -t \$LOGS/${THISSCRIPT%.sh}.*.log|head -1)

	Example 2: Find and fix all Evil Twins in the fgs depot:
	nohup ./$THISSCRIPT -i 1 -w /big/scratch -d fgs -f < /dev/null > /tmp/etd.log 2>&1 &
	tail -f \$(ls -t \$LOGS/${THISSCRIPT%.sh}.*.log|head -1)

	Example 3: Find and fix all Evil twins between //fgs/dev and its parent.
	nohup ./$THISSCRIPT -i 1 -w /big/scratch -s //fgs/dev -f < /dev/null > /tmp/etd.log 2>&1 &
	tail -f \$(ls -t \$LOGS/${THISSCRIPT%.sh}.*.log|head -1)
"
   # end::includeManual[]
   fi

   # Displaying help always ends the script with a non-zero status.
   exit 1
}

#==============================================================================
# Command Line Processing

# Scratch files; names are made unique with the shell PID and $RANDOM.
declare StreamListFile=/tmp/tmp.StreamList.$$.$RANDOM
declare TmpFile=/tmp/tmp.EvilTwins.$$.$RANDOM
declare TmpFile2=/tmp/tmp.EvilTwins2.$$.$RANDOM

# 'Unset' marks values that have not been supplied or computed yet.
declare StreamDepot=Unset StreamData=Unset SourceStream=Unset DepotTypeCheck=Unset

# Running totals reported in the final summary.
declare -i PathwayCount=0 EvilTwinCount=0

# Register the scratch files on the GARBAGE list (presumably consumed by the
# library cleanup routine -- defined elsewhere).
GARBAGE+=" $StreamListFile $TmpFile $TmpFile2"

# Number of extra positional parameters the current option consumed.
declare -i shiftArgs=0

# Options that take a value read $2, which may legitimately be unset here,
# so unset-variable checking is suspended while parsing.
set +u
until [[ $# -eq 0 ]]; do
   case "$1" in
      # Help and version requests.
      -h)   usage -h ;;
      -man) usage -man ;;
      -V)   show_versions; exit 1 ;;

      # Options that take a value.
      -d) StreamDepot=$2; shiftArgs=1 ;;
      -s) SourceStream=$2; shiftArgs=1 ;;
      -w) WorkingDir=$2; shiftArgs=1 ;;
      -i) export SDP_INSTANCE=$2; shiftArgs=1 ;;
      -L) export P4U_LOG=$2; shiftArgs=1 ;;

      # Simple flags.
      -f)  ReportOnly=0 ;;
      -si) SilentMode=1 ;;
      -n)  export NO_OP=1 ;;
      -v[1-5]) export VERBOSITY=${1#-v} ;;
      -D)  set -x ;; # Debug; use 'set -x' mode.

      *) usageError "Unknown arg ($1)." ;;
   esac

   # Drop the option itself, then any value(s) it consumed.  Running out of
   # parameters here means an option was given without its value.
   shift
   for (( ; shiftArgs > 0; shiftArgs-- )); do
      [[ $# -eq 0 ]] && usageError "Bad usage."
      shift
   done
done
set -u

#==============================================================================
# Command Line Verification

# Silent mode sends everything to the log, so a log is mandatory.
if [[ $SilentMode -eq 1 && $P4U_LOG == off ]]; then
   usageError "Cannot use '-si' with '-L off'."
fi

if [[ "$SourceStream" != Unset ]]; then
   if [[ "$SourceStream" != "//"* ]]; then
      usageError "The source stream specified with '-s' must be of the form //depot/stream."
   fi

   # Without '-d', the depot is the first path component of the stream.
   if [[ $StreamDepot == Unset ]]; then
      StreamDepot=${SourceStream#//}
      StreamDepot=${StreamDepot%%/*}
   fi
fi

if [[ $StreamDepot == Unset ]]; then
   usageError "The '-d <stream_depot>' parameter is required."
fi

# The instance comes from '-i' or from an already-loaded SDP environment.
export SDP_INSTANCE=${SDP_INSTANCE:-Unset}
if [[ $SDP_INSTANCE == Unset ]]; then
   usageError "The SDP environment must be loaded, or the '-i <instance>' parameter provided."
fi

# Load the SDP environment for the instance, then adjust it: P4ENVIRO is
# pointed at a path that cannot exist and P4CONFIG is cleared.
source p4_vars "$SDP_INSTANCE"
export P4ENVIRO=/dev/null/.p4enviro
unset P4CONFIG

# Default log: a timestamped file under $LOGS named after this script.
if [[ $P4U_LOG == Unset ]]; then
   export P4U_LOG="${LOGS}/${THISSCRIPT%.sh}.$(date +'%Y%m%d-%H%M').log"
fi

#==============================================================================
# Main Program

# From here on, every exit path goes through terminate().
trap terminate EXIT SIGINT SIGTERM

declare -i OverallReturnStatus=0

if [[ "${P4U_LOG}" != off ]]; then
   touch "${P4U_LOG}" || bail "Couldn't touch log file [${P4U_LOG}]."

   # Redirect stdout and stderr to a log file; unless running silently, also
   # keep a copy on the terminal via tee.
   if [[ $SilentMode -eq 0 ]]; then
      exec > >(tee "${P4U_LOG}")
      exec 2>&1
   else
      exec >"${P4U_LOG}"
      exec 2>&1
   fi

   initlog
fi

msg "$THISSCRIPT version $Version started at $(date)."

# If depot was specified as '//x', normalize to 'x'.
[[ "$StreamDepot" == "//"* ]] && StreamDepot=${StreamDepot#//}

# An empty result means the depot type could not be read from the server.
DepotTypeCheck=$(p4 -ztag -F %Type% depot -o "$StreamDepot" 2>/dev/null)

if [[ -n "$DepotTypeCheck" ]]; then
   [[ "$DepotTypeCheck" != "stream" ]] && \
      bail "The depot specified by '-d' must be of type 'stream', not $DepotTypeCheck."
else
   bail "Could not determine depot type for depot $StreamDepot. Aborting."
fi

WorkspaceStorage="$WorkingDir/etd"

# Avoid issues with dirs with spaces; the path is later embedded unquoted in
# command strings handed to run.
if [[ "$WorkspaceStorage" != *" "* ]]; then
   vmsg "Verified: The working dir [$WorkingDir] does not contain spaces."
else
   bail "The '-w <work_dir>' specified cannot contain spaces."
fi

if [[ ! -d "$WorkspaceStorage" ]]; then
   run "/bin/mkdir -p $WorkspaceStorage" ||\
      bail "Could not create dir: $WorkspaceStorage"
fi

if [[ $SourceStream == Unset ]]; then

   msg "Getting list of streams in //$StreamDepot."
   p4 -ztag -F "%Type%:%Stream%:%Parent%" streams "//$StreamDepot/*" > "$StreamListFile"

   # Process all non-virtual, non-mainline, non-task streams.  The list is
   # read on file descriptor 3 so that commands run inside the loop cannot
   # consume it through their standard input.
   while read -r StreamData <&3; do
      [[ $StreamData == "mainline:"* || $StreamData == "virtual:"* || $StreamData == "task:"* ]] && continue

      # Each line is "<type>:<stream>:<parent>"; keep the middle field.
      SourceStream=${StreamData#*:}
      SourceStream=${SourceStream%:*}

      find_evil_twins_between_streams "$SourceStream" ||\
         OverallReturnStatus=1

      PathwayCount=$((PathwayCount+1))
   done 3< "$StreamListFile"
else

   find_evil_twins_between_streams "$SourceStream" ||\
      OverallReturnStatus=1

   PathwayCount=1
fi

if [[ $OverallReturnStatus -eq 0 ]]; then
   msg "${H}\nAll processing completed successfully.\n"
else
   msg "${H}\nProcessing completed, but with errors.  Scan above output carefully.\n"
fi

msg "${H2}Summary:\n\tFound $EvilTwinCount evil twins searching $PathwayCount pathway(s). Note that this may contain duplicates across multiple streams.\n"

# Illustrate using $SECONDS to display runtime of a script.
msg "That took $((SECONDS/3600)) hours $((SECONDS%3600/60)) minutes $((SECONDS%60)) seconds.\n"

# See the terminate() function, which is really where this script exits.
exit "$OverallReturnStatus"
