#! /usr/bin/env python3.3 """Quickly translate Git commits to Perforce changes using P4D 15.1's new `p4 unzip` bulk importer. """ import binascii import bisect from collections import defaultdict, deque, namedtuple import copy import datetime from functools import lru_cache import hashlib import json import logging import os from pickle import Pickler, Unpickler from pprint import pformat import types import p4gf_branch import p4gf_branch_id from p4gf_case_conflict_checker import CaseConflictChecker import p4gf_config import p4gf_const from p4gf_copy_to_p4 import G2P import p4gf_create_p4 import p4gf_depot_branch from p4gf_desc_info import DescInfo import p4gf_eta from p4gf_fast_push_librarian import LibrarianStore, lbr_rev_str from p4gf_fast_push_rev_history import RevHistoryStore from p4gf_fastexport import FastExport from p4gf_filemode import FileModeStr, FileModeInt from p4gf_g2p_user import G2PUser from p4gf_gfunzip import GFUnzip import p4gf_git import p4gf_gitmirror from p4gf_hex_str import md5_str from p4gf_l10n import _, NTR from p4gf_new_depot_branch import NDBCollection from p4gf_object_type import ObjectType import p4gf_p4dbschema import p4gf_p4filetype import p4gf_p4key as P4Key from p4gf_p4typemap import P4TypeMap import p4gf_path from p4gf_preflight_checker import PreflightChecker, PreflightException import p4gf_progress_reporter as ProgressReporter import p4gf_pygit2 from p4gf_receive_hook import PreReceiveTupleLists import p4gf_squee_value from p4gf_time_space_recorder import TimeSpaceRecorder import p4gf_time_zone from p4gf_usermap import UserMap import p4gf_util import p4gf_version_3 LOG = logging.getLogger('p4gf_fast_push') MD5_NONE = "0"*32 # Value for deleted revisions # For less typing in LOG format calls _ab = p4gf_util.abbrev _fms = FileModeStr.from_int class FastPush: """Holder of common knowledge used during conversion from Git to Perforce. Expected call sequence: from_pre_receive() Factory returns a skeletal instance if current push can be fast-pushed. pre_receive() Run preflight check for push acceptability, AND also create giant 'p4 unzip' archive that we'll eventually send to Perforce, AND store temp files and pickled version of ourself to local filesystem. from_post_receive() Inflate from pickled version. post_receive() Transmit the giant 'p4 unzip' archive to Perforce. Use 'p4 unzip' response to learn correct changelist numbers, apply to gitmirror data Build gitmirror 'p4 unzip' archive, transmit to Perforce. Bulk-set ObjectType 'p4 keys' Bulk-update changelist descriptions to replace ":123" gfmarks with actual changelist numbers, set "push-state: complete" for branch heads. """ # pylint:disable=too-many-instance-attributes # # Yeah, this is pretty excessive. Especially since it's a two-phase class # where several members are only populated/used in pre-receive, several # others only populated/used in post-receive. def __init__( self, * , ctx , prl , ndb ): self.ctx = ctx self._ndb = ndb self._prl = prl # branch_id to PopulateCommit or None. # # PopulateCommit records the first commit on each # branch during _assign_changelist_numbers() for use # later when building changelists. # # None records that this is a root/orphan branch, # no populate required. self._branch_id_to_popcom = {} # Same values as _branch_id_to_popcom, but indexed by # the gfmark of the changelist that the PopulateCommit # precedes. 
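        # Illustrative example (hypothetical branch_id; both indexes point
        # at the same PopulateCommit instance):
        #     self._branch_id_to_popcom["br-003"]               -> pc
        #     self._gfmark_to_popcom[pc.ghost_precedes_gfmark]  -> pc
        # where pc.ghost_precedes_gfmark is a ":123"-style gfmark.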
self._gfmark_to_popcom = {} # branch/commit/changelist assignments # # Memory is expected to be on the order of what # the branch Assigner consumes, so this should # fit in memory as long as we drain the Assigner # at the same rate that we grow this store. self._othistory = ObjectTypeHistory() # A partial G2P instance that knows how to do # Git/Perforce user lookup and other things # that we'd rather reuse than copy-and-paste. # # Use sparingly. self._g2p = None # FastExport instance, set in _git_fast_export(). self._gfe = None # git-fast-export marks file. self._gfe_mark_to_sha1 = {} # The next changelist mark number to use. # Incremented and returned (as string ":123") # in _next_gfmark() self._next_change_num = 1 # Our BigStore of millions of librarian records, # for blobs. Addressed by sha1. This is how we # de-duplicate file revision blobs. # # Exists only during pre-receive. self._lbr_store = None # LibrarianStore() # Our BigStore of millions of ls-tree sha1s. # We only need to know "yeah, I already added it", # not a full librarian record. So this only needs # a set() API. # # Exists only during pre-receive. self._tree_store = None # TreeStore() # Storage type for all our librarian files. # S_GZ: Store as a *,d/ directory of individual # gzipped file revisions self._lbr_filetype_storage = p4gf_p4dbschema.FileType.S_GZ # Our BigStore of revision history for every single # depot file we create as a translated copy of a # Git file. # This is how we get the next revision number for # each new revision, and also how we calculate # #startRev,endRev ranges for integ sources. # # Exists only during pre-receive. self._rev_store = None # RevHistoryStore() # Our head commit/changelist for each branch. # Just need commit sha1 and change_num, but since we # have a full ObjectType handy, we'll use that. self._branch_id_to_curr_head_ot = {} # Which date to use for each changelist? self._changelist_date_source = ctx.repo_config.get( p4gf_config.SECTION_GIT_TO_PERFORCE , p4gf_config.KEY_CHANGELIST_DATE_SOURCE , fallback=p4gf_config.VALUE_DATE_SOURCE_P4_SUBMIT ).lower() # A persistent root that we delete only once we're # done. Directory and its contents must survive # pre- and post-receive hook time and into bgpush # time, where we actually 'p4 unzip' these contents # into the server. self._persistent_dir = _calc_persistent_dir(ctx.config.repo_name) p4gf_util.ensure_dir(self._persistent_dir) # Our blobs-and-trees zip archive. self._bat_gfunzip = None self._bat_gfunzip_abspath = None # Our translated changelists zip archive. self._commit_gfunzip = None # Count of currently open commit_gfunzip's written # db.rev records. To avoid consumig too much Perforce # server memory, Call _rotate_commit_gfunzip() when # this gets too high. self._commit_gfunzip_rev_ct = 0 # of current open gfunzip's revs # gfmark of first db.change in current commit_gfunzip self._commit_gfunzip_begin_gfmark = ":1" # List of closed gfunzip archives: # dict{ abspath : <path> # , begin_gfmark : ":123" } self._commit_gfunzip_list = [] # Our GitMirror zip archive. self._gitmirror_gfunzip = None self._gitmirror_gfunzip_abspath = None # Our DescInfo zip archive. # Created/filled/sent entirely in post-receive. self._desc_info_gfunzip = None self._desc_info_gfunzip_abspath = None # File that pre-receive gradually fills with # DescInfo struct for each changelist we write, and # which post-receive reads when updating gfmarks to # their final submitted changelist numbers. 
self._desc_info_fp = None self._desc_info_abspath = None # After 'p4 unzip'ing our commit_gfunzip, what integer # offset did Perforce apply to our gfmark numbers to # produce submitted changelist numbers? self._gfmark_offsets = GFMarkOffsets() # Where do all our branch heads go when the push # completes successfully? self._branch_head_otl = None # PreflightChecker and G2PUser instances used # during pre_receive() to both reject the unworthy # and assign changelist owner. # # Left None during post_receive(). self._preflight_checker = None self._g2p_user = None self._case_conflict_checker = None # 'p4 typemap' filetype assigner P4TypeMap instance. # Used during pre_receive, None during post_receive. self._p4typemap = None # Instrumentation. # How much time and space does each # translated file revision consume? self._rev_timer = TimeSpaceRecorder( report_period_event_ct = 1000 , log = LOG.getChild("memory.rev") , fmt = " {curr_event_ct:>15,} db.rev total" " {diff_time:5.2f} seconds" " {events_per_second:>7,} db.rev/second" " {events_per_mb:>10,} db.rev/mb" " {bytes_per_event:>10,} bytes/ct" " {memory_mb:7.2f} MB total" " {diff_kb:>15,} KB new" " {diff_aux_ct:>15,} copied_bytes" " {aux_per_second:>10,} copied_bytes/second" ) self._eta = None # How many raw uncompressed bytes of blob data have # we extracted from Git and written to the bat_zip self._byte_ct = 0 # How many db.revs can we write to a single 'p4 unzip' # commit_gfunzip payload before it's time to rotate to # a new one? 'p4 unzip' does all its work in Perforce # server memory, including some big index structures. # Seems to be about 30K per file revision, mostly due # to our really long depot_paths. self._max_rev_per_commit_gfunzip = 10**3 # Debugging limit. # How many commits to translate before terminating? # Useful when testing a subset of a huge repo. self._max_translate_commit_ct = 0 # 10000 LOG.debug("__init__") @staticmethod def from_pre_receive( * , ctx , prl , gsreview_coll , ndb ): """Factory. If this push will be a fast push, return a FastPush instance. If not, return None. """ if FastPush.can_fast_push(ctx, gsreview_coll): return FastPush( ctx = ctx , prl = prl , ndb = ndb ) # else: # LOG.error("No fast push for you!") return None @staticmethod def from_post_receive(ctx): """Factory. Inflate from the pickle (not JSON) file that pre_receive() wrote.""" return FastPush._from_pickled(ctx) def pre_receive(self): """Perform all the work that we do while the Git client is still connected and able to disconnect or terminate us. Since we can be terminated, it is unsafe to send changelists to Perforce here. Do that in post_receive(). Run 'git-fast-export', convert results to a zip archive containing everything Perforce needs to submit all the translated commits (just the translated commits and their files, not any gitmirror or other Git Fusion metadata). Raise a PreflightException or other error if we cannot translate something. Create local temp files to transfer knowledge to post_receive(). 
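
        Roughly, the on-disk artifacts left behind for post_receive()
        (names as used elsewhere in this module) are: the persistent
        fast_push directory holding the blobs-and-trees archive
        (bat_gfunzip), one or more commit_gfunzip.NNN archives,
        desc_info.dat, and the pickled FastPush state written by
        _write_pickle().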
""" LOG.debug("pre_receive() enter") try: start_dt = datetime.datetime.now() self._delete_fast_push_files() p4gf_util.ensure_dir(self._persistent_dir) self._create_big_stores() assigner = self._assign_branches() self._create_g2p() self._finish_branch_instantiations() self._git_fast_export() self._assign_changelist_numbers(assigner) self._open_bat_gfunzip() self._open_commit_gfunzip() self._open_desc_info_file(for_write=True) self._create_preflight_checker() self._create_p4typemap() self._translate_commits() self._write_dbi_files() self._close_desc_info_file() self._close_commit_gfunzip() self._close_bat_gfunzip() self._write_pickle() self._log_pre_receive_summary(start_dt) except Exception: # pylint: disable=broad-except self._delete_fast_push_files() LOG.debug("pre_receive() exit with exception") raise LOG.debug("pre_receive() exit") def _log_pre_receive_summary(self, start_dt): """Write a one-line summary to log at INFO level about what we did and how long it took. Intended to match p4gf_copy_to_git.py's G2P.copy() summary line. """ now_dt = datetime.datetime.now() duration_seconds = (now_dt - start_dt).total_seconds() LOG.info('Done. Changelists: {} File Revisions: {} Seconds: {}' .format( len(self._othistory) , self._rev_timer.event_ct , int(duration_seconds))) def post_receive(self): """ Perform work that we do while the Git client is no longer connected and thus unable to disconnect or terminate us. Send the `p4 unzip` archive to Perforce and wait for a receipt once Perforce finishes importing it. Send gitmirror data of Git commit and tree objects. Bulk-update changelist descriptions to replace ":123" gfmark placeholders with actual changelist numbers, and convert branch heads to "push-state: complete". Release atomic push and repo locks. """ LOG.debug("post_receive() enter") try: self._send_bat_gfunzip() self._send_commit_gfunzips() self._open_gitmirror_gfunzip() self._fill_gitmirror_gfunzip() self._close_gitmirror_gfunzip() self._send_gitmirror_gfunzip() self._fill_branch_head_otl() self._submit_config_files() self._open_desc_info_gfunzip() self._fill_desc_info_gfunzip() self._close_desc_info_gfunzip() self._send_desc_info_gfunzip() self._set_p4keys() self._delete_fast_push_files() except Exception: # pylint: disable=broad-except LOG.exception("FastPush.post_receive() failed.") raise LOG.debug("post_receive() exit") def prl(self): """Return a p4gf_receive_hook.PreReceiveTupleLists instance that describes the original pre-receive tuples that Git sent to our stdin. """ return self._prl def ndb_coll(self): """Return the p4gf_new_depot_branch.NDBCollection of newly defined branches, based on Git branch ref names and configuration settings. """ return self._ndb def _write_pickle(self): """Write a pickle file to transmit ourself from pre-receive time to post-receive time. Modified from p4gf_pre_receive_hook.write_packet(). 
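
        Approximate shape of the pickled dict (keys as written below;
        values elided):

            { "config": ..., "config2": ...
            , "prl": ..., "branch_dict": ..., "ndb": ...
            , "othistory": [...]
            , "bat_gfunzip_abspath": ...
            , "commit_gfunzip_list": [...]
            , "desc_info_abspath": ... }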
""" d = dict() config1 = self.ctx.repo_config.repo_config config2 = self.ctx.repo_config.repo_config2 d['config'] = p4gf_config.to_dict(config1) if config2 is not None: d['config2'] = p4gf_config.to_dict(config2) d['prl'] = self._prl.to_dict() d['branch_dict'] = p4gf_branch.to_dict(self.ctx.branch_dict()) if self._ndb: d['ndb'] = self._ndb.to_dict() d["othistory"] = self._othistory.to_dict_list() d["bat_gfunzip_abspath"] = self._bat_gfunzip_abspath d["commit_gfunzip_list"] = self._commit_gfunzip_list d["desc_info_abspath"] = self._desc_info_abspath try: file_abspath = _pickle_file_abspath(self.ctx.config.repo_name) p4gf_util.ensure_parent_dir(file_abspath) with open(file_abspath, "wb") as f: pickler = Pickler(f) pickler.dump(d) LOG.info("Fast Push state written: {}".format(file_abspath)) except TypeError as exc: LOG.error(_("Cannot serialize push data: {}") .format(pformat(d))) raise RuntimeError(_("Cannot serialize push data.")) from exc @staticmethod def _from_pickled(ctx): """Create and fill in a new FastPush instance from a _write_pickle() file. Modified from p4gf_post_receive_hook.read_packet() """ file_abspath = _pickle_file_abspath(ctx.config.repo_name) if not os.path.exists(file_abspath): return None with open(file_abspath, "rb") as f: unpickler = Unpickler(f) d = unpickler.load() LOG.info("Fast Push state read : {}".format(file_abspath)) #noisy! #LOG.debug3("Fast Push state:\n{}".format(pformat(d))) # read the pre-receive tuples prl = PreReceiveTupleLists.from_dict(d.pop('prl')) # read the branch dictionary (likely modified by assigner in preflight) branch_dict = p4gf_branch.from_dict(d.pop('branch_dict'), ctx.config.p4client) ctx.reset_branch_dict(branch_dict) # read the configuration data config = p4gf_config.from_dict(d.pop('config')) config2 = None if 'config2' in d: config2 = p4gf_config.from_dict(d.pop('config2')) ctx.repo_config.set_repo_config(config, None) ctx.repo_config.set_repo_config2(config2, None) ndb = None if 'ndb' in d: ndb = NDBCollection.from_dict(ctx, d.pop('ndb')) fp = FastPush( ctx = ctx , prl = prl , ndb = ndb ) #pylint:disable=protected-access fp._othistory = ObjectTypeHistory.from_dict_list(d["othistory"]) fp._bat_gfunzip_abspath = d["bat_gfunzip_abspath"] fp._commit_gfunzip_list = d["commit_gfunzip_list"] fp._desc_info_abspath = d["desc_info_abspath"] return fp def _delete_fast_push_files(self): """Attempt to delete the views/{repo}/fast_push/ directory. Okay if this fails: we'll be leaving behind a potentially large directory, but that's just wasteful, not fatal. """ try: # Be very careful with 'rm -rf'. If path doesn't look # right, die rather than delete. dir_abs_path = os.path.abspath(self._persistent_dir) if not os.path.exists(dir_abs_path): LOG.debug2("_delete_fast_push_files() nothing to delete {}" .format(dir_abs_path)) return LOG.debug("_delete_fast_push_files() {}".format(dir_abs_path)) if ( "/views/" not in dir_abs_path or not dir_abs_path.endswith("/fast_push") ): LOG.error("Fast Push directory {} does not look" " like something that Git Fusion can" " safely delete." .format(dir_abs_path)) return # Immediately remove it from our way LOG.debug("Fast Push cleanup: delete directory {}" .format(self._persistent_dir)) p4gf_util.rm_dir_contents(self._persistent_dir) except Exception: # pylint: disable=broad-except LOG.exception("Fast Push directory %s could not be deleted.", self._persistent_dir) @staticmethod def can_fast_push(ctx, gsreview_coll): """This code is optimized for a very specific, yet common, case. Return True if this is such a case. 
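
        In short (mirroring the checks below): no Git Swarm reviews in
        flight, fast push enabled in configuration, a case-sensitive
        Perforce server that supports 'p4 unzip', an empty repo view
        (first push), LFS disabled, and copy/rename detection disabled.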
""" if gsreview_coll: LOG.debug2("can_fast_push() 0 gsreview_coll") elif not _is_configured(ctx): LOG.debug2("can_fast_push() 0 not _is_configured()") elif not ctx.server_is_case_sensitive: LOG.debug2("can_fast_push() 0 server not case sensitive") elif not _server_supports_unzip(ctx): LOG.debug2("can_fast_push() 0 not _server_supports_unzip()") elif not _is_repo_view_empty(ctx): LOG.debug2("can_fast_push() 0 not _is_repo_view_empty()") elif ctx.is_lfs_enabled: LOG.debug2("can_fast_push() 0 ctx.is_lfs_enabled") elif ctx.find_copy_rename_enabled: LOG.debug2("can_fast_push() 0 ctx.find_copy_rename_enabled") else: LOG.debug("can_fast_push() 1") return True return False def _create_big_stores(self): """Create our disk-based storage for pre-receive calculations.""" default = p4gf_config.VALUE_FAST_PUSH_WORKING_STORAGE_SQLITE_MULTIPLE_TABLES store_how = self.ctx.repo_config.get( p4gf_config.SECTION_GIT_TO_PERFORCE , p4gf_config.KEY_FAST_PUSH_WORKING_STORAGE , fallback = default) LOG.debug("store_how = {}".format(store_how)) expected = [ p4gf_config.VALUE_FAST_PUSH_WORKING_STORAGE_DICT , p4gf_config.VALUE_FAST_PUSH_WORKING_STORAGE_SQLITE_MEMORY , p4gf_config.VALUE_FAST_PUSH_WORKING_STORAGE_SQLITE_SINGLE_TABLE , p4gf_config.VALUE_FAST_PUSH_WORKING_STORAGE_SQLITE_MULTIPLE_TABLES ] if store_how not in expected: LOG.warning("{key} {value} not in {expected}, using {default}" .format( key = p4gf_config.KEY_FAST_PUSH_WORKING_STORAGE , value = store_how , expected = expected , default = default)) store_how = default # even for the linux kernel, these fit in memory. # 200MB LibrarianStore.db # 91MB TreeStore.db # store_how_smaller = p4gf_config.VALUE_FAST_PUSH_WORKING_STORAGE_DICT d = self._persistent_dir # for less typing self._lbr_store = LibrarianStore( os.path.join(d, "LibrarianStore.db") , store_how_smaller) self._tree_store = TreeStore( os.path.join(d, "TreeStore.db") , store_how_smaller) self._rev_store = RevHistoryStore( self.ctx , os.path.join(d, "RevHistoryStore.db") , store_how) def _assign_branches(self): """Create our branch Assigner and let it do its thing.""" assigner = p4gf_branch_id.Assigner( branch_dict = self.ctx.branch_dict() , pre_receive_list = self._prl.set_heads # Context not required if we're not extending an # existing repo's set of branches. But also used # for uuid-sequential, so supply one anyways to make # testing easier. , ctx=self.ctx ) # First push has no previous branch sha1 # to connect up to. Don't bother looking # for those previous branch locations. assigner.connect_to_previous_branch_sha1 = False # Do not waste time flattening Assigner internal # elements to save memory. We're going to destroy # those elements almost immediately in # _assign_changelist_numbers(). assigner.flatten_memory = False # Do not waste time scanning for existing branch # assignments: there won't be any since this is the # first push of new data, and all those 'p4 files -e # <ObjectType commit path> can be expensive for huge # repos. assigner.assign_previous = False # Switch Assigner to tunneling mode before # assigning. 
        # tunnel_max_ct is tunable, seems to work best in range 10-50.
        # Results from apps-Calendar:
        #
        #   tunnel   lw branch   changelist   P4ROOT   _translate_commits()
        #   max_ct   ct          ct           MB       seconds
        #   ------   ---------   ----------   ------   --------
        #        0   908           7125        1000      101
        #        1   660           7142         835      101
        #        5   373           7840         608       81
        #       10   238           8817         494       80
        #       50   109          11703         431       81
        #      100   102          12000         444       89
        #      500    92          18424         521      101
        assigner.tunnel_max_ct = 10
        assigner.tunnel_assign = True

        assigner.assign()
        return assigner

    def _assign_changelist_numbers(self, assigner):
        """Assign ":123" gfmark strings to every changelist we will create.

        DESTRUCTIVE READ of branch Assigner, draining its .assign_dict
        as it converts each commit/branch association into an ObjectType
        commit/branch/changelist association. We need that memory back.
        """
        # Why sequence by sorted gfe_mark instead of
        # Assigner rev-list order?
        #
        # Required: changelist numbers must be assigned in the
        # same order as git-fast-export commits.
        # _translate_commit() requires all parent commits to
        # already have changelist and file rev numbers to act
        # as integ sources.
        sorted_gfe_marks = sorted( self._gfe_mark_to_sha1.keys()
                                 , key=int)
        for gfe_mark in sorted_gfe_marks:
            sha1 = self._gfe_mark_to_sha1[gfe_mark]
            branch_assignment = assigner.assign_dict.pop(sha1)
            for branch_id in branch_assignment.branch_id_list():
                change_gfmark = self._assign_ghost(
                      ghost_precedes_sha1 = sha1
                    , branch_id           = branch_id
                    , branch_assignment   = branch_assignment)
                if not change_gfmark:
                    change_gfmark = self._next_gfmark()

                # Remember this changelist so we can use it
                # as a parent of later commit/changelists.
                ot = ObjectType.create_commit(
                      sha1       = sha1
                    , repo_name  = self.ctx.config.repo_name
                        # SURPRISE: change_num here is a gf-mark ":123",
                        # not a real changelist number
                    , change_num = change_gfmark
                    , branch_id  = branch_id
                    )
                self._othistory.add(ot)

            # Sever the child<->parent links within the Assigner
            # nodes so that their memory becomes available while
            # we iterate, rather than only at the end when we drop
            # the very last node.
            branch_assignment.release_links()

        # Because our OTHistory replaces/impersonates a branch
        # Assigner for PreflightChecker, copy this bool to
        # OTHistory so that we can reject unworthy attempts to
        # push anonymous branches when configured to reject
        # anonymous branches.
        # This has nothing to do with OTHistory. We're just
        # not going to complicate PreflightChecker's API
        # with Yet Another API for this one bool. Sorry.
        self._othistory.have_anonymous_branches \
            = assigner.have_anonymous_branches

    def _assign_ghost( self, *
                     , ghost_precedes_sha1
                     , branch_id
                     , branch_assignment ):
        """If this is the first commit on a branch, then we usually need
        to insert a ghost changelist to populate the branch with a copy
        of this commit's first-parent.

        Allocate a PopulateCommit instance to eventually hold that
        ghost changelist.

        Return the gfmark allocated to the non-ghost that follows this
        ghost. If no ghost required, return None.
        """
        # Branch already has at least one commit?
        # Then it's already populated.
        if branch_id in self._branch_id_to_popcom:
            return None

        # Is this a root/orphan commit? Then no
        # populate required.
        if not branch_assignment.parents:
            self._branch_id_to_popcom[branch_id] = None
            return None

        ghost_of_sha1 = branch_assignment.parents[0]

        # Re-repo works better if it can dereference a
        # ghost changelist to find the original copied-to-Git
        # non-ghost changelist that we're ghosting.
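        # Illustrative example (hypothetical numbers): a ghost consumes
        # the gfmark immediately before its non-ghost, e.g.
        #     ghost_gfmark          = ":7"  (the populate changelist)
        #     ghost_precedes_gfmark = ":8"  (returned to the caller and
        #                                    reused as the non-ghost's
        #                                    change_num)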
ghost_of_otl = self._othistory.sha1_to_otl(ghost_of_sha1) if ghost_of_otl: ghost_of_change_num = ghost_of_otl[0].change_num parent_branch_id = ghost_of_otl[0].branch_id else: ghost_of_change_num = None parent_branch_id = None ghost_gfmark = self._next_gfmark() ghost_precedes_gfmark = self._next_gfmark() pc = PopulateCommit( branch_id = branch_id , ghost_of_sha1 = ghost_of_sha1 , ghost_of_change_num = ghost_of_change_num , ghost_of_branch_id = parent_branch_id , ghost_gfmark = ghost_gfmark , ghost_precedes_gfmark = ghost_precedes_gfmark , ghost_precedes_sha1 = ghost_precedes_sha1 ) assert pc.ghost_precedes_gfmark not in self._gfmark_to_popcom self._branch_id_to_popcom[branch_id] = pc self._gfmark_to_popcom[ghost_precedes_gfmark] = pc return ghost_precedes_gfmark def _create_g2p(self): """Instantiate the G2P kitchen sink class that we use for a desc_info, but not for the entire translation. """ assert not self._g2p g2p = G2P( ctx = self.ctx , assigner = None , gsreview_coll = None ) # Stub out some functions that # G2P.change_desc_info() calls, but which we choose # not to permit because they drag in too many # assumptions about already-submitted changelists, or # cross-branch ancestry. Just the kind of time- # consuming calculation we're trying to avoid. def return_none(_self): """ NOP replacement. """ return None def commit_sha1_to_otl(_self, sha1): """Replacement for G2P.commit_sha1_to_otl() Use our own FakeObjectTypeHistory() instead of live //.git-fusion/objects/.... or G2P.marks. """ # SURPRISE: parameter _self is a G2P instance, # which knows nothing of our fake OT history. # Use our own FastPush instance "self", # bound when this _create_g2p() runs. return self._othistory.sha1_to_otl(sha1) # Monkey patch/Duck punch instance (not class) method # recipe from # http://stackoverflow.com/questions/962962 #pylint: disable=protected-access g2p.commit_sha1_to_otl = types.MethodType(commit_sha1_to_otl, g2p) g2p._parent_branch = types.MethodType(return_none, g2p) # We don't (yet) have any fast-export mark/sha1 dict. # We'll re-poke this into G2P later, after we # run git-fast-export. For now, have emptiness. g2p.fast_export_marks = _FPMarks(gfe_mark_to_sha1 = dict()) self._g2p = g2p def _create_preflight_checker(self): """Create the PreflightChecker instance that we use during pre_receive(). """ usermap = UserMap( self.ctx.p4gf , self.ctx.email_case_sensitivity ) self._g2p_user = G2PUser( ctx = self.ctx , usermap = usermap ) self._preflight_checker = PreflightChecker( ctx = self.ctx , g2p_user = self._g2p_user , assigner = self._othistory # SURPISE! , gsreview_coll = None , already_copied_commit_runner = None , finish_branch_definition = None ) self._preflight_checker.fast_export_marks = self._g2p.fast_export_marks # Suppress per-commit 'p4 client -i' view swap. self._preflight_checker.set_client_on_branch_switch = False # Not a PreflightChecker data member, but passed to # PreflightChecker.check_commit_for_branch(). # Might as well create it here along with other # PreflightChecker requirements. if not self.ctx.server_is_case_sensitive: self._case_conflict_checker = CaseConflictChecker(self.ctx) def _create_p4typemap(self): """Instantiate the 'p4 typemap' filetype assigner for pre_receive().""" self._p4typemap = P4TypeMap(self.ctx) def _finish_branch_instantiations(self): """Assigner fills in branch_dict, but with skeletal Branch instances and no DepotBranchInfo instances. Flesh 'em out. 
""" assert self._g2p for branch in p4gf_branch.ordered(self.ctx.branch_dict().values()): self._finish_branch_instantiation(branch) def _finish_branch_instantiation(self, branch): """Assigner creates skeletal branches with no view mapping or depot branch. Fill 'em in. Skip dbi parent calculation: all our branches are fully populated with no inheritance, no parent, no fully populated basis. """ dbi = branch.depot_branch if isinstance(branch.depot_branch, str): # DBID assigned, but no depot branch instantiated yet. dbi = p4gf_depot_branch.new_definition( dbid = branch.depot_branch , repo_name = self.ctx.config.repo_name , p4 = self.ctx.p4gf ) elif dbi is None and not branch.view_p4map: # No DBID or view assigned yet, just a branch ID. dbi = p4gf_depot_branch.new_definition( repo_name = self.ctx.config.repo_name , p4 = self.ctx.p4gf ) if dbi: self.ctx.depot_branch_info_index().add(dbi) branch.depot_branch = dbi if not branch.view_p4map: # No view mapping assigned yet. # Create a mapping by rerooting "master" to dbi. self._g2p.define_branch_view( branch = branch , depot_branch = dbi , parent_otl = []) branch.set_rhs_client(self.ctx.p4.client) def _next_gfmark(self): """Increment and return a changelist mark number ":1234".""" r = ":{}".format(self._next_change_num) self._next_change_num += 1 return r def _git_fast_export(self): """ Run git-fast-export to a temp file. Does NOT parse the file: that occurs commit-by-commit when you iterate over gfe.parse_next_command(), which streams commit dicts and avoids building a giant list. Memory: FastExport loads the entire output into memory, and keeps it there. About 1GB for the linux kernel. """ self._gfe = FastExport( ctx = self.ctx , last_old_commit = None , last_new_commit = [prt.new_sha1 for prt in self._prl.sets()] ) self._gfe.run(parse_now = False) self._gfe_mark_to_sha1 = self._gfe.marks # Poke the new mark dict into our hacked/patched G2P # so that it will see the newly fast-exported marks. self._g2p.fast_export_marks = _FPMarks(self._gfe_mark_to_sha1) def _open_bat_gfunzip(self): """Create a new object to receive blobs and trees as we encounter them later when we translate commits. """ self._bat_gfunzip = self._create_gfunzip("bat_gfunzip") def _open_commit_gfunzip(self): """Create a new object to receive changelists as we encounter them later when we translate commits into changelists. """ dir_index = 1 + len(self._commit_gfunzip_list) dir_name = "commit_gfunzip.{:03d}".format(dir_index) self._commit_gfunzip = self._create_gfunzip(dir_name) self._commit_gfunzip_rev_ct = 0 self._rev_store.clear_all_contextdata_written() def _open_gitmirror_gfunzip(self): """Create a new object to receive ObjectType copies of Git commits, depot branch-info files, and p4gf_config(2) files. """ self._gitmirror_gfunzip = self._create_gfunzip("gitmirror_gfunzip") def _open_desc_info_gfunzip(self): """Create a new object to receive db.change and db.desc updates of already-submitted changelists. """ self._desc_info_gfunzip = self._create_gfunzip("desc_info_gfunzip") def _create_gfunzip(self, dir_name): """Create a new XxxxZip object to receive data that we'll send to Perforce in a 'p4 unzip' command later. """ obj = GFUnzip( self.ctx , os.path.join(self._persistent_dir, dir_name) ) obj.open() return obj def _open_desc_info_file(self, for_write = True): """Open a JSON file through which we'll send our DescInfo from pre-receive to post-receive. 
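
        Each record presumably corresponds to one _record_desc_info()
        call (gfmark, DescInfo, changelist date, owner), and is read
        back in post-receive by _fill_desc_info_gfunzip().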
""" self._desc_info_abspath = os.path.join( self._persistent_dir , "desc_info.dat" ) if for_write: flags = "w" else: flags = "r" self._desc_info_fp = open( self._desc_info_abspath , flags , encoding = "utf-8" ) def _close_desc_info_file(self): """Close the DescInfo file that pre-receive writes and post-receive reads. """ if self._desc_info_fp: self._desc_info_fp.close() self._desc_info_fp = None def _close_bat_gfunzip(self): """Flush the blobs-and-trees journal files, copy to zip, close the zip. """ self._bat_gfunzip.close() self._bat_gfunzip_abspath = self._bat_gfunzip.p4unzip.zip.abspath def _close_commit_gfunzip(self): """Flush the translated-commits journal files, copy to zip, close the zip. """ self._commit_gfunzip.close() d = { "abspath" : self._commit_gfunzip.p4unzip.zip.abspath , "begin_gfmark" : self._commit_gfunzip_begin_gfmark } self._commit_gfunzip_list.append(d) def _close_gitmirror_gfunzip(self): """Flush the GitMirror journal files, copy to zip, close the zip. """ self._gitmirror_gfunzip.close() self._gitmirror_gfunzip_abspath \ = self._gitmirror_gfunzip.p4unzip.zip.abspath def _close_desc_info_gfunzip(self): """Flush the db.change/db.desc journal file, copy to zip, close the zip. """ self._desc_info_gfunzip.close() self._desc_info_gfunzip_abspath \ = self._desc_info_gfunzip.p4unzip.zip.abspath def _reset_p4_connection(self): """Help P4D defend against memory leaks: reset our connection to p4 after every 'p4 unzip' request. P4D leaves a lot of memory around after 'p4 unzip', all tied to the connection that ran the unzip. Resetting the connection frees up that memory on the P4D server. """ p4gf_create_p4.p4_disconnect(self.ctx.p4) p4gf_create_p4.p4_connect(self.ctx.p4) def _send_bat_gfunzip(self): """Network transfer the blobs-and-trees zip archive to Perforce, import it into Perforce. """ LOG.debug("_send_bat_gfunzip() {}".format(self._bat_gfunzip_abspath)) if not os.path.exists(self._bat_gfunzip_abspath): raise RuntimeError(_("Blobs-and-trees zip file not found: {}") .format(self._bat_gfunzip_abspath)) cmd = [ "unzip" , "-A" # include (a)rchived revisions , "-f" # (f)orce: clobber existing revisions , "-I" # skip (i)nteg records (we don't send any) # deep-undoc flag to tell P4D to leave our # lbrRev "1.1" values unchanged. Allows us # to know a blob's full lbr record without # having to run a HUGE O(n) revisions query # p4 files //.gf/objects/blobs/... # , "--retain-lbr-revisions" , "--transfer" # deep-undoc network transfer flag. , "-i", self._bat_gfunzip_abspath ] LOG.debug3("_send_bat_gfunzip() p4 {}".format(" ".join(cmd))) r = self.ctx.p4run(*cmd) LOG.debug3(pformat(r)) self._reset_p4_connection() def _send_commit_gfunzips(self): """Network transfer the translated commits zip archives to Perforce, import them into Perforce. """ for d in self._commit_gfunzip_list: self._send_commit_gfunzip( commit_gfunzip_abspath = d["abspath"] , first_pushed_gfmark = d["begin_gfmark"] ) def _send_commit_gfunzip(self , commit_gfunzip_abspath , first_pushed_gfmark ): """Network transfer a single commits zip archive to Perforce, import it into Perforce. """ LOG.debug("_send_commit_gfunzip() {}" .format(commit_gfunzip_abspath)) if not os.path.exists(commit_gfunzip_abspath): raise RuntimeError(_("Commit zip file not found: {path}") .format(path=commit_gfunzip_abspath)) cmd = [ "unzip" , "--transfer" # deep-undoc network transfer flag. 
, "-i", commit_gfunzip_abspath ] LOG.debug3("_send_commit_gfunzip() p4 {}".format(" ".join(cmd))) r = self.ctx.p4run(*cmd) first_pushed_change_num \ = p4gf_util.first_value_for_key(r, "firstPushedChange") last_pushed_change_num \ = p4gf_util.first_value_for_key(r, "lastPushedChange") LOG.debug("_send_commit_gfunzip()" " first_gfmark={}" " firstPushedChange={}" " lastPushedChange={}" .format( first_pushed_gfmark , first_pushed_change_num , last_pushed_change_num)) self._gfmark_offsets.append( gfmark = first_pushed_gfmark , submitted_change_num = first_pushed_change_num ) self._reset_p4_connection() def _send_gitmirror_gfunzip(self): """Network transfer the blobs-and-trees zip archive to Perforce, import it into Perforce. """ LOG.debug("_send_gitmirror_gfunzip() {}" .format(self._gitmirror_gfunzip_abspath)) if not os.path.exists(self._gitmirror_gfunzip_abspath): raise RuntimeError(_("GitMirror zip file not found: {}") .format(self._gitmirror_gfunzip_abspath)) cmd = [ "unzip" , "-A" # include (a)rchived revisions , "-f" # (f)orce: clobber existing revisions , "-I" # skip (i)nteg records (we don't send any) # deep-undoc flag to tell P4D to leave our # lbrRev "1.1" values unchanged. Allows us # to know a blob's full lbr record without # having to run a HUGE O(n) revisions query # p4 files //.gf/objects/blobs/... # , "--transfer" # deep-undoc network transfer flag. , "-i", self._gitmirror_gfunzip_abspath ] LOG.debug3("_send_gitmirror_gfunzip() p4 {}".format(" ".join(cmd))) r = self.ctx.p4run(*cmd) LOG.debug3(pformat(r)) self._reset_p4_connection() def _send_desc_info_gfunzip(self): """Network transfer the bulk-update 'change -f' zip archive to Perforce, import it into Perforce. """ LOG.debug("_send_desc_info_gfunzip() {}" .format(self._desc_info_gfunzip_abspath)) if not os.path.exists(self._desc_info_gfunzip_abspath): raise RuntimeError(_("Changelist desc_info zip file not found: {path}") .format(path=self._desc_info_gfunzip_abspath)) cmd = [ "unzip" , "-f" # (f)orce: clobber existing revisions , "-I" # skip (i)nteg records (we don't send any) , "-R" # skip (r)ev records (we don't send any) , "--transfer" # deep-undoc network transfer flag. , "-i", self._desc_info_gfunzip_abspath ] LOG.debug3("_send_desc_info_gfunzip() p4 {}".format(" ".join(cmd))) r = self.ctx.p4run(*cmd) LOG.debug3(pformat(r)) self._reset_p4_connection() def _translate_commits(self): """Iterate through all git-fast-export commits, converting to Perforce changelists. """ changelist_ct = self._othistory.ct() self._eta = p4gf_eta.ETA(total_ct = changelist_ct) with ProgressReporter.Determinate(changelist_ct): LOG.debug("Total changelists: {:,d}".format(changelist_ct)) for gfe_commit in self._gfe.parse_next_command(): self._translate_commit(gfe_commit) # Debugging: early termination for huge repos. if self._max_translate_commit_ct: self._max_translate_commit_ct -= 1 if self._max_translate_commit_ct <= 0: break # Raise all "accumulate then report at the end" # errors. # # Could move this copypasta into PreflightChecker # along with the rest of CaseConflictChecker # creation/feeding/management. # if self._case_conflict_checker: cc_text = self._case_conflict_checker.conflict_text() if cc_text: raise PreflightException(cc_text) def _translate_commit(self, gfe_commit): """Convert one git-fast-export commit dict to one Perforce changelist for each branch to which it is assigned. 
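
        Per commit, roughly: look up the author/pusher, run the
        preflight checks, then for each ObjectType assignment translate
        the commit into a changelist on that branch, and finally copy
        the commit's trees into the blobs-and-trees archive.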
""" sha1 = gfe_commit['sha1'] otl = self._othistory.sha1_to_otl(sha1) self._g2p_user.get_author_pusher_owner(gfe_commit) self._preflight_checker.check_commit(gfe_commit) for ot in otl: self._preflight_checker.check_commit_for_branch( commit = gfe_commit , branch_id = ot.branch_id , any_locked_files = False , case_conflict_checker = self._case_conflict_checker ) self._translate_commit_ot(gfe_commit, ot) self._copy_trees_to_bat(sha1) def _translate_commit_ot(self, gfe_commit, ot): """Convert one git-fast-export commit dict to one Perforce changelist on the branch specified in ot. """ branch_id = ot.branch_id branch = self.ctx.branch_dict()[branch_id] if LOG.isEnabledFor(logging.DEBUG): self._eta.increment() ProgressReporter.increment( _("Converting changelists... eta {et} {ed}") .format( et = self._eta.eta_str() , ed = self._eta.eta_delta_str())) LOG.debug("_translate_commit_ot() eta={et} {ed} ot={ot}" .format( ot = ot , et = self._eta.eta_str() , ed = self._eta.eta_delta_str())) # First commit/changelist on a new depot branch? # If, during changelist assignment, we also assigned # a PopulateCommit to precede this changelist on this # branch, then now's the time to populate. popcom = self._gfmark_to_popcom.get(ot.change_num) if popcom: self._populate_branch(popcom, branch) # Before starting this commit/changelist, if the # commit_gfunzip file is getting full, close it # and open a fresh one. # # Do this AFTER any popcom/ghost changelist, just to # simplify our code: unconditionally record # ot.change_num as the first changelist written to # the new gfunzip. gfe_files = gfe_commit["files"] if not self._commit_gfunzip_can_accept(len(gfe_files)): self._rotate_commit_gfunzip(next_gfmark = ot.change_num) self._commit_gfunzip_rev_ct += len(gfe_files) # If the commit that git-fast-export used as the # "before" half of its diff is not what this Perforce # branch currently holds, replace git-fast-export's # useless diff with one we generate against the current # branch contents. if not self._branch_head_is_gfe_first_parent(gfe_commit, ot): gfe_files = self._replace_gfe_with_gdt(gfe_commit, ot) # Mark submodule file delete actions with "gitlink" tag # so that _translate_rev() can skip over them: they are # not tracked as files in Perforce. # gitlinks = self._mark_submodule_deletes(gfe_files, ot) # # Stuff that list of gitlinks list into gfe_commit for # transfer to DescInfo. Yeah it's a bit of a hack, but # it's either that or widen a LOT of API to get this # gitlink list through to G2P.change_desc_info(). # # Set/clear this each time through: we reuse the same # gfe_commit object for multiple branches, don't want # to repeat some other branch's submodule list. if gitlinks: gfe_commit["gitlinks"] = gitlinks else: gfe_commit.pop("gitlinks", None) # Start a new changelist as a db.change record. Can't # write it until after processing file revisions: need # the greatest common depot directory from the file # revisions for the db.change.root field. di = self._to_desc_info(gfe_commit, branch) change_utc_dt = self._calc_changelist_utc_dt(di) self._record_desc_info( gfmark = ot.change_num , desc_info = di , change_utc_dt = change_utc_dt , owner = gfe_commit["owner"] ) self._write_change_records( gfmark = ot.change_num , di = di , change_utc_dt = change_utc_dt ) integ_src_otl = self._create_integ_src_otl(di, ot.branch_id) # Extract the commit's file revisions from Git and # store directly into the zip archive. Reuse already- # copied revisions as lazy copies. 
Record newly copied # revisions so we can reuse them later. translated_rev_ct = self._translate_revs( ot = ot , branch = branch , change_utc_dt = change_utc_dt , gfe_files = gfe_files , integ_src_otl = integ_src_otl ) if not translated_rev_ct: self._create_empty_placeholder_dbrev(ot, branch, change_utc_dt) # Remove any submodule delete markers we stored in # gfe_file dicts, in case this gfe_files list is reused # on a second or later Perforce depot branch, where the # diff-against-head might no longer be a submodule # delete. if gitlinks: self._clear_submodule_deletes(gfe_files) self._set_curr_head_ot(ot) def _replace_gfe_with_gdt(self, gfe_commit, ot): """Convert one commit dict to a single Perforce changelist, running our own git-diff-tree to calculate which actions to record in Perforce. """ LOG.debug("_replace_gfe_with_gdt {}".format(ot)) # What do we currently have in the Perforce depot # branch? Can be empty tree if this is first commit # to a root/orphan branch. curr_head_ot = self._curr_head_ot(ot.branch_id) curr_head_sha1 = p4gf_const.EMPTY_TREE_SHA1 if curr_head_ot: curr_head_sha1 = curr_head_ot.sha1 # Run git-diff-tree, parse results into a dict # that looks like what FastExport produces, so that # we can funnel that through the same code. gdt_files = [] for gdt in p4gf_git.git_diff_tree( old_sha1 = curr_head_sha1 , new_sha1 = gfe_commit["sha1"] , find_copy_rename_args = None ): gfe_file = { "action" : gdt.action } if gdt.action in ["A", "M", "T"]: gfe_file = { "action" : gdt.action , "mode" : gdt.new_mode , "sha1" : gdt.new_sha1 , "path" : gdt.gwt_path } elif gdt.action == "D": gfe_file = { "action" : gdt.action , "path" : gdt.gwt_path } elif gdt.action in ["R", "C"]: gfe_file = { "action" : gdt.action , "mode" : gdt.new_mode , "sha1" : gdt.new_sha1 , "path" : gdt.gwt_path , "from_path" : gdt.from_path } else: raise RuntimeError("Unexpected git-diff-tree action '{}'" .format(gdt.action)) gdt_files.append(gfe_file) return gdt_files def _branch_head_is_gfe_first_parent(self, gfe_commit, ot): """Is the current head changelist on ot.branch_id the same commit sha1 as what git-fast-export used as first-parent for gfe_commit? If not, then git-fast-export's list of diffs are inapplicable to the current branch head, a new set must be generated. """ # Which branch did git-fast-export use for its diff? # Can be None if this is first commit on an # orphan/root branch gfe_par1_gfe_mark = gfe_commit.get("from") gfe_par1_sha1 = self._gfe_mark_to_sha1.get(gfe_par1_gfe_mark, None) # What did we most recently record to this branch? curr_head_sha1 = self._curr_branch_head_sha1(ot.branch_id) match = gfe_par1_sha1 == curr_head_sha1 if ( LOG.isEnabledFor(logging.DEBUG2) or ((not match) and LOG.isEnabledFor(logging.DEBUG)) ): level = logging.DEBUG2 if match else logging.DEBUG how = "==" if match else "!=" LOG.log( level , "_branch_head_is_gfe_first_parent() {gfmark:>6}" " {sha1} {branch_id} gfe par1={gfe}" " {how} {brhead}=branch head" .format( how = how , gfmark = ot.change_num , sha1 = _ab(ot.sha1) , branch_id = _ab(ot.branch_id) , gfe = _ab(gfe_par1_sha1) , brhead = _ab(curr_head_sha1) )) return match def _curr_branch_head_sha1(self, branch_id): """Return the given branch's most recently recorded commit sha1, if any. Return None if branch is empty. """ curr_head_ot = self._curr_head_ot(branch_id) if curr_head_ot: return curr_head_ot.sha1 return None def _mark_submodule_deletes(self, gfe_files, ot): """Mark gfe_file dict elements of gfe_files that delete a submodule. 
        Return a list of (sha1, gwt_path) for each submodule in ot,
        including any in ot's parent deleted in ot.

        Other submodule gfe_file actions (A M T) carry a file mode
        160000 to tell us it's a submodule action, don't bother copying
        to a Perforce file revision. But D delete? No file mode.
        So instead, insert a 'gitlink' key.
        """
        # What submodules are deleted from .gitmodules?
        # Usually there is a 1:1 correspondence between
        # deleting a .gitmodules entry and deleting its
        # corresponding ls-tree entry, but not always.
        # So check 'em both. Starting with .gitmodules...

        # Fetch .gitmodules contents from both the current
        # commit and whichever parent we're diffing against.
        # As ConfigParser instances. They'll be empty if no
        # .gitmodules file for that commit.
        par_sha1 = self._curr_branch_head_sha1(ot.branch_id)
        s1 = self._gitmodules_gwt_paths(par_sha1)
        s2 = self._gitmodules_gwt_paths(ot.sha1)
        submodule_gwt_paths = s1.union(s2)

        par_commit = self.ctx.repo[par_sha1] if par_sha1 else None
        cur_commit = self.ctx.repo[ot.sha1]

        def _filemode(commit, gwt_path):
            """Return file mode, or None if the path does not exist."""
            try:
                return commit.tree[gwt_path].filemode
            except (KeyError, AttributeError):
                return None

        deleted_gwt_paths = set()
        for gwt_path in submodule_gwt_paths:
            par_mode = _filemode(par_commit, gwt_path)
            cur_mode = _filemode(cur_commit, gwt_path)
            if par_mode == FileModeInt.COMMIT and not cur_mode:
                deleted_gwt_paths.add(gwt_path)

        # Check ls-tree entries for any 'D'eleted
        # paths that were a 160000-mode entry in the previous
        # commit copied to this Perforce depot branch.
        for gfe_file in gfe_files:
            if (    gfe_file["action"] != "D"
                and gfe_file["path"] not in deleted_gwt_paths):
                continue
            if self._is_submodule_te(
                      commit_sha1 = par_sha1
                    , gwt_path    = gfe_file["path"] ):
                deleted_gwt_paths.add(gfe_file["path"])

        # +++ Skip the gfe_file loop in the almost-always
        #     case that there are no submodule deletes
        #     to mark.
        if not deleted_gwt_paths:
            return []

        for gfe_file in gfe_files:
            if gfe_file["path"] in deleted_gwt_paths:
                gfe_file["gitlink"] = "# is submodule delete"

        return [ (p4gf_const.NULL_SHA1, gwt_path)
                 for gwt_path in deleted_gwt_paths ]

    @staticmethod
    def _clear_submodule_deletes(gfe_files):
        """Remove any gfe_file["gitlink"] markers inserted by
        _mark_submodule_deletes(). Do this before re-using gfe_files
        on a second Perforce depot branch.
        """
        for gfe_file in gfe_files:
            gfe_file.pop("gitlink", None)

    @lru_cache(maxsize = 100)
    def _gitmodules_gwt_paths(self, commit_sha1):
        """Return a set() of submodule gwt_paths from the given commit's
        .gitmodules file. Return empty set if no .gitmodules file.
        """
        gitmodules_cp = p4gf_git.parse_gitmodules_for_commit_sha1(
            self.ctx.repo, commit_sha1)
        gwt_paths = set()
        if gitmodules_cp:
            for section in gitmodules_cp:
                gwt_path = gitmodules_cp.get( section
                                            , "path"
                                            , raw      = True
                                            , fallback = None )
                if gwt_path:
                    gwt_paths.add(gwt_path)
        return gwt_paths

    def _create_empty_placeholder_dbrev(self, ot, branch, change_utc_dt):
        """Record a single DBRev record that adds or edits a
        .p4gf_empty_changelist_placeholder.
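
        The placeholder is written as a plain "M" of the empty blob
        (p4gf_const.EMPTY_BLOB_SHA1), so even an otherwise empty
        changelist carries one file revision.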
""" gwt_path = p4gf_const.P4GF_EMPTY_CHANGELIST_PLACEHOLDER self._ensure_empty_blob() gfe_file = { "path" : gwt_path , "action" : "M" , "sha1" : p4gf_const.EMPTY_BLOB_SHA1 , "mode" : FileModeStr.PLAIN } p4d_secs = p4gf_time_zone.utc_dt_to_p4d_secs(change_utc_dt, self.ctx) self._translate_rev( gfe_file = gfe_file , ot = ot , branch = branch , p4d_secs = p4d_secs , integ_src_otl = [] ) def _translate_revs(self, * , ot , branch , change_utc_dt , gfe_files , integ_src_otl ): """Write a db.rev record for each file revision in this commit. Also write a zero-th revision to contextdata.jnl for the first new revision of each depot path. Return the number of revisions translated. """ # +++ Convert per-changelist time once per changelist, # rather than per-revision. p4d_secs = p4gf_time_zone.utc_dt_to_p4d_secs(change_utc_dt, self.ctx) ### copy/rename support goes here, fold fuzzy ### match rows here, either before or inside top ### of loop here. translated_rev_ct = 0 for gfe_file in gfe_files: # Skip Git submodules. They are recorded as DescInfo # "gitlinks:" text, not Perforce file revisions. if gfe_file.get("mode") == FileModeStr.COMMIT: continue w = self._translate_rev( gfe_file = gfe_file , ot = ot , branch = branch , p4d_secs = p4d_secs , integ_src_otl = integ_src_otl ) translated_rev_ct += w return translated_rev_ct def _translate_rev(self, * , gfe_file , ot , branch , p4d_secs , integ_src_otl ): """Create a single DBRev struct for this file. Return 1 if db.rev written, 0 if not. """ gwt_path = gfe_file['path'] depot_path = self._gwt_to_depot_path( gwt_path = gwt_path , branch = branch ) p4_exists = self._rev_store.exists_at_head(depot_path) prev_head_db_rev = self._rev_store.head_db_rev(depot_path) rev_num = self._rev_store.next_rev_num(depot_path) is_contextdata_written \ = self._rev_store.is_contextdata_written(depot_path) p4action = _gfe_to_p4_file_action(p4_exists, gfe_file["action"]) is_delete = p4gf_p4dbschema.FileAction.DELETE == p4action # Do not delete submodules via 'p4 delete'. # Submodules are not tracked as files within Perforce. is_submodule = "gitlink" in gfe_file if is_delete and is_submodule: return 0 # Integrating from other branches? Replace 'p4 # add/edit' with 'p4 integ' for the file action. # Writes one db.integ row pair for each integ source # used. if integ_src_otl: p4action = self._translate_rev_integ( gfe_file = gfe_file , ot = ot , p4action = p4action , dest_depot_path = depot_path , integ_src_otl = integ_src_otl , rev_num = rev_num ) # Write the db.rev record for this file action. new_head_db_rev = self._translate_rev_row( gfe_file = gfe_file , ot = ot , branch = branch , p4d_secs = p4d_secs , p4action = p4action , depot_path = depot_path , is_delete = is_delete , rev_num = rev_num , prev_head_db_rev = prev_head_db_rev , is_contextdata_written = is_contextdata_written ) if not new_head_db_rev: return 0 self._rev_store.record_head(new_head_db_rev, is_delete) self._rev_timer.increment(aux_ct = self._byte_ct) return 1 def _translate_rev_integ(self, * , gfe_file , ot , p4action , dest_depot_path , integ_src_otl , rev_num ): """Should this revision be integrated from some other branch(es)? If so, write those integ records to db.integ, and return the integ/branch file action to use instead of whatever add/edit action a non-integ rev would use. If not integrating from another branch, return p4action unchanged. """ # Not integrating? Nothing to do. if not integ_src_otl: return p4action # integ for delete has more restrictions than # we want to encode for fast push. 
No integ. is_delete = p4gf_p4dbschema.FileAction.DELETE == p4action if is_delete: return p4action # There are only two outcomes now: copy or branch. is_branch = p4action == p4gf_p4dbschema.FileAction.ADD if is_branch: integ_how_pair = ( p4gf_p4dbschema.IntegHow.BRANCH , p4gf_p4dbschema.IntegHow.BRANCH_R ) integ_p4action = p4gf_p4dbschema.FileAction.BRANCH else: integ_how_pair = ( p4gf_p4dbschema.IntegHow.COPY , p4gf_p4dbschema.IntegHow.COPY_R ) integ_p4action = p4gf_p4dbschema.FileAction.INTEG dest_file_mode_int = FileModeInt.from_str(gfe_file["mode"]) src_commit_sha1s = set() for src_ot in integ_src_otl: # Optional, and either a great or awful idea depending # on what you want to see in integ history. # # Avoid duplicate integs when a source commit becomes # duplicate changelists due to tunneling. Use each # source commit only once. # #if src_ot.sha1 in src_commit_sha1s: # continue integ_src = self._integ_src( gwt_path = gfe_file["path"] , dest_file_sha1 = gfe_file["sha1"] , dest_file_mode_int = dest_file_mode_int , integ_src_ot = src_ot ) if not integ_src: continue self._commit_gfunzip.write_integ_pair( src_depot_path = integ_src.depot_path , src_start_rev_int = integ_src.start_rev_int , src_end_rev_int = integ_src.end_rev_int , dest_depot_path = dest_depot_path , dest_rev_int = rev_num , how = integ_how_pair[0] , how_r = integ_how_pair[1] , dest_change_num_int = _gfmark_to_int(ot.change_num) ) # Remember this source commit so that we know we've # integed _something_, and so that we can skip # duplicate changelists for this commit (if we so # choose). src_commit_sha1s.add(src_ot.sha1) if src_commit_sha1s: return integ_p4action else: return p4action def _integ_src(self, * , gwt_path , dest_file_sha1 , dest_file_mode_int , integ_src_ot ): """If integ_src_ot can act as an integ source for gwt_path, return a _IntegSource instance of integ source stuff: depot_path start_rev_int end_rev_int Return None if integ_src_ot is not a worthy integ source. """ try: src_commit = self.ctx.repo[integ_src_ot.sha1] src_te = src_commit.tree[gwt_path] # Keep this first implementation simple: exact match # only = branch/copy, no integ/merge. if not ( src_te.filemode == dest_file_mode_int and src_te.hex == dest_file_sha1): LOG.debug3("_integ_src() src commit={commit_sha1}" " sha1/mode mismatch {src_sha1} {src_mode}" " != {dest_sha1} {dest_mode} {gwt}" .format( commit_sha1 = _ab(integ_src_ot.sha1) , src_sha1 = _ab(src_te.hex) , src_mode = _fms(src_te.filemode) , dest_sha1 = _ab(dest_file_sha1) , dest_mode = _fms(dest_file_mode_int) , gwt = gwt_path)) return None except KeyError: # File not in this source commit. No source file from # which to integ. LOG.debug3("_integ_src() src commit={commit_sha1}" " gwt not found {gwt}" .format( commit_sha1 = _ab(integ_src_ot.sha1) , gwt = gwt_path)) return None # Not in source branch mapping? Can't integ # from unmapped space. 
src_branch = self.ctx.branch_dict().get(integ_src_ot.branch_id) src_depot_path = self._gwt_to_depot_path( gwt_path = gwt_path , branch = src_branch ) if not src_depot_path: LOG.debug3("_integ_src() src branch={branch_id}" " gwt not mapped {gwt}" .format( branch_id = _ab(integ_src_ot.branch_id) , gwt = gwt_path)) return None src_change_num = _gfmark_to_int(integ_src_ot.change_num) src_range = self._rev_store.src_range(src_depot_path, src_change_num) if src_range[0] is None or src_range[1] is None: LOG.debug3("_integ_src() src depot_file={df}@{cn}" " no src_range() {gwt}" .format( df = src_depot_path , cn = integ_src_ot.change_num , gwt = gwt_path)) return None LOG.debug3("_integ_src() returning {}#{},{}" .format(src_depot_path, src_range[0], src_range[1])) return _IntegSource( depot_path = src_depot_path , start_rev_int = src_range[0] , end_rev_int = src_range[1] ) def _translate_rev_row(self, * , gfe_file , ot , branch , p4d_secs , p4action , depot_path , is_delete , rev_num , prev_head_db_rev , is_contextdata_written ): """Record a single db.rev record for this file. Return the DbRev struct that holds what we just recorded. """ change_num = _gfmark_to_int(ot.change_num) if not is_delete: # Add revision to bat_gfunzip (unless it's already there). blob_sha1 = gfe_file["sha1"] lbr = self._get_librarian_record(blob_sha1) lbr_path = p4gf_path.blob_p4_path(blob_sha1) lbr_rev = lbr_rev_str() debug3(LOG, "_translate_rev() {a} {mode} {sha1:7.7} {gwt}" , a = gfe_file["action"] , mode = gfe_file["mode"] , sha1 = gfe_file["sha1"] , gwt = gfe_file["path"] ) p4filetype = self._calc_file_rev_p4filetype( lbr_record = lbr , gwt_path = gfe_file["path"] , branch = branch , gfe_mode_str = gfe_file["mode"]) db_rev = p4gf_p4dbschema.DbRev( depot_path = depot_path , depot_rev = rev_num , depot_file_type_bits = p4filetype , file_action_bits = p4action , change_num = change_num , date_p4d_secs = p4d_secs , md5 = lbr.md5 , uncompressed_byte_ct = lbr.byte_ct , lbr_is_lazy = p4gf_p4dbschema.RevStatus.LAZY , lbr_path = lbr_path , lbr_rev = lbr_rev , lbr_file_type_bits = lbr.lbr_file_type ) # Adding first rev? Create "rev#0" context. if ( 1 == rev_num and not is_contextdata_written): assert not prev_head_db_rev prev_head_db_rev = copy.copy(db_rev) prev_head_db_rev.rev_num = 0 else: # is_delete == True debug3( LOG, "_translate_rev() {a} {gwt}" , a = gfe_file["action"] , gwt = gfe_file["path"] ) # Rare, but it happens: deleting an ls-tree entry that # has no previous db.rev. The previous db.rev was # probably a 160000-mode COMMIT entry, which we never # store as a db.rev. We usually detect these in # _mark_submodule_deletes() and skip, but if there is # no corresponding change to .gitmodules, the 'D'elete # action makes it all the way to here. if not prev_head_db_rev: LOG.warning("No previous db.rev for commit={sha1} {a} {gwt}," " probably a 160000 that we never store in db.rev." .format( sha1 = ot.sha1 , a = gfe_file["action"] , gwt = gfe_file["path"] )) return None db_rev = copy.copy(prev_head_db_rev) db_rev.depot_rev = rev_num db_rev.file_action_bits = p4action db_rev.change_num = change_num db_rev.date_p4d_secs = p4d_secs db_rev.md5 = MD5_NONE db_rev.uncompressed_byte_ct = -1 db_rev.lbr_is_lazy = p4gf_p4dbschema.RevStatus.NOT_LAZY if is_contextdata_written: context_db_rev = None else: context_db_rev = prev_head_db_rev self._commit_gfunzip.write_rev(db_rev, context_db_rev = context_db_rev) return db_rev def _ensure_empty_blob(self): """Store an empty file for the empty blob sha1. 
Required for .p4gf_empty_changelist_placeholder. Return the lbr_rec for the the blob. """ # Already got one? Nothing to do. sha1 = p4gf_const.EMPTY_BLOB_SHA1 lbr = self._lbr_store.get(sha1) if lbr: return lbr return self._create_blob_librarian_file_bytes( sha1 = sha1 , raw_bytes = b'' ) def _get_librarian_record(self, sha1): """If we already have a librarian record, return that. If not, git-cat-file the thing into existence, write it to the librarian store, and return a record for the new thing. Raises KeyError exception if you try to call us on a "D"elete action where we don't have a file sha1. ### Will have to deal with 100% match "C"opy/"R"ename actions that also omit sha1. We need that sha1. """ lbr_rec = self._lbr_store.get(sha1) if not lbr_rec: lbr_rec = self._create_blob_librarian_file(sha1) return lbr_rec def _create_blob_librarian_file(self, sha1): """Extract a file blob from the Git repo, write it to the bat_gfunzip, and record it in the librarian. Return the librarian record for this new file. Do not call this for blobs already written to the bat_gfunzip. That pointlessly wastes time and space git-cat-file-ing the same blob into the zip archive, zip will spew warnings about duplicate entries, and Perforce probably won't appreciate a zip archive with multiple rev#1 of the same file. """ raw_bytes = p4gf_git.get_blob(sha1, self.ctx.repo) self._byte_ct += len(raw_bytes) return self._create_blob_librarian_file_bytes( sha1 = sha1 , raw_bytes = raw_bytes) def _create_blob_librarian_file_bytes(self, * , sha1 , raw_bytes): """Write raw bytes to the bat_gfunzip, record to librarian. Implementation for _create_blob_librarian_file() when you already have the bytes (such as for the empty placeholder blob) """ md5_strr = hashlib.md5(raw_bytes).hexdigest().upper() uncompressed_byte_ct = len(raw_bytes) p4filetype = self._calc_lbr_p4filetype(raw_bytes) self._bat_gfunzip.write_blob_rev_1( sha1 = sha1 , md5 = md5_strr , raw_bytes = raw_bytes , lbr_file_type = p4filetype ) r = self._lbr_store.store( sha1 = sha1 , md5 = md5_strr , byte_ct = uncompressed_byte_ct , lbr_file_type = p4filetype ) return r # pylint:disable=too-many-arguments def _calc_file_rev_p4filetype( self , lbr_record , gwt_path , branch , gfe_mode_str , depot_path = None ): """Return filetype bits for this file revision. Use the librarian's file type bits, plus or minus any executable flag from Git. :param depot_path: Optional: saves a P4.Map lookup if you already know gwt_path's depot_path. """ # 1a. p4 typemap has an entry actual depot path? # +++ Skip if Perforce has no typemap. typemap_says = None if self._p4typemap.has_typemap(): if depot_path is None: dst_depot_path = self._gwt_to_depot_path(gwt_path, branch) else: dst_depot_path = depot_path typemap_says = self._p4typemap.for_depot_path(dst_depot_path) # 1b. p4 typemap for master-ish location of this file? if not typemap_says: masterish_depot_path = self._gwt_to_depot_path( gwt_path, self._masterish()) typemap_says = self._p4typemap.for_depot_path( masterish_depot_path) typemap_says = _sanitize_p4filetype(typemap_says) base_mods = ['', ''] if typemap_says: base_mods = p4gf_p4filetype.to_base_mods(typemap_says) # 2. Git says it's a symlink? Then that trumps # whatever base was supplied by p4 typemap. if gfe_mode_str == FileModeStr.SYMLINK: base_mods[0] = "symlink" # Git says it's not a symlink? Then ignore # p4 typemap if it asked for one. elif base_mods[0] == "symlink": base_mods[0] = '' # 3. p4 typemap supplied no base filetype? # Deduce from file content. 
if not base_mods[0]: base_mods[0] = p4gf_p4dbschema.FileType.to_base( lbr_record.lbr_file_type) _sanitize_mods(base_mods) p4filetype = p4gf_p4dbschema.FileType.from_base_mods(base_mods) # 4. If p4 typemap supplied no storage modifier # (+C, +F), repeat the storage modifier from # librarian storage. # Avoids silly "binary+D" format storing binary # files in RCS delta format. if not p4filetype & p4gf_p4dbschema.FileType.S_MASK: lbr_store_bits = lbr_record.lbr_file_type \ & p4gf_p4dbschema.FileType.S_MASK p4filetype |= lbr_store_bits # 5. Force X bit to match Git. if gfe_mode_str == FileModeStr.EXECUTABLE: p4filetype |= p4gf_p4dbschema.FileType.EXECUTABLE else: p4filetype &= ~p4gf_p4dbschema.FileType.EXECUTABLE return p4filetype def _calc_lbr_p4filetype(self, raw_bytes): """Return librarian filetype bits for this blob, based on blob content and Git file mode. Ignores `p4 typemap` here. That doesn't apply to our lazy copy sources under //.git-fusion/objects/... """ r = self._lbr_filetype_storage detected = self._detect_p4filetype(raw_bytes) # VERY noisy, fills debug log with entire content of every file rev. # Use only when debugging a specific, small, problem. #LOG.getChild("p4filetype").debug3("_calc_lbr_p4filetype() {:08x} {}" # .format(detected, raw_bytes)) return r + detected @staticmethod def _detect_p4filetype(raw_bytes): """What kind of file is this?""" p4filetype_str = p4gf_p4filetype.detect(raw_bytes) if p4filetype_str == "text": return p4gf_p4dbschema.FileType.TEXT else: return p4gf_p4dbschema.FileType.BINARY def _populate_branch(self, popcom, branch): """This is the first commit on an empty depot branch. Fill it with the same content as this commit's first parent. Add "populate" change record, and all its rev records, to the zip archives. Add all these rev#1 revs to our RevHistoryStore. Remember the ghost DescInfo for later rewrite. """ LOG.debug("_populate_branch() {}".format(popcom)) change_num_int = _gfmark_to_int(popcom.ghost_gfmark) change_utc_dt = self._sha1_to_changelist_utc_dt(popcom.ghost_of_sha1) rev_ct = 0 p4d_secs = p4gf_time_zone.utc_dt_to_p4d_secs( change_utc_dt, self.ctx) LOG.debug("_populate_branch() git-ls-tree walk start") for l in p4gf_util.git_ls_tree_r(self.ctx.repo, popcom.ghost_of_sha1): if l.mode == FileModeStr.DIRECTORY: self._copy_tree_to_bat_if(tree = l.sha1) elif l.mode == FileModeStr.COMMIT: # Skip submodules. They're tracked in DescInfo text # not as file revisions. pass else: self._populate_revision( branch = branch , change_num_int = change_num_int , p4d_secs = p4d_secs , blob_sha1 = l.sha1 , gwt_path = l.gwt_path , gfe_mode_str = l.mode ) rev_ct += 1 LOG.debug("_populate_branch() git-ls-tree walk complete {} db.rev records".format(rev_ct)) # This ghost changelist is not part of our OTHistory, # not copied as a GitMirror commit object, but but # still needed as a "current branch head" when # calculating correct Git diffs. ot = ObjectType.create_commit( sha1 = popcom.ghost_of_sha1 , repo_name = self.ctx.config.repo_name , change_num = popcom.ghost_gfmark , branch_id = popcom.branch_id ) # Branch starting empty? Nothing to populate? Force a # changelist anyway, with a placholder file. 
Otherwise # our gfmark/changelist renumbering will have gaps and # off-by-1 errors (or off-by-N for n skipped populates) if not rev_ct: self._create_empty_placeholder_dbrev(ot, branch, change_utc_dt) di = self._ghost_desc_info(popcom) desc = di.to_text() self._commit_gfunzip.write_changelist( ctx = self.ctx , change_num = change_num_int , client = self.ctx.p4.client , user = p4gf_const.P4GF_USER , date_utc_dt = change_utc_dt , description = desc ) self._record_desc_info( gfmark = popcom.ghost_gfmark , desc_info = di , change_utc_dt = change_utc_dt , ghost_of_branch_id = popcom.ghost_of_branch_id , owner = p4gf_const.P4GF_USER ) self._set_curr_head_ot(ot) LOG.debug("_populate_branch() complete") def _populate_revision(self, * , branch , change_num_int , p4d_secs , blob_sha1 , gwt_path , gfe_mode_str ): """Write a single file revision, part of a single "populate" changelist. """ lbr_path = p4gf_path.blob_p4_path(blob_sha1) lbr = self._get_librarian_record(blob_sha1) depot_path = self._gwt_to_depot_path( gwt_path = gwt_path , branch = branch ) p4filetype = self._calc_file_rev_p4filetype( lbr_record = lbr , gwt_path = gwt_path , branch = branch , depot_path = depot_path , gfe_mode_str = gfe_mode_str ) db_rev = p4gf_p4dbschema.DbRev( depot_path = depot_path , depot_rev = 1 , depot_file_type_bits = p4filetype , file_action_bits = p4gf_p4dbschema.FileAction.ADD , change_num = change_num_int , date_p4d_secs = p4d_secs , md5 = md5_str(lbr.md5) , uncompressed_byte_ct = lbr.byte_ct , lbr_is_lazy = p4gf_p4dbschema.RevStatus.LAZY , lbr_path = lbr_path , lbr_rev = lbr_rev_str() , lbr_file_type_bits = lbr.lbr_file_type ) prev_db_rev = copy.copy(db_rev) prev_db_rev.depot_rev = 0 self._commit_gfunzip.write_rev(db_rev, context_db_rev = prev_db_rev) self._rev_store.record_head(db_rev, is_delete = False) def _branch_id_to_dbid(self, branch_id): """Return the depot branch ID that houses a branch.""" br = self.ctx.branch_dict().get(branch_id) if not br: return None par_dbi = br.depot_branch if not par_dbi: return None return par_dbi.depot_branch_id def _ghost_desc_info(self, popcom): """Return a DescInfo that can produce a ghost changelist's description. """ par_dbid = self._branch_id_to_dbid(popcom.ghost_of_branch_id) di = DescInfo() di.clean_desc = _("Git Fusion branch management") di.push_state = NTR("incomplete") di.parent_branch = NTR("{}@{}").format( par_dbid , popcom.ghost_of_change_num ) di.ghost_of_sha1 = popcom.ghost_of_sha1 di.ghost_of_change_num = popcom.ghost_of_change_num di.ghost_precedes = popcom.ghost_precedes_sha1 return di def _sha1_to_changelist_utc_dt(self, _sha1): """Return an appropriate timestamp to use for a Perforce changelist that is a translation of the given Git commit. If all you have is the Git commit sha1, this will fetch it from Git and convert it. Used only for "populate" changelists, were we lack a full DescInfo-parsed Git commit with author/commiter time. """ ### Ideally pygit2.Commit would provide author and ### commiter time and offset. But it provides only ### commiter. So we have to parse the line ourselves, ### similar to what p4gf_fastexport.get_commit() does. ### Until then, here, have the current wallclock time. 
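        ### A rough sketch of that eventual parse (illustrative only, not
        ### wired in): a raw commit header line looks like
        ###     committer A U Thor <author@example.com> 1420099200 -0800
        ### so something along the lines of
        ###     m = re.match(r'^committer .* (\d+) ([-+]\d{4})$', line)
        ### would recover the epoch seconds and UTC offset to feed the
        ### usual timezone conversion.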
how = self._changelist_date_source if ( how == p4gf_config.VALUE_DATE_SOURCE_GIT_AUTHOR or how == p4gf_config.VALUE_DATE_SOURCE_GIT_COMMITTER): LOG.warning("Unimplemented _sha1_to_changelist_utc_dt()") return p4gf_time_zone.now_utc_dt() def _to_desc_info(self, gfe_commit, branch): """Call G2P to create a DescInfo block for this changelist.""" # Prevent G2P from doing its own scan through # gfe_commit["files"] searching for submodules. we # already did that and stuffed the results we require # into gfe_commit["gitlinks"]. save_gfe_files = gfe_commit.pop("files", []) result = self._g2p.change_desc_info_with_branch(gfe_commit, branch) gfe_commit["files"] = save_gfe_files return result def _record_desc_info( self, * , gfmark , desc_info , change_utc_dt , ghost_of_branch_id = None , owner ): """Remember a changelist's DescInfo block so that we can later replace its gfmark placeholders with actual submitted changelist numbers. """ d = desc_info.to_dict() d["gfmark"] = gfmark d["change_utc"] = int(change_utc_dt.timestamp()) d["owner"] = owner if ghost_of_branch_id: d["ghost_of_branch_id"] = ghost_of_branch_id # indent=None appears to produce a single line with no # newlines (newlines encoded as literal "\n"). One # record-per-line makes our reader code in # _fill_desc_info_gfunzip() _much_ simpler. j = json.dumps(d, indent=None) self._desc_info_fp.write(j) self._desc_info_fp.write("\n") def _fill_desc_info_gfunzip(self): """Fill a 'p4 unzip' archive with updated changelist descriptions. No revs, just db.change and db.desc records. """ # Assumes you've already called # _fill_branch_head_otl(). assert self._branch_head_otl branch_head_gfmark_set = {ot.change_num for ot in self._branch_head_otl} with open(self._desc_info_abspath, "r", encoding="utf-8") as f: for line in f: self._fill_desc_info_one(line, branch_head_gfmark_set) def _fill_desc_info_one(self, line, branch_head_gfmark_set): """Load one line into a DescInfo, renumber it, write to DescInfo gfunzip payload. """ if not line: return d = json.loads(line) gfmark = d.get("gfmark") ts = d["change_utc"] ghost_of_branch_id = d.get("ghost_of_branch_id") change_utc_dt = p4gf_time_zone.seconds_to_utc_dt(ts) change_num_int = self._gfmark_to_submitted_change_num(gfmark) LOG.debug3("_fill_desc_info_one() {} ==> {}" .format(gfmark, change_num_int)) di = DescInfo.from_dict(d) if gfmark in branch_head_gfmark_set: di.push_state = NTR("complete") LOG.debug3("_fill_desc_info_one() {} ==> {}" " push-state : complete" .format(gfmark, change_num_int)) self._renumber_desc_info(di, ghost_of_branch_id) self._desc_info_gfunzip.write_changelist( ctx = self.ctx , change_num = change_num_int , client = self.ctx.p4.client , user = d["owner"] , date_utc_dt = change_utc_dt , description = di.to_text() ) def _renumber_desc_info(self, di, ghost_of_branch_id): """Convert all ":123" gfmark strings to "129" submitted changelist numbers. """ if di.parent_changes: for key_sha1, val_gfmark_list in di.parent_changes.items(): di.parent_changes[key_sha1] = [ self._gfmark_to_submitted_change_num(gfmark) for gfmark in val_gfmark_list] if _is_gfmark(di.ghost_of_change_num): di.ghost_of_change_num \ = str(self._gfmark_to_submitted_change_num( di.ghost_of_change_num)) # Only ghost changelists fill in parent-branch during # fast push. 
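        # (Illustrative, numbers hypothetical: if the ghost was written under
        # gfmark ":41" and 'p4 unzip' reports an offset of +500, then
        # ghost_of_change_num becomes "541" above and parent_branch is rebuilt
        # below as "<depot-branch-id>@541".)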
if di.parent_branch: assert ghost_of_branch_id par_dbid = self._branch_id_to_dbid(ghost_of_branch_id) di.parent_branch = NTR("{}@{}").format( par_dbid , di.ghost_of_change_num ) def _write_change_records(self, *, gfmark, di, change_utc_dt): """Record a db.change and db.desc journal record for this changelist. Owner is git-fusion-user for now. We'll reassign ownership later as part of _desc_info_gfunzip. """ change_num = _gfmark_to_int(gfmark) self._commit_gfunzip.write_changelist( ctx = self.ctx , change_num = change_num , client = self.ctx.p4.client , user = p4gf_const.P4GF_USER , date_utc_dt = change_utc_dt , description = di.to_text() ) # lru_cache() here saves 2.3% Fast Push time, avoids about 75% of calls. @lru_cache(maxsize = 10000) def _gwt_to_depot_path(self, gwt_path, branch): """Optimized version of ctx.gwt_path(gwt).to_depot(). Avoid creating the convert and goes straight to P4.Map.translate(). """ gwt_esc = p4gf_util.escape_path(gwt_path) client_path = '//{}/'.format(self.ctx.p4.client) + gwt_esc return branch.view_p4map.translate( client_path , branch.view_p4map.RIGHT2LEFT) def _calc_changelist_utc_dt(self, desc_info): """Return a changelist time, as a datetime instance in UTC. If repo is configured to use Git author or committer date, use that. If not, use current Git Fusion server time. """ how = self._changelist_date_source if how == p4gf_config.VALUE_DATE_SOURCE_GIT_AUTHOR: src = desc_info.author elif how == p4gf_config.VALUE_DATE_SOURCE_GIT_COMMITTER: src = desc_info.committer else: return p4gf_time_zone.now_utc_dt() return p4gf_time_zone.git_strs_to_utc_dt( src["time"] , src["timezone"] ) def _write_dbi_files(self): """Write all depot branch-info files to first zip payload.""" dbi_index = self.ctx.depot_branch_info_index() dbi_list = dbi_index.by_id.values() LOG.debug("_write_dbi_files() ct={}".format(len(dbi_list))) for dbi in dbi_list: self._bat_gfunzip.write_dbi(dbi) def _fill_gitmirror_gfunzip(self): """Back up any Git data that cannot reliably round-trip through Git->Perforce->Git translation. Record Git Fusion config and branch data. """ self._copy_othistory_to_gitmirror_gfunzip() # branch info # config files def _copy_othistory_to_gitmirror_gfunzip(self): """Record a verbatim copy of every Git commit we translated, so that we can rebuild it verbatim. Assume that ls-trees were already sent as part of bat_gfunzip. """ LOG.debug("_copy_othistory_to_gitmirror_gfunzip() ot_ct={}" .format(self._othistory.ct())) for ot in self._othistory.ot_iter(): self._copy_ot_to_gitmirror_gfunzip(ot) def _copy_ot_to_gitmirror_gfunzip(self, ot): """Record that a commit was copied to a branch view under some changelist number. Back up the commit's original content so that we can reproduce it later without whitespace changes. """ change_num_int = self._gfmark_to_submitted_change_num(ot.change_num) self._byte_ct += self._gitmirror_gfunzip.copy_commit( ot, change_num_int) def _copy_trees_to_bat(self, tree_sha1): """Copy trees to the bat_gfunzip. Skip any trees already in the bat_gfunzip. Recursive tree walk code similar to p4gf_util.git_iter_tree(), but different because we stop the tree walk as soon as we hit any already- copied tree. Do not waste CPU time recursing down paths that contain nothing new to copy. 
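        Illustrative walk (tree sha1s hypothetical): pop the root tree,
        copy it, and enqueue its sub-trees A/ and B/; if B/ was already
        copied for an earlier commit, skip it and never enqueue B/'s
        children, so only the genuinely new subtree under A/ gets visited.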
""" tree_root = p4gf_util.treeish_to_tree(self.ctx.repo, tree_sha1) if not tree_root: LOG.debug("_copy_trees_to_bat() no tree for {}" .format(p4gf_util.abbrev(tree_sha1))) return work_queue = deque([tree_root.oid]) while work_queue: tree_oid = work_queue.pop() tree = self.ctx.repo.get(tree_oid) copied = self._copy_tree_to_bat_if(tree) # Skip trees already added. # Recurse into trees that needed a copy. if not copied: continue for child_te in tree: if child_te.filemode == FileModeInt.DIRECTORY: work_queue.appendleft(child_te.oid) def _copy_tree_to_bat_if(self, tree): """If we have not yet recorded this tree to our bat_gfunzip, do so now. Accepts either a pygit2.Tree or a str tree_sha1. Return True if copied, False if already there, no need to copy. """ # Already stored? Nothing more to do. tree_sha1_str = TreeStore.to_sha1_str(tree) if self._tree_store.contains(tree): debug3(LOG, "_copy_tree_to_bat_if() already has {}" , p4gf_util.abbrev(tree_sha1_str)) return False self._byte_ct += self._bat_gfunzip.copy_tree(tree_sha1_str) self._tree_store.add(tree) debug3(LOG, "_copy_tree_to_bat_if() copied {}" , p4gf_util.abbrev(tree_sha1_str)) return True def _gfmark_to_submitted_change_num(self, gfmark): """Translate ":1" to integer(5), using the offset returned by 'p4 unzip' of our commit_gfunzip. """ return self._gfmark_offsets.to_change_num(gfmark) def _fill_branch_head_otl(self): """For each pushed Git branch, find its commit in our ObjectTypeHistory. """ branch_head_otl = [] for prt in self._prl.set_heads: branch = self.ctx.git_branch_name_to_branch(prt.git_branch_name()) if not branch: raise RuntimeError(_("Cannot find branch for {prt}").format(prt=prt)) ot = self._othistory.sha1_branch_to_ot( sha1 = prt.new_sha1 , branch_id = branch.branch_id ) if not ot: raise RuntimeError(_("Cannot find changelist for {prt}") .format(prt=prt)) branch_head_otl.append(ot) self._branch_head_otl = branch_head_otl def _set_p4keys(self): """Set all our index and last-copied keys.""" # Accumulate each changelist's # "git-fusion-index-branch-" key/value pair. key_value = {} # Accumulate the max changelist number for each branch. # Might as well do this while we're iterating through # OTHistory and save ourselves a separate loop. last_copied = {} for ot in self._othistory.ot_iter(): # Why copy()? OTHistory still has ":123" gfmarks. We # need integer submitted changelist numbers: int(129) # But I really hate code that modifies contents of a # collection as a side effect. So copy() until # profiling says this is too expensive. otn = copy.copy(ot) otn.change_num = self._gfmark_to_submitted_change_num( ot.change_num) (key_name, value) = otn.to_index_key_value() key_value[key_name] = value otprev = last_copied.get(otn.branch_id) if (not otprev) or (otprev.change_num < otn.change_num): last_copied[otn.branch_id] = otn # Now that we have the highest changelist's ObjectType # for each branch, we can add them to our bulk-update # dict. for otn in last_copied.values(): (key_name, value) = otn.to_index_last_key_value() key_value[key_name] = value # last-copied-changelist-number, others. max_cn_int = max([otn.change_num for otn in last_copied.values()]) max_cn_key = P4Key.calc_last_copied_change_p4key_name( self.ctx.config.repo_name, p4gf_util.get_server_id()) key_value[max_cn_key] = str(max_cn_int) P4Key.set_many(self.ctx, key_value) def _submit_config_files(self): """ 'p4 add/edit + submit' our config and config2 file, if we have any and they have changes worth writing. 
        We cannot easily fit these files into our 'p4 unzip' payloads,
        because they might already exist in Perforce. 'p4 unzip' can only
        modify existing files if we first know the db.rev info for that
        existing rev, and I don't want to pay a 'p4 fstat' cost just to
        avoid a later 'p4 submit'.

        Bonus: reuse existing code for Branch.add_to_config() and
        RepoConfig.write_repo2_if().

        Code adapted from GitMirror.add_branch_config2() and
        _add_config2_branch_defs_to_p4().

        Writes config(1) only if new depot branches are defined.
        """
        p4gf_gitmirror.write_config1(self.ctx)

        branch_list_config_2 = [b for b in self.ctx.branch_dict().values()
                                if b.is_worthy_of_config2()]
        for b in p4gf_branch.ordered(branch_list_config_2):
            LOG.debug2("building p4gf_config2: add branch\n{}".format(b))
            b.add_to_config(self.ctx.repo_config.repo_config2)
        self.ctx.repo_config.write_repo2_if(self.ctx.p4gf)

    def _curr_head_ot(self, branch_id):
        """Return the most recent OT recorded for this branch_id
        via _set_curr_head_ot().
        """
        return self._branch_id_to_curr_head_ot.get(branch_id)

    def _set_curr_head_ot(self, ot):
        """Record the most recent OT for this branch_id, for later
        retrieval via _curr_head_ot().
        """
        self._branch_id_to_curr_head_ot[ot.branch_id] = ot

    def _create_integ_src_otl(self, desc_info, dest_branch_id):
        """Return a list of zero or more parent object types, one for
        each parent from some branch other than the current branch.

        Return an empty list if not merging from any other branch.
        """
        integ_src_otl = []
        for par_sha1 in desc_info.parents:
            otl = self._othistory.sha1_to_otl(par_sha1)
            for ot in otl:
                if ot.branch_id == dest_branch_id:
                    continue
                integ_src_otl.append(ot)
        return integ_src_otl

    @lru_cache(maxsize = 1)
    def _masterish(self):
        """Return the most equal branch in our config."""
        return self.ctx.most_equal()

    def _commit_gfunzip_can_accept(self, rev_ct):
        """Can the current commit_gfunzip archive accept rev_ct more
        db.rev records, or is it getting too full?

        To limit Fast Push's memory requirements on the Perforce server,
        limit any individual commit_gfunzip archive to N db.rev records.
        """
        # Empty zip archives can always accept their first
        # commit, even if that commit exceeds our max.
        if self._commit_gfunzip_rev_ct == 0:
            return True
        want = self._commit_gfunzip_rev_ct + rev_ct
        return want <= self._max_rev_per_commit_gfunzip

    def _rotate_commit_gfunzip(self, next_gfmark):
        """Close the current commit_gfunzip and open a new one."""
        self._close_commit_gfunzip()
        self._open_commit_gfunzip()
        self._commit_gfunzip_begin_gfmark = next_gfmark

    def _is_submodule_te(self, commit_sha1, gwt_path):
        """Is the given gwt_path a 160000-mode tree entry at the given
        commit?

        Ignores .gitmodules; we're just looking for 160000-mode tree
        entries here.

        Okay to call with commit_sha1=None.
        """
        try:
            commit = self.ctx.repo[commit_sha1]
            mode = commit.tree[gwt_path].filemode
            LOG.debug3("_is_submodule_te() {} {} mode={}"
                       .format( p4gf_util.abbrev(commit_sha1)
                              , gwt_path
                              , mode ))
            return mode == FileModeInt.COMMIT
        except (KeyError, AttributeError) as e:
            LOG.debug3("_is_submodule_te() {} {} not found: {}"
                       .format( p4gf_util.abbrev(commit_sha1)
                              , gwt_path
                              , e ))
            # No commit (we permit input commit_sha1=None) or tree entry
            # (we expect paths that do not exist).
            return False

# End class FastPush
# ----------------------------------------------------------------------------


class ObjectTypeHistory:
    """Commit sha1/Branch ID/Changelist number store.

    Each Git commit is assigned to one or more branches, and given a
    gfmark ":123" which acts as a changelist number until `p4 unzip`
    assigns a true changelist number.
    Same data that Git Fusion stores under
    //.git-fusion/objects/{repo}/commits/... as part of gitmirror.

    This store fits in memory (really!). It's about the same size as the
    branch Assigner's assign_dict().
    """

    def __init__(self):
        self._store = defaultdict(list)
        self._ct = 0

        # Nothing to do with ObjectTypeHistory, here solely
        # because ObjectTypeHistory impersonates branch
        # Assigner for PreflightChecker.
        self.have_anonymous_branches = False

    def sha1_to_otl(self, sha1):
        """Return a list of fake ObjectType instances recorded with
        the requested sha1.
        """
        return self._store[sha1]

    def add(self, ot):
        """Remember a fake ObjectType instance."""
        self._store[ot.sha1].append(ot)
        self._ct += 1

    def sha1_branch_to_ot(self, sha1, branch_id):
        """Return exactly one matching OT, or None."""
        l = self._store[sha1]
        for ot in l:
            if ot.branch_id == branch_id:
                return ot
        return None

    def __len__(self):
        return self._ct

    def ct(self):   # pylint:disable=invalid-name
        """How many Romans?"""
        return self._ct

    def ot_iter(self):
        """Iterate through all ObjectType instances."""
        for v in self._store.values():
            yield from v

    def to_dict_list(self):
        """Convert to a flat list of pickle-friendly dicts."""
        return [ ot.to_dict() for ot in self.ot_iter() ]

    @staticmethod
    def from_dict_list(l):
        """Inflate from a list of pickle-friendly dicts."""
        oth = ObjectTypeHistory()
        for d in l:
            oth.add(ObjectType.from_dict(d))
        return oth

    # -- branch Assigner API replacement -------------------------------------

    def is_assigned(self, sha1):
        """Do we have at least one branch assigned for this commit?"""
        return sha1 in self._store

    def branch_id_list(self, sha1):
        """Return a list of branch_ids assigned to a commit."""
        return [ot.branch_id for ot in self._store[sha1]]

# end class ObjectTypeHistory
# ----------------------------------------------------------------------------


class _FPMarks:
    """Replacement for G2P.fast_export_marks, which is normally an
    instance of p4gf_fastexport_marks.Marks.
    """

    def __init__(self, gfe_mark_to_sha1):
        self.gfe_mark_to_sha1 = gfe_mark_to_sha1

    def get_commit(self, mark):
        """Return the sha1 that corresponds with mark."""
        return self.gfe_mark_to_sha1[mark]

    @staticmethod
    def set_head(_prt_ref):
        """Stub to squelch pylint warnings from
        PreflightChecker.check_prt_and_commits(), which is never
        invoked on our fakey _FPMarks replacement.
        """
        pass

# end class _FPMarks
# ----------------------------------------------------------------------------

# What to use as an integ source. Returned by _integ_src().
_IntegSource = namedtuple("_IntegSource", [
      "depot_path"
    , "start_rev_int"
    , "end_rev_int"
    ])

# end namedtuple _IntegSource
# ----------------------------------------------------------------------------


class TreeStore:
    """A BigStore set of all trees copied to the bat_gfunzip.

    Keys are byte-array not string.
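    For example (illustrative sha1): the 40-character hex string
    "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3" is held as its 20-byte
    binascii.a2b_hex() form, roughly halving per-key memory.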
""" def __init__(self, file_path, store_how): # pylint:disable=invalid-name self._file_path = file_path if store_how == p4gf_config.VALUE_FAST_PUSH_WORKING_STORAGE_DICT: self._s = set() elif store_how == p4gf_config.VALUE_FAST_PUSH_WORKING_STORAGE_SQLITE_MEMORY: self._s = p4gf_squee_value.SqueeValueDict(file_path) elif store_how == p4gf_config.VALUE_FAST_PUSH_WORKING_STORAGE_SQLITE_SINGLE_TABLE: self._s = p4gf_squee_value.SqueeValue(file_path) else: raise RuntimeError(_("Unsupported store_how {store_how}") .format(store_how=store_how)) def contains(self, sha1): """Already stored?""" return TreeStore.to_key(sha1) in self._s def add(self, sha1): """Already stored?""" key = TreeStore.to_key(sha1) return self._s.add(key) @staticmethod def to_key(tree): """Convert pygit2.Tree or sha1 to the compact form we use as keys.""" if isinstance(tree, bytes): return tree elif isinstance(tree, str): return binascii.a2b_hex(tree) sha1_str = p4gf_pygit2.object_to_sha1(tree) return binascii.a2b_hex(sha1_str) @staticmethod def to_sha1_str(tree): """Convert a pygit2.Tree or sha1 to a str form of sha1.""" if isinstance(tree, str): return tree elif isinstance(tree, bytes): return binascii.b2a_hex(tree) # Will raise an AttributeError if not a # pygit2.Tree or TreeEntry. as_str = p4gf_pygit2.object_to_sha1(tree) return as_str # end class TreeStore # ---------------------------------------------------------------------------- class GFMarkOffsets: """A class that tracks multiple gfmark/submitted_change_num offset pairs, and can return the correct submitted changelist number for any gfmark. """ def __init__(self): self._gf = [] # int(123) not ":123" self._cn_offset = [] # if 127 corresponds to gfmark ":123", then int(4) def append(self, gfmark, submitted_change_num): """Remember for later calls to to_change_num().""" gf = _gfmark_to_int(gfmark) cn = int(submitted_change_num) cn_offset = cn - gf self._gf .append(gf) self._cn_offset.append(cn_offset) LOG.getChild("gfmark_offsets").debug("append {} + {} = {}" .format(gfmark, cn_offset, submitted_change_num)) def to_change_num(self, gfmark): """Return the submitted changelist number that goes with gfmark.""" gf = _gfmark_to_int(gfmark) index = bisect.bisect_right(self._gf, gf) - 1 cn_offset = self._cn_offset[index] submitted_change_num = gf + cn_offset LOG.getChild("gfmark_offsets").debug3("to_change_num {} + {} = {}" .format(gfmark, cn_offset, submitted_change_num)) return submitted_change_num # end class TreeStore # ---------------------------------------------------------------------------- # -- module-wide ------------------------------------------------------------- # Struct to hold changelists assigned to first changelist on a new # depot branch: one for the ghost that populates it, and then one # to hold the actual Git commit. PopulateCommit = namedtuple("PopulateCommit", [ "branch_id" # Branch being populated , "ghost_of_sha1" # Git first-parent commit that we'll ghost. , "ghost_of_change_num" # gfmark assigned to parent we're ghosting , "ghost_of_branch_id" # branch that holds @ghost_of_change_num , "ghost_gfmark" # GFMark assigned to ghost that populates this # branch, to hold a parent commit of "sha1" , "ghost_precedes_gfmark" # GFMark assigned to hold first "real" # changelist on this branch, to hold # commit "sha1" , "ghost_precedes_sha1" # First "real" commit on this branch ]) # Structure to hold a .gitmodule entry, to help _mark_submodule_deletes() # detect deletes. 
GitmodulesEntry = namedtuple("GitmodulesEntry", ["gwt_path", "sha1"]) def _is_configured(ctx): """Is config setting enable-fast-push yes/true?""" return ctx.repo_config.getboolean( p4gf_config.SECTION_GIT_TO_PERFORCE , p4gf_config.KEY_ENABLE_FAST_PUSH , fallback = True ) def _is_repo_view_empty(ctx): """No competing changelists in the repo view.""" # +++ No changes at all in the union view? # We know we're empty, no need to check # each individual branch. if ctx.union_view_empty(): return True # There are changes in the union view, but # those might be excluded by each individual # branch view. Check each individual branch. for b in ctx.branch_dict().values(): with ctx.switched_to_branch(b): r = ctx.p4run('changes', '-m1', ctx.client_view_path()) if r: return False # Each branch view empty, therefore repo is empty. return True def _server_supports_unzip(ctx): """Is the Perforce server 15.1 or later?""" version_string = ctx.server_version assert version_string # Called before ctx.connect() # fills in server_version? d = p4gf_version_3.parse_p4d_version_string(version_string) year_sub_string = d['release_year'] + '.' + d['release_sub'] year_sub = float(year_sub_string) patch_level = int(d['patchlevel']) if year_sub < 2015.1: return False if year_sub == 2015.1: # @1028542 is not sufficient, lacks @1022853's support # for 'p4 unzip -fIR' # @1038654 was the first internal p15.1 build # to include 'p4 unzip -fIR'. return 1038654 <= patch_level if 2015.2 <= year_sub: return True LOG.error("Unknown P4D version: {}".format(version_string)) return False def _pickle_file_abspath(repo_name): """Generate the name of the packet file for the given repo. :type repo: str :param repo: name of repository. :return: path and name for packet file. """ file_name = NTR('push-data.pickle').format(repo=repo_name) file_path = os.path.join(_calc_persistent_dir(repo_name) , file_name) return os.path.abspath(file_path) def _gfe_to_p4_file_action(p4_exists, git_action): """Translate git-fast-export actions M/T/D to Perforce file actions 'add', 'edit', 'delete'. """ ### copy/rename support here, too. if git_action == 'M' and not p4_exists: return p4gf_p4dbschema.FileAction.ADD else: return { 'A' : p4gf_p4dbschema.FileAction.ADD , 'M' : p4gf_p4dbschema.FileAction.EDIT , 'D' : p4gf_p4dbschema.FileAction.DELETE }.get(git_action) def _calc_persistent_dir(repo_name): """Return a directory name where we can create files that will outlive our process. Not a temporary directory. ~/.git-fusion/views/{repo}/fast_push/ Does not ensure such a directory exists. Do that elsewhere. """ return os.path.join(p4gf_const.P4GF_HOME, "views", repo_name, "fast_push") def _gfmark_to_int(gfmark): """Convert ":123" to int(123).""" return int(gfmark[1:]) def _is_gfmark(gfmark): """Is this a gfmark string?""" return isinstance(gfmark, str) and gfmark.startswith(":") def _sanitize_p4filetype(p4filetype_str): """Git Fusion supports a subset of p4filetypes. Replace any unsupported types with "binary". """ if not p4filetype_str: return p4filetype_str base_mods = p4gf_p4filetype.to_base_mods(p4filetype_str) # Cannot apply type 'symlink' via typemap. Only if Git # says it's a symlink. 
if base_mods[0] == "symlink": base_mods[0] = '' if ( base_mods[0] and base_mods[0] not in p4gf_p4dbschema.FileType.SUPPORTED_P4FILETYPES): base_mods[0] = "binary" return p4gf_p4filetype.from_base_mods(base_mods[0], base_mods[1:]) else: return p4filetype_str def _sanitize_mods(base_mods): """Fast Push controls the storage format: force ON +C compressed file per revision (but not here, forced on elsewhere) force off +D one file with RCS deltas force off +F uncompressed file per revision (a future version of Git Fusion should use this for JPGs and such) force off +X archive trigger SURPRISE: Modifies collection base_mods in-place! """ for c in ['D', 'F', 'X']: if c in base_mods: base_mods.pop(base_mods.index(c)) return base_mods def debug3(log, msg, *arg, **kwarg): """If logging at DEBUG3, do so. If not, do nothing.""" if log.isEnabledFor(logging.DEBUG3): log.debug3(msg.format(*arg, **kwarg))