#! /usr/bin/env python3.3
"""BranchFilesCache."""
from collections import deque, namedtuple
import copy
import logging
import p4gf_util

LOG = logging.getLogger(__name__)

_MAX = 10

# Does this cache help? Barely.
#
# Pushing android-libhardware's 768 commits:
#
#   cache   miss   hit    wallclock
#   size    count  count  duration
#   ------  -----  -----  -----------------------------
#   10000    1185   1018  84s 83s 82s  avg=83.0s
#     100    1208    995  85s 85s 83s  avg=84.3s
#      10    1358    845  81s 85s 82s  avg=82.7s  tiny 3% savings
#       0    2203      0  85s 86s 86s  avg=85.7s
#
# The results are noisy enough that even that 3% savings is suspect.
# But avoiding 845 out of 2203 'p4 files' calls to the server? Yeah,
# measurable or not, that's worth at least a 10-deep cache.
class BranchFilesCache:
    """Generic cache of 'p4 files' in some branch at some changelist.

    G2PMatrix runs enough duplicate 'p4 files //branch-client/...@nn'
    calls in close temporal proximity that it could benefit from a small
    bounded cache.
    """

    def __init__(self):
        self.cache    = deque(maxlen=_MAX)   # of CacheLine
        self._hit_ct  = 0
        self._miss_ct = 0
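
    # A bounded deque plus appendleft() gives simple FIFO eviction: once
    # _MAX lines are cached, each new appendleft() silently drops the
    # oldest line off the right end. Sketch (using maxlen=3 rather than
    # _MAX, purely for illustration):
    #
    #   d = deque(maxlen=3)
    #   d.appendleft(1); d.appendleft(2); d.appendleft(3)
    #   d.appendleft(4)     # deque is now [4, 3, 2]; 1 was evicted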

    def files_at(self, ctx, branch, change_num):
        """Fetch the files in branch at change and return the result list."""
        result_list = self._find(branch, change_num)
        if result_list is None:
            self._miss_ct += 1
            LOG.debug2('{branch}@{change} miss {ct}'
                       .format( branch = p4gf_util.abbrev(branch.branch_id)
                              , change = change_num
                              , ct     = self._miss_ct ))
            result_list = self._fetch(ctx, branch, change_num)
            self._insert(branch, change_num, result_list)
        else:
            self._hit_ct += 1
            LOG.debug2('{branch}@{change} hit {ct}'
                       .format( branch = p4gf_util.abbrev(branch.branch_id)
                              , change = change_num
                              , ct     = self._hit_ct ))

        # Return a list of COPIES of our dicts. Calling code was
        # originally written to consume P4.run() results directly and
        # assumed it owned the results. Cheaper and cleaner to copy here
        # than to ask all callers to learn about copy.
        #
        # Can't use copy.copy(): too shallow, it returns a copy of the
        # list that still points to our original dict elements.
        # copy.deepcopy() might be overkill if our dict keys/values are
        # themselves collections, but I'll live with that until
        # memory/profiling says otherwise.
        #
        return copy.deepcopy(result_list)
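
    # Illustration (comment only) of the shallow-copy hazard noted in
    # files_at() above, using a made-up 'p4 files' result dict:
    #
    #   orig    = [{'depotFile': '//depot/a', 'action': 'add'}]
    #   shallow = copy.copy(orig)
    #   shallow[0]['action'] = 'edit'      # also mutates orig[0]
    #   deep    = copy.deepcopy(orig)
    #   deep[0]['action'] = 'branch'       # orig[0] untouched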

    @staticmethod
    def _fetch(ctx, branch, change_num):
        """Run 'p4 files' and return its results."""
        with ctx.switched_to_branch(branch):
            return ctx.p4run('files', ctx.client_view_path(change_num))

    def _find(self, branch, change_num):
        """Find a CacheLine with matching branch and changelist number and
        return its result_list, or None if not found.
        """
        # Never cache results for temp branch views that lack a permanent
        # branch_id: a branch_id of None is shared by multiple branch views.
        if not branch.branch_id:
            return None
        for cl in self.cache:
            if (    cl.branch     == branch.branch_id
                and cl.change_num == change_num):
                return cl.result_list
        return None

    def _insert(self, branch, change_num, result_list):
        """Add a CacheLine for branch + changelist number + result_list.

        Assumes we don't already have such a line.
        """
        self.cache.appendleft(CacheLine( branch      = branch.branch_id
                                       , change_num  = change_num
                                       , result_list = result_list ))

CacheLine = namedtuple('CacheLine', ['branch', 'change_num', 'result_list'])