#! /usr/bin/env python3.3
"""BranchFilesCache."""

from collections import deque, namedtuple
import copy
import logging

import p4gf_util

LOG = logging.getLogger(__name__)

_MAX = 10

# Does this cache help? Barely.
#
# Pushing android-libhardware's 768 commits:
#
# cache  miss   hit    wallclock
# size   count  count  duration
# -----  -----  -----  ------------------------
# 10000  1185   1018   84s 83s 82s  avg=83.0s
#   100  1208    995   85s 85s 83s  avg=84.3s
#    10  1358    845   81s 85s 82s  avg=82.7s   tiny 3% savings
#     0  2203      0   85s 86s 86s  avg=85.7s
#
# The results are noisy enough that even that 3% savings is suspect.
# But avoiding 845 out of 2203 'p4 files' calls to the server? Yeah,
# measurable or not, that's worth at least a 10-deep cache.


class BranchFilesCache:
    """Generic cache of 'p4 files' in some branch at some changelist.

    G2PMatrix runs enough duplicate 'p4 files //branch-client/...@nn'
    requests in close temporal proximity that it could benefit from a
    small bounded cache.
    """

    def __init__(self):
        self.cache    = deque(maxlen=_MAX)  # of CacheLine
        self._hit_ct  = 0
        self._miss_ct = 0

    def files_at(self, ctx, branch, change_num):
        """Fetch files in branch at change and return the result list."""
        result_list = self._find(branch, change_num)
        # _find() returns None for a miss; an empty result list is
        # still a valid, cacheable hit.
        if result_list is None:
            self._miss_ct += 1
            LOG.debug2('{branch}@{change} miss {ct}'
                       .format( branch = p4gf_util.abbrev(branch.branch_id)
                              , change = change_num
                              , ct     = self._miss_ct ))
            result_list = self._fetch(ctx, branch, change_num)
            self._insert(branch, change_num, result_list)
        else:
            self._hit_ct += 1
            LOG.debug2('{branch}@{change} hit {ct}'
                       .format( branch = p4gf_util.abbrev(branch.branch_id)
                              , change = change_num
                              , ct     = self._hit_ct ))

        # Return a list of COPIES of our dicts. Calling code was
        # originally written to consume P4.run() results directly and
        # assumed it owned the results. Cheaper and cleaner to copy
        # here than to ask all callers to learn about copy.
        #
        # Can't use copy.copy(): too shallow, it returns a copy of the
        # list whose elements still point to our original dicts.
        # copy.deepcopy() might be overkill unless the dict values are
        # themselves collections, but I'll live with that until
        # memory/profiling says otherwise.
        #
        return copy.deepcopy(result_list)

    @staticmethod
    def _fetch(ctx, branch, change_num):
        """Run 'p4 files' and return its results."""
        with ctx.switched_to_branch(branch):
            return ctx.p4run('files', ctx.client_view_path(change_num))

    def _find(self, branch, change_num):
        """Find a CacheLine with matching branch and changelist and
        return its result_list, or None if not found.
        """
        # Never cache results for temp branch views that lack a
        # permanent branch_id: a branch_id of None is shared by
        # multiple branch views.
        if not branch.branch_id:
            return None
        for cl in self.cache:
            if (    cl.branch     == branch.branch_id
                and cl.change_num == change_num):
                return cl.result_list
        return None

    def _insert(self, branch, change_num, result_list):
        """Add a CacheLine for branch + changelist + result_list.

        Assumes we don't already have such a line.
        """
        self.cache.appendleft(CacheLine( branch      = branch.branch_id
                                       , change_num  = change_num
                                       , result_list = result_list ))


CacheLine = namedtuple('CacheLine', ['branch', 'change_num', 'result_list'])
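

# ---------------------------------------------------------------------------
# Minimal usage sketch. FakeCtx and FakeBranch below are illustrative
# stand-ins, not real Git Fusion classes: they mimic only the surface this
# cache touches (branch_id, switched_to_branch(), client_view_path(),
# p4run()), so the cache can be exercised without a live Perforce server.
# Running it still assumes the Git Fusion modules imported above (p4gf_util)
# are on the path.
#
if __name__ == '__main__':
    import contextlib

    # LOG.debug2() is a Git Fusion logging extension; alias it to debug()
    # if that extension is not loaded, so this sketch runs on plain logging.
    if not hasattr(logging.Logger, 'debug2'):
        logging.Logger.debug2 = logging.Logger.debug

    class FakeBranch:
        """Stand-in for a Git Fusion Branch: only branch_id is needed."""
        def __init__(self, branch_id):
            self.branch_id = branch_id

    class FakeCtx:
        """Stand-in for a Git Fusion Context: counts 'p4 files' runs."""
        def __init__(self):
            self.run_ct = 0

        @contextlib.contextmanager
        def switched_to_branch(self, _branch):
            yield

        def client_view_path(self, change_num):
            return '//fake-client/...@{}'.format(change_num)

        def p4run(self, *_cmd):
            self.run_ct += 1
            return [{'depotFile': '//depot/main/a.c', 'action': 'add'}]

    ctx    = FakeCtx()
    branch = FakeBranch('fb0001')
    cache  = BranchFilesCache()

    cache.files_at(ctx, branch, '101')   # miss: runs 'p4 files'
    cache.files_at(ctx, branch, '101')   # hit:  served from the deque
    cache.files_at(ctx, branch, '102')   # miss: different changelist

    # Expect 2: the repeated @101 query was answered from the cache.
    print('p4run calls: {}'.format(ctx.run_ct))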