#!/usr/bin/env python # # Copyright (C) 2006 Robey Pointer # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # # bzr2p4: # # given a folder under bazaar version control, sync up a corresponding folder # under perforce control so that each bazaar revision has an equivalent # revision under perforce. # # some bazaar revisions need to be split up into 2 perforce revisions, since # perforce is unable to cope with a file rename *and* content change in the # same revision. in that case the renames will happen as a single commit, and # the changes as a follow-up commit, with the comments marked to indicate that # they're part of the same bazaar revision. # # after the first sync, temporary files are stored in the target folder to # remember the source folder, last revision sync'd, and a list of all the # revisions sync'd so far. (the latter is necessary because bazaar retains # the individual revisions of a merged branch, which we try to preserve by # turning them into individual revisions in perforce. it might be a better # to turn merges into a single "lump" commit...) # # # TODO: # - command-line option to scan the p4 history for a folder and rebuild the # bzr2p4 config from that (what's been sync'd etc) # - handle the reverse case of turning p4 revisions into bzr revisions # import base64 import logging import marshal import os import pickle import re import subprocess import sys import textwrap import time from StringIO import StringIO from optparse import OptionParser if os.environ.get('BZRPATH', None) is not None: sys.path.append(os.environ['BZRPATH']) try: import bzrlib except ImportError: sys.stderr.write('\n') sys.stderr.write('Can\'t find bzrlib on the python path.\n') sys.stderr.write('Try putting it in either PYTHONPATH or BZRPATH.\n') sys.stderr.write('\n') sys.exit(1) import bzrlib.branch import bzrlib.diff import bzrlib.revision import bzrlib.textfile P4_EXE = 'p4' #PATCH_EXE = 'patch' log = logging.getLogger('bzr2p4') # This code is dead, I believe. def patch(filename, diff): args = [ PATCH_EXE, '--forward', '-s', '-t', filename ] try: log.debug('%r', args) process = subprocess.Popen(args, bufsize=-1, stdin=subprocess.PIPE, stdout=None, stderr=None) process.stdin.write(diff) process.stdin.close() ret = process.wait() if ret != 0: raise Exception('patch returned %d' % (ret,)) except Exception, e: log.error('Exception executing (patch) %s: %s', ' '.join(args), e) raise def p4(*in_args, **kw): """ execute the string cmd as a perforce command, and return the output as an array of hashes. no interpretation of the results is done. any exceptions on popen are reraised. """ ret = [] if kw.get('raw', False): args = [ P4_EXE ] raw = True else: args = [ P4_EXE, '-G' ] raw = False args.extend(list(in_args)) if 'stdin' in kw: stdin_data = kw['stdin'] stdin = subprocess.PIPE else: stdin = None try: log.debug('%r', args) if stdin is not None: log.debug('stdin = %r', stdin_data) # p4 uses $PWD instead of the process's working directory, on # Cygwin at least os.environ['PWD'] = os.getcwd() process = subprocess.Popen(args, bufsize=-1, stdin=stdin, stdout=subprocess.PIPE, stderr=None) if stdin is not None: process.stdin.write(stdin_data) process.stdin.close() if raw: ret.append(process.stdout.read()) else: while True: try: ret.append(marshal.load(process.stdout)) except EOFError, e: break # TODO: Remove this hack. I suspect it is necessary only # because the coding of EOF differs between Windows and # Cygwin. except ValueError, e: break process.stdout.close() retcode = process.wait() except Exception, e: log.error('Exception executing (p4) %s: %s', ' '.join(args), e) raise if (0 != retcode): log.error('Error executing p4: ' + str(retcode)) log.error(ret) raise Exception('Error executing p4: ' + str(retcode)) return ret def p4_where(path): out = p4('where', path) return out[0]['data'].split(' ')[0] # TODO: The path argument is not used. def make_p4_changelist(path, rev, comment=None): description = rev.message if not isinstance(description, list): description = description.split('\n') if (len(description) > 1) and (description[-1] == ''): description = description[:-1] if len(description) == 1: # robey likes to type really long -m commit lines; wrap them description = textwrap.wrap(description[0], 70) # revision_id might be unicode, so can't use it directly encoded_revid = textwrap.wrap(base64.encodestring(rev.revision_id.encode('utf-8')), 55) # add special notes: datetime = time.strftime('%d %b %Y %H:%M:%S', time.localtime(rev.timestamp)) description.append('') description.append('# bazaar revision import') description.append('# commit by: %s' % (rev.committer,)) description.append('# date/time: %s' % (datetime,)) description.append('# rev id: %s' % (encoded_revid[0],)) for line in encoded_revid[1:]: description.append('# : %s' % (line,)) if comment is not None: description.append('# ' + comment) query = { 'Change': 'new', 'Description': '\n'.join(description) + '\n' } print 'make_p4_changelist:', query print 'make_p4_changelist:', marshal.dumps(query, 0) out = p4('change', '-i', stdin=marshal.dumps(query, 0)) m = re.match(r'Change (\d+) created', out[0]['data']) if m is None: log.debug('Bad response: %r', out) raise Exception('Can\'t parse changelist #.') return int(m.group(1)) def _are_files_opened(delta): return not ([] == delta.added and [] == delta.removed and [] == delta.renamed and [] == delta.kind_changed and [] == delta.modified) def get_revision_path(branch, old_rev_id, new_rev_id): """ get the list of revision ids to traverse from one revision to another. """ log.debug('get revision path: %r => %r', old_rev_id, new_rev_id) base_ancestry = set(branch.repository.get_ancestry(old_rev_id)) return [r for r in branch.repository.get_ancestry(new_rev_id) if r not in base_ancestry] def tree_lines(tree, file_id): if not file_id in tree: return [] tree_file = bzrlib.textfile.text_file(tree.get_file(file_id)) return tree_file.readlines() def get_patch(file_id, old_tree, new_tree, old_path, new_path): """ @raise errors.BinaryFile: if it's not a text file """ old_lines = tree_lines(old_tree, file_id) new_lines = tree_lines(new_tree, file_id) buffer = StringIO() bzrlib.diff.internal_diff(old_path, old_lines, new_path, new_lines, buffer) return buffer.getvalue() class DeltaWorker (object): def __init__(self, branch, rev_id, last_rev_id): self.branch = branch self.rev_id = rev_id self.last_rev_id = last_rev_id self.target = '.' self.get_delta() def get_delta(self): """ for a specific revision on a branch, get the Delta object describing the changes in that revision, and save copies of the prior and current revision tree. """ self.rev = self.branch.repository.get_revision(self.rev_id) self.rev_tree = self.branch.repository.revision_tree(self.rev_id) if self.last_rev_id is None: base_id = bzrlib.revision.NULL_REVISION else: base_id = self.last_rev_id self.base_tree = self.branch.repository.revision_tree(base_id) self.delta = self.rev_tree.changes_from(self.base_tree, want_unchanged=True, include_root=True) print self.delta def set_target(self, path): self.target = path def _two_stage(self): a_renamed_is_also_modified = ( len([r for r in self.delta.renamed if r[4] == True]) > 0) renamed = set([r[0] for r in self.delta.renamed]) added = [a[0] for a in self.delta.added] a_renamed_is_also_added = (len(renamed.intersection(added)) > 0) return a_renamed_is_also_modified or a_renamed_is_also_added def process(self): # perforce can't handle renaming a file AND modifying it at the same # time. so if we have any of those, we need to break the commit into # two stages. self.two_stage = self._two_stage() if self.two_stage: self.process_renames_only() self.process_all(stage=2) else: self.process_all() def process_renames_only(self): self.changelist = make_p4_changelist(self.target, self.rev, comment='(phase 1/2)') for r in self.delta.renamed: self.handle_rename(*r, **dict(stage=1)) print(p4('describe', '-du', str(self.changelist), raw=True)) p4('submit', '-c', str(self.changelist)) log.info('Submitted as p4 change %d.', self.changelist) def process_all(self, stage=0): if stage == 2: comment = '(phase 2/2)' else: comment = None self.changelist = make_p4_changelist(self.target, self.rev, comment=comment) for a in self.delta.added: self.handle_add(*a) for r in self.delta.removed: self.handle_remove(*r) for r in self.delta.renamed: self.handle_rename(*r, **dict(stage=stage)) for m in self.delta.modified: self.handle_modify(*m) self._ensure_perforce_change_has_opened_files() print(p4('describe', '-du', str(self.changelist), raw=True)[0]) p4('submit', '-f', 'submitunchanged', '-c', str(self.changelist)) log.info('Submitted as p4 change %d.', self.changelist) def _ensure_perforce_change_has_opened_files(self): # Bazaar merges have their own revision in addition to the # constituent revisions of the merge. The merge revision can have # no differences. Maybe it can also have changes of its own. The # case of no changes causes p4 submit to fail. if (not _are_files_opened(self.delta) and len(self.delta.unchanged) >= 2 and 'file' == self.delta.unchanged[1][2]): # Hack: Perforce doesn't support changelists with no files # changed. Open one file for edit so that we don't lose the # current Bazaar revision in migration. p4('edit', '-c', str(self.changelist), os.path.join(self.target, self.delta.unchanged[1][0])) def handle_add(self, path, id, kind): log.debug('add %r kind %r', path, kind) orig_dir = os.getcwdu() os.chdir(self.target) filename = path try: if kind == 'directory': if (not self._is_bzr_tree_root(path, id) and not os.path.isdir(filename)): os.mkdir(filename) return if kind != 'file': raise Exception('Don\'t know how to add objects of type %r' % (kind,)) print 'handle_add:', filename f = open(filename, 'w') f.write(''.join(tree_lines(self.rev_tree, id))) f.close() if self.rev_tree.is_executable(id): os.chmod(filename, 0664) p4('add', '-c', str(self.changelist), filename) finally: print 'popping directory from', self.target, 'to', orig_dir os.chdir(orig_dir) def _is_bzr_tree_root(self, path, id): return (path == '' and id == 'TREE_ROOT') def handle_remove(self, path, id, kind): log.debug('remove %r kind %r', path, kind) orig_dir = os.getcwdu() os.chdir(self.target) filename = path try: if kind == 'directory': try: os.rmdir(filename) except OSError, e: log.debug('error rmdir %r: %s', filename, e) return if kind != 'file': raise Exception('Don\'t know how to remove objects of type %r' % (kind,)) p4('delete', '-c', str(self.changelist), filename) finally: os.chdir(orig_dir) def handle_rename(self, old_path, new_path, id, kind, text_modified, meta_modified, stage=0): log.debug('rename %r => %r, kind %r', old_path, new_path, kind) orig_dir = os.getcwdu() os.chdir(self.target) old_filename = old_path new_filename = new_path try: if kind == 'directory': if (stage == 0) or (stage == 1): p4('integrate', '-c', str(self.changelist), old_filename + '/...', new_filename + '/...') p4('delete', '-c', str(self.changelist), old_filename + '/...') # ignore "modify" portion of a directory rename return if kind != 'file': raise Exception('Don\'t know how to rename objects of type %r' % (kind,)) if (stage == 0) or (stage == 1): # move the file print(p4('integrate', '-f', '-c', str(self.changelist), old_filename, new_filename)[0]) print(p4('delete', '-c', str(self.changelist), old_filename)[0]) if (stage == 0) or (stage == 2): self.handle_modify(new_path, id, kind, text_modified, meta_modified, old_path=old_path) finally: os.chdir(orig_dir) def handle_modify(self, path, id, kind, text_modified, meta_modified, old_path=None): log.debug('modify %r kind %r', path, kind) orig_dir = os.getcwdu() os.chdir(self.target) filename = path try: if kind != 'file': raise Exception('Don\'t know how to modify objects of type %r' % (kind,)) if old_path is None: old_path = path p4('edit', '-c', str(self.changelist), filename) if not text_modified: log.debug('No text modification.') return f = open(filename, 'w') f.write(''.join(tree_lines(self.rev_tree, id))) f.close() finally: os.chdir(orig_dir) class Config (object): def __init__(self, path): self._path = path def load(self): f = open(self._path, 'r') for line in f: m = re.match(r'([^#=]+)=(.*)$', line) if m is not None: key = m.group(1) val = m.group(2) if len([c for c in val if ord(c) > 127]) > 0: val = val.decode('utf-8') setattr(self, m.group(1), m.group(2)) f.close() def save(self): f = open(self._path, 'w') for key in self.__dict__: if key.startswith('_'): continue val = getattr(self, key) if not isinstance(val, (str, unicode)): continue if isinstance(val, unicode): val = val.encode('utf-8') f.write('%s=%s\n' % (key, val)) f.close() description = """\ bzr2p4: sync a perforce tree from a bazaar2 branch. if no target folder is given, the current directory is assumed. after one successful sync, the source folder will be saved in a state file in the target folder, and can be omitted on future runs. """ def parse_args(): parser = OptionParser('usage: %prog [options] [[source-folder] target-folder]', description=description) parser.add_option('-n', '--limit', dest='limit', default=None, help='limit number of migrated revisions to N') parser.add_option('-l', '--list', action='store_true', dest='show_list', default=False, help='show list of missing revisions instead of syncing') parser.add_option('--logfile', action='store', dest='logfile', default=None, help='override location of debug log') parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, help='send everything going to the logs to stdout also') parser.add_option('-q', '--quiet', action='store_true', dest='quiet', default=False, help='send nothing to stdout unless an error occurs') options, args = parser.parse_args() options.source_folder = None options.target_folder = '.' if len(args) > 2: parser.error('no more than two folders can be used') if len(args) == 1: options.target_folder = args[0] elif len(args) == 2: options.source_folder = args[0] options.target_folder = args[1] config = Config(os.path.join(options.target_folder, '.bzr2p4')) try: config.load() except IOError: pass if (options.source_folder is None) and (config is None): parser.error('Missing source-folder, and target folder has no saved config.') # clean up the config a little if options.source_folder is not None: config.source_folder = os.path.realpath(options.source_folder) options.target_folder = os.path.realpath(options.target_folder) if getattr(config, 'last_revid', None) is None: config.last_revid = bzrlib.revision.NULL_REVISION if getattr(config, 'logfile', None) is None: config.logfile = os.path.join(options.target_folder, '.bzr2p4.log') if options.logfile is not None: config.logfile = options.logfile if getattr(config, 'revisions_db', None) is None: config.revisions_db = os.path.join(options.target_folder, '.bzr2p4.revdb') try: f = open(config.revisions_db, 'rb') revisions_seen = pickle.load(f) f.close() except IOError: revisions_seen = set() if getattr(config, 'source_folder', None) is None: parser.error('No source folder given or known.') return options, config, revisions_seen def save_revisions(config, revisions_seen): f = open(config.revisions_db, 'wb') pickle.dump(revisions_seen, f) f.close() def setup_log(options, config): log.setLevel(logging.DEBUG) h = logging.StreamHandler(open(config.logfile, 'a')) h.setLevel(logging.DEBUG) h.setFormatter(logging.Formatter('%(levelname)-.3s [%(asctime)s.%(msecs)03d] %(message)s', '%Y%m%d-%H:%M:%S')) log.addHandler(h) h = logging.StreamHandler(sys.stdout) if options.verbose: h.setLevel(logging.DEBUG) elif options.quiet: h.setLevel(logging.ERROR) else: h.setLevel(logging.INFO) h.setFormatter(logging.Formatter('%(levelname)s: %(message)s ')) log.addHandler(h) def show_list(branch, path, limit, revisions_seen): print if len(path) == 0: print 'No missing revisions.' print return print 'Missing revisions: %d' % (len(path),) print count = 0 for revid in path: if revid in revisions_seen: # skip continue count += 1 rev = branch.repository.get_revision(revid) datetime = time.strftime('%d-%b %H:%M', time.localtime(rev.timestamp)) message = re.sub(r'[^\x20-\x7e]', '', rev.message) print ' %d. %s - %-.20s - %-.30s' % (count, datetime, rev.committer, message) if count >= limit: break print return def main(): options, config, revisions_seen = parse_args() setup_log(options, config) limit = options.limit if limit is None: limit = 1L << 64 else: limit = int(limit) branch = bzrlib.branch.Branch.open(config.source_folder) branch.lock_read() try: graph = branch.repository.get_graph() base_revision = graph.find_unique_lca(branch.last_revision(), config.last_revid) if base_revision == bzrlib.revision.NULL_REVISION: base_revision = None path = get_revision_path(branch, base_revision, branch.last_revision()) if options.show_list: show_list(branch, path, limit, revisions_seen) config.save() return count = 0 for revid in path: if revid in revisions_seen: log.info('(Skipping revision: %r)', revid) continue count += 1 log.info('Applying revision: %r', revid) worker = DeltaWorker(branch, revid, config.last_revid) worker.set_target(options.target_folder) worker.process() config.last_revid = revid revisions_seen.add(revid) # save in case we crash: save_revisions(config, revisions_seen) config.save() if count >= limit: break log.info('Done!') save_revisions(config, revisions_seen) config.save() finally: branch.unlock() if __name__ == '__main__': main() sys.exit(0)