# parser.py #2

##
## Copyright (c) 2006 Jason Dillon
##
## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
## You may obtain a copy of the License at
##
##     http://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
##

##
## $Id: parser.py 60 2006-06-01 22:44:19Z user57 $
##

import re

class StreamIterator:
    """
    Line iterator over a file-like object, with a one-line pushback.

    Lines are fetched with the wrapped object's readline(); `lineno` counts
    how many lines have been read from it.  A line handed back through
    pushback() is returned by the following next() call and is not counted
    a second time.

    Both the Python 2 (next) and Python 3 (__next__) iterator protocols
    are provided.
    """

    def __init__(this, stream):
        """
        stream -- object whose readline() returns an empty value at end
                  of input.
        """
        this.stream = stream
        this.lineno = 0
        this.pushline = None

    def pushback(this, line):
        """
        Remember `line` so that the next call to next() returns it.

        Only one line is kept; pushing back again before it has been
        consumed replaces it.
        """
        this.pushline = line

    def __iter__(this):
        return this

    def next(this):
        """
        Return the pushed-back line if there is one, otherwise the next
        line read from the stream.

        Raises StopIteration once readline() returns an empty value.
        """
        if this.pushline is not None:
            line = this.pushline
            this.pushline = None
            return line

        line = this.stream.readline()

        # readline() signals end of input with an empty string (or empty
        # bytes for a binary stream)
        if not line:
            raise StopIteration

        this.lineno += 1
        return line

    # Python 3 looks up __next__; next() stays for existing callers
    __next__ = next

# States of DescriptionParser.parse(); the current one is kept in its `mode`.

# Line 1 of the input: the "Change ..." header
HEADER_MODE = 0
# Selected on line 2; lines are collected as change comments in this state
COMMENT_MODE = 1
# Selected by the 'Affected files ...' heading
AFFECTED_FILES_MODE = 2
# Selected by the 'Moved files ...' heading; the parser skips lines in this state
MOVED_FILES_MODE = 3
# Selected by the 'Jobs fixed ...' heading; a job line is expected next
JOB_FIXED_MODE = 4
# Entered once a job line has matched; handles the lines that follow it
JOB_FIXED_DESCRIPTION_MODE = 4.1
# Selected by the 'Differences ...' heading; the rest of the stream is left
# for DifferencesIterator
DIFFERENCES_MODE = 5

# Change 25375 by jdillon@jdillon-sanity on 2006/04/06 14:44:22
# Groups: change, author, client, date, time (see Description.setHeader)
CHANGE_HEADER_RE = re.compile(r'^Change ([0-9]+) by (\S+)@(\S+) on (\S+) (\S+)$')

# Section headings are recognised with line.startswith(), so only the start
# of the line has to match.
AFFECTED_FILES_SECTION_PREFIX = 'Affected files ...'

# ... //infrastructure/p4spam/main/pylib/p4spam/htmlmessage.py#2 edit
# Groups: path, rev, action (see AffectedFile)
AFFECTED_FILE_RE = re.compile(r'^\.\.\. (.+?)#([0-9]+) (\S+)$')

MOVED_FILES_SECTION_PREFIX = 'Moved files ...'

JOBS_FIXED_SECTION_PREFIX = 'Jobs fixed ...'

# MYJOB-1 on 2006/04/17 by jdillon *closed*
# Groups: name, date, author, status (see JobFixed)
JOB_FIXED_RE = re.compile(r'^(\S+) on (\S+) by (\S+) (\S+)$')

DIFFERENCES_SECTION_PREFIX = 'Differences ...'

# ==== //infrastructure/p4spam/main/pylib/p4spam/htmlmessage.py#2 (ktext) ====
# Groups: path, rev, filetype (see Difference)
DIFF_HEADER_RE = re.compile(r'^==== (.+?)#([0-9]+) \((\S+)\) ====$')

class DescriptionParser:
    """
    Parses a change description, read line by line from a stream, into a
    Description.

    The input is expected to start with a "Change ..." header line followed
    by a line that is skipped, then the change comments, and then any of
    the 'Jobs fixed ...', 'Affected files ...', 'Moved files ...' and
    'Differences ...' sections.
    """

    def __init__(this, stream):
        """
        stream -- file-like object providing readline().
        """
        this.stream = StreamIterator(stream)
        this.mode = None

    def _nextLine(this):
        """Return the line after the current one, or None at end of input."""
        try:
            return this.stream.next()
        except StopIteration:
            return None

    def parse(this):
        """
        Consume the stream up to the 'Differences ...' heading and return
        the resulting Description.

        Diffs are not parsed here: when the differences section is reached,
        desc.diffs is set to a DifferencesIterator over the rest of the
        stream and parsing stops.

        Raises ValueError when the header line, an affected-file line or a
        job line does not have the expected format.
        """
        desc = Description()
        jobfix = None

        for line in this.stream:
            # Switch modes.  Except for the header and the differences
            # heading, the line that triggers the switch is replaced by
            # the one that follows it.
            if this.stream.lineno == 1:
                this.mode = HEADER_MODE

            elif this.stream.lineno == 2:
                this.mode = COMMENT_MODE
                line = this._nextLine()

            elif line.startswith(AFFECTED_FILES_SECTION_PREFIX):
                this.mode = AFFECTED_FILES_MODE
                line = this._nextLine()

            elif line.startswith(MOVED_FILES_SECTION_PREFIX):
                this.mode = MOVED_FILES_MODE
                line = this._nextLine()

            elif line.startswith(JOBS_FIXED_SECTION_PREFIX):
                this.mode = JOB_FIXED_MODE
                line = this._nextLine()

            elif line.startswith(DIFFERENCES_SECTION_PREFIX):
                this.mode = DIFFERENCES_MODE
                # NOTE: Might not have another line to skip

            if line is None:
                # The input ended right after a section heading
                break

            # Process modes
            if this.mode == HEADER_MODE:
                match = CHANGE_HEADER_RE.match(line)
                if match is None:
                    raise ValueError("Unable to parse change header: %s" % line)
                desc.setHeader(match.groups())

            elif this.mode == COMMENT_MODE:
                desc.comments.append(line)

            elif this.mode == AFFECTED_FILES_MODE:
                # Skip blank lines (strip() also copes with "\r\n" endings)
                if not line.strip():
                    continue

                match = AFFECTED_FILE_RE.match(line)
                if match is None:
                    raise ValueError("Unable to parse: '%s'" % line)
                desc.files.append(AffectedFile(match.groups()))

            elif this.mode == MOVED_FILES_MODE:
                # Moved file lines are not recorded
                continue

            elif this.mode == JOB_FIXED_MODE:
                if not line.strip():
                    continue

                match = JOB_FIXED_RE.match(line)
                if match is None:
                    raise ValueError("Unable to parse: '%s'" % line)
                jobfix = desc.addJobFix(match.groups())
                this.mode = JOB_FIXED_DESCRIPTION_MODE

            elif this.mode == JOB_FIXED_DESCRIPTION_MODE:
                assert jobfix is not None

                # A line that looks like another job line starts a new job
                # instead of being swallowed into the previous job's text.
                # NOTE(review): relies on job description text not matching
                # JOB_FIXED_RE (e.g. because it is indented) -- confirm.
                match = JOB_FIXED_RE.match(line)
                if match is not None:
                    jobfix = desc.addJobFix(match.groups())
                else:
                    jobfix.comments.append(line)

            elif this.mode == DIFFERENCES_MODE:
                desc.diffs = DifferencesIterator(this.stream)

                # Do not consume any more lines yet
                break

        # Pop off the last line of the comments
        if len(desc.comments) != 0:
            ##
            ## HACK: If nothing was processed, then comments will be empty,
            ##       should really have a better way to detect failure
            ##
            desc.comments.pop()

        return desc

class Description:
    """
    Result of parsing one change description: the header fields plus the
    comment lines, affected files, fixed jobs and diffs gathered for it.
    """

    def __init__(this):
        # Header fields stay None until setHeader() is called
        this.change = this.author = this.client = None
        this.date = this.time = None

        # Each collection gets its own fresh list
        this.comments, this.files, this.jobs, this.diffs = [], [], [], []

    def setHeader(this, groups):
        """
        Fill in the header fields from a 5-item sequence ordered as
        (change, author, client, date, time).
        """
        this.change, this.author, this.client, this.date, this.time = groups

    def addJobFix(this, groups):
        """
        Build a JobFixed from `groups`, append it to `jobs` and return it.
        """
        this.jobs.append(JobFixed(groups))
        return this.jobs[-1]

    def __str__(this):
        header = (this.change, this.author, this.client, this.date, this.time)
        return "%s %s@%s %s %s" % header

class JobFixed:
    """
    One entry of the 'Jobs fixed ...' section.

    `groups` is a 4-item sequence ordered as (name, date, author, status);
    `comments` collects the lines the parser attaches to the job.
    """

    def __init__(this, groups):
        (this.name, this.date, this.author, this.status) = groups
        this.comments = []

    def __str__(this):
        # Same layout as the parsed line, e.g.
        #   MYJOB-1 on 2006/04/17 by jdillon *closed*
        # (one placeholder per field; a fifth one made this raise TypeError)
        return "%s on %s by %s %s" % (this.name, this.date, this.author, this.status)

class AffectedFile:
    """
    One entry of the 'Affected files ...' section.

    `groups` is a 3-item sequence ordered as (path, rev, action); the
    revision is stored as an int.
    """

    def __init__(this, groups):
        path, rev, action = groups
        this.path = path
        this.rev = int(rev)
        this.action = action

    def __str__(this):
        fields = (this.path, this.rev, this.action)
        return "%s#%s %s" % fields

class DifferencesIterator:
    """
    Iterator yielding one parsed diff at a time from a stream.

    Each step asks a DifferenceParser built on `stream` for the next diff;
    iteration ends when the parser returns None.  Both the Python 2 (next)
    and Python 3 (__next__) iterator protocols are provided.
    """

    def __init__(this, stream):
        """
        stream -- line source handed unchanged to DifferenceParser.
        """
        this.stream = stream
        this.parser = DifferenceParser(this.stream)

    def __iter__(this):
        return this

    def next(this):
        """Return the next diff; raise StopIteration when none is left."""
        diff = this.nextDiff()
        if diff is None:
            raise StopIteration
        return diff

    # Python 3 looks up __next__; next() stays for existing callers
    __next__ = next

    def nextDiff(this):
        """Return the next diff, or None when the parser finds no more."""
        return this.parser.parse()

# Judging by the names, these are the leading markers of info, added and
# removed lines inside a diff body.  Nothing in the visible part of this file
# references them -- presumably they are meant for code that consumes
# Difference.lines (TODO confirm).
DIFF_INFO_PREFIX = '@@'
DIFF_ADDED_PREFIX = '+'
DIFF_REMOVED_PREFIX = '-'

class Difference:
    """
    One diff from the 'Differences ...' section.

    `groups` is a 3-item sequence ordered as (path, rev, filetype); the
    revision is stored as an int.  `lines` starts empty and is filled by
    the parser with the body of the diff.
    """

    def __init__(this, groups):
        path, rev, filetype = groups
        this.path = path
        this.rev = int(rev)
        this.filetype = filetype
        this.lines = []

    def __str__(this):
        fields = (this.path, this.rev, this.filetype)
        return "%s#%s (%s)" % fields

class DifferenceParser:
    """
    Reads one diff at a time from a line stream.

    The stream must be iterable and provide next() and pushback(line), as
    StreamIterator does.
    """

    def __init__(this, stream):
        this.stream = stream
        # Not used by parse(); kept so instances expose the same attributes
        # as before
        this.pushline = None
        this.pastfirst = False

    def parse(this):
        """
        Return the next Difference found in the stream, or None when the
        stream holds no further diff header.

        Lines before the first '==== ' header are ignored.  The line right
        after the header is discarded, the following lines become the diff
        body, and the header of the next diff is pushed back onto the
        stream for the next call.
        """
        diff = None

        for line in this.stream:
            if line.startswith('==== '):
                if diff is None:
                    match = DIFF_HEADER_RE.match(line)
                    if match is not None:
                        diff = Difference(match.groups())

                        # Eat the next line.  If the header was the last
                        # line of the input, still return the diff instead
                        # of letting StopIteration escape and drop it.
                        try:
                            this.stream.next()
                        except StopIteration:
                            break

                else:
                    # Save this line for next round and return our diff obj
                    this.stream.pushback(line)

                    # Pop the last line off the parsed diff; a diff with no
                    # body lines has nothing to pop
                    if diff.lines:
                        diff.lines.pop()

                    break

            elif diff is not None:
                diff.lines.append(line)

        return diff

if __name__ == '__main__':
    # Self-test: parse a change description from stdin, sanity check it and
    # dump what was found.  Output goes through sys.stdout.write rather than
    # print statements so that this block parses under both Python 2 and
    # Python 3.
    import sys

    write = sys.stdout.write

    parser = DescriptionParser(sys.stdin)
    desc = parser.parse()

    # Make sure there are files
    assert len(desc.files) != 0

    # Make sure if there are any edits, that we see diffs
    expectDiffCount = 0
    for f in desc.files:
        if f.action in ('edit', 'integrate'):
            expectDiffCount = expectDiffCount + 1

    # Diffs is an iter, so parse everything first
    diffs = []
    for d in desc.diffs:
        diffs.append(d)

    assert expectDiffCount == len(diffs)

    write("%s\n" % desc)
    write("\n")

    for f in desc.files:
        write("    %s\n" % f)
    write("\n")

    write("Comments:\n")
    for line in desc.comments:
        # Parsed lines carry their own line ending, so none is added
        write("    %s" % line)
    write("\n")

    if len(diffs) == 0:
        write("NO DIFFS\n")
    else:
        write("Diffs (%s):\n" % expectDiffCount)
        for diff in diffs:
            write("%s\n" % diff)
            for line in diff.lines:
                write("    %s" % line)
    write(" ------ \n")
#	Change	User	Description	Committed
#2	7732	Matthew Janulewicz	Adding ability for P4Spam to detect and correctly categorize P4 2009.1's new 'move/add' and 'move/delete' file operations.
#1	7731	Matthew Janulewicz	Adding P4Spam 1.1 code from http://p4spam.sourceforge.net/wiki/Main_Page "P4Spam is a Perforce change review daemon which spits out sexy HTML-styled notification emails."