#!/usr/bin/python # #Copyright (c) 2010, Perforce Software, Inc. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL PERFORCE SOFTWARE, INC. BE LIABLE FOR ANY # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #******************************************************************************* # Author: Stephen Moon # Date: 1/27/10 # Last Modified: 2/08/10 # # Summary: Uses Python CSV module to import a CSV file and then generates # CSV files after sorting each column numerically and alphabetically. # A bit sophisticated dictionary of dictionary example. # #******************************************************************************* import sys,optparse,csv,string #class to encapsulate dictionary and array class DictClass(object): #initializes the data structure def __init__(self): self.data = {} #dictionary of category data self.hdata = {} #dictionary of category header self.superDict = {} #contains self.data for each company for example self.cList = [] #list of category #does a numeric comparison def cmpTwoNums(a,b): if int(a) <= int(b): return 1 else: return -1 #is a number? def isNumeric(value): return str(value).replace(".","").replace("-","").isdigit() #filter duplicate names def filterName(name,v,k): if name == k: return elif name == v: return else: return name #first break up array into strings and then output them to CSV file def array2Str2CSV(dArray,k,writer,verbose): firstArray = [] for i in dArray: if(type(i) == list): subArray= i subArray.append(k) writer.writerow(subArray) if verbose: for j in i: print "list: %s, %s" % (j,type(j)) else: firstArray.append(i) if verbose: print "str: %s, %s" % (i,type(i)) firstArray.append(k) writer.writerow(firstArray) #building the dictionary for manipulation def buildDictionary(d,file): count = 1 for row in file: if count == 1: d.cList = row d.hdata = dict(zip(d.cList,row)) else: d.data = dict(zip(d.cList,row)) count +=1 d.superDict[row[0]] = d.data return d #reverse key and value of dictionary to sort by values def swapKeyValue(d,v,v1,k): nv = [item for item in v.values() if filterName(item,v1,k)] nv.insert(0,k) if d.has_key(v1): d[v1].append(nv) else: d[v1] = nv return d #write a header at the top of each CSV file def writeHeader(writer,d,name): filteredHdata = [item for item in d.hdata.values() if filterName(item,name,d.cList[0])] filteredHdata.insert(0,d.cList[0]) filteredHdata.append(name) writer.writerow(filteredHdata) def main(): parser = optparse.OptionParser(usage="%prog [-h,-v] filename",version="%prog v0.9") parser.add_option("-v","--verbose",action="store_true",dest="verbose",help="Print debug messages to stdout") (options,args) = parser.parse_args() #by default sys.argv[1:] if len(args) != 1: parser.error("Incorrect number of arguments") #read a CSV file file = csv.reader(open(args[0], "r"),delimiter=';',quoting=csv.QUOTE_NONE) d = DictClass() #create dictClass object d = buildDictionary(d,file) #build the dictionary for later processing for name in d.cList: #name = each category ntotalDict = {} #store dictionary of numeric values ctotalDict = {} #store dictionary of character values ndict = 1 for k,v in d.superDict.items(): for k1,v1 in v.items(): if k1 == name and isNumeric(v1): ntotalDict = swapKeyValue(ntotalDict,v,v1,k) elif k1 == name and not isNumeric(v1): ctotalDict = swapKeyValue(ctotalDict,v,v1,k) #sorting the keys (numeric or non-numeric) if ntotalDict: keys = ntotalDict.keys() keys.sort(cmpTwoNums) elif ctotalDict: ndict = 0 keys = ctotalDict.keys() keys.sort() #write a CSV file for each category writer = csv.writer(open(name + '.csv',"w"),delimiter=',',quoting=csv.QUOTE_MINIMAL) print "Category: %s" % name writeHeader(writer,d,name) #break up arrays so it is better formatted for Excel import for k in keys: if ndict == 1: nArray = ntotalDict.get(k) array2Str2CSV(nArray,k,writer,options.verbose) elif ndict == 0: cArray = ctotalDict.get(k) array2Str2CSV(cArray,k,writer,options.verbose) ctotalDict = {} ntotalDict = {} if __name__ == '__main__': main()