#!/usr/bin/env python # #Copyright (c) 2009, Perforce Software, Inc. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL PERFORCE SOFTWARE, INC. BE LIABLE FOR ANY # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #******************************************************************************* # Author: Stephen Moon # Date: 1/25/10 # # Summary: Uses Python CSV module to import a CSV file and then generates # CSV files after sorting each column numerically and alphabetically. # A bit sophisticated dictionary of dictionary example. 
#
import csv
import string

# Name of the ';'-delimited file the script reads from the working directory.
INPUT_FILE = "company_data.csv"


def _toNumber(value):
    """Convert str(value) to an int when possible, otherwise to a float."""
    text = str(value)
    try:
        return int(text)
    except ValueError:
        return float(text)


def cmpTwoNums(a, b):
    """Old-style comparator that orders numeric values from largest to smallest.

    Returns 1 when a is smaller than b, -1 when a is larger and 0 when the
    two are numerically equal.  Values are converted with int() first and
    float() as a fallback, so decimal strings such as "1.5" are accepted.
    Raises ValueError when an argument is not a number.
    """
    left = _toNumber(a)
    right = _toNumber(b)
    if left < right:
        return 1
    if left > right:
        return -1
    return 0


def isNumeric(value):
    """Return True when str(value) is a number the numeric sort can convert.

    The text may contain only digits, '.' and '-', and must also be
    accepted by float(); this rejects shapes such as "1-2" or "1.2.3" that
    pass the character test but cannot be converted for sorting.
    """
    text = str(value)
    if not text.replace(".", "").replace("-", "").isdigit():
        return False
    try:
        float(text)
    except ValueError:
        return False
    return True


def filterName(name, k):
    """Return name unless it equals k, in which case return None."""
    if name == k:
        return None
    return name


def _loadRecords(path):
    """Read the input file and return (columns, header, records).

    columns is the first non-empty row, header maps each column name to
    itself, and records maps the first cell of every later row to a dict of
    column name -> cell.  A later row with the same first cell replaces the
    earlier one.  Empty rows are skipped.
    """
    columns = []
    header = {}
    records = {}
    with open(path, "r") as source:
        reader = csv.reader(source, delimiter=';', quoting=csv.QUOTE_NONE)
        for row in reader:
            if not row:
                # A blank line has no first cell to use as the record key.
                continue
            if not columns:
                columns = row
                header = dict(zip(columns, row))
            else:
                records[row[0]] = dict(zip(columns, row))
    return columns, header, records


def _groupByColumn(records, name):
    """Group record lines by their value in column name.

    Returns (numericGroups, textGroups); each maps a cell value to the list
    of "key,other,cells" strings of the records holding that value.
    """
    numericGroups = {}
    textGroups = {}
    for key, record in records.items():
        if name not in record:
            # Row was shorter than the header and has no cell for this column.
            continue
        value = record[name]
        # NOTE: filtering is by cell text, not by column: every cell equal to
        # this value is dropped, and empty cells are dropped as well because
        # filterName's result is used as a truth value.
        others = ','.join(
            [item for item in record.values() if filterName(item, value)])
        line = ','.join([key, others])
        target = numericGroups if isNumeric(value) else textGroups
        target.setdefault(value, []).append(line)
    return numericGroups, textGroups


def _writeColumnReport(name, columns, header, records):
    """Write '<name>.csv' holding the records grouped and sorted by column name."""
    numericGroups, textGroups = _groupByColumn(records, name)
    if numericGroups:
        # NOTE: when a column holds any numeric value, only the numeric
        # groups are written; text values in the same column are left out.
        groups = numericGroups
        keys = sorted(groups, key=_toNumber, reverse=True)
    else:
        # Also covers a column with no data at all: keys is then empty and
        # only the header row is written.
        groups = textGroups
        keys = sorted(groups)

    headerCells = [item for item in header.values() if filterName(item, name)]
    headerCells.insert(0, columns[0])

    with open(name + '.csv', "w") as target:
        writer = csv.writer(target, delimiter=',', quoting=csv.QUOTE_MINIMAL)
        # Each row has two cells: the text form of a list, then the value.
        writer.writerow([headerCells, name])
        for key in keys:
            writer.writerow([groups[key], key])


def main(inputPath=INPUT_FILE):
    """Read inputPath and write one sorted CSV report per column.

    Every report is created in the current working directory and is named
    after its column.
    """
    columns, header, records = _loadRecords(inputPath)
    for name in columns:
        print("Category: %s" % name)
        _writeColumnReport(name, columns, header, records)


if __name__ == "__main__":
    main()