#!/usr/bin/python
#
#Copyright (c) 2010, Perforce Software, Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL PERFORCE SOFTWARE, INC. BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#*******************************************************************************
# Author: Stephen Moon
# Date: 1/27/10
# Last Modified: 2/08/10
#
# Summary: Uses Python CSV module to import a CSV file and then generates
# CSV files after sorting each column numerically and alphabetically.
# A bit sophisticated dictionary of dictionary example.
#
#*******************************************************************************
import sys,optparse,csv,string
#class to encapsulate dictionary and array
class DictClass(object):
#initializes the data structure
def __init__(self):
self.data = {} #dictionary of category data
self.hdata = {} #dictionary of category header
self.superDict = {} #contains self.data for each company for example
self.cList = [] #list of category
#does a numeric comparison
def cmpTwoNums(a,b):
if int(a) <= int(b):
return 1
else:
return -1
#is a number?
def isNumeric(value):
return str(value).replace(".","").replace("-","").isdigit()
#filter duplicate names
def filterName(name,v,k):
if name == k:
return
elif name == v:
return
else:
return name
#first break up array into strings and then output them to CSV file
def array2Str2CSV(dArray,k,writer,verbose):
firstArray = []
for i in dArray:
if(type(i) == list):
subArray= i
subArray.append(k)
writer.writerow(subArray)
if verbose:
for j in i:
print "list: %s, %s" % (j,type(j))
else:
firstArray.append(i)
if verbose:
print "str: %s, %s" % (i,type(i))
firstArray.append(k)
writer.writerow(firstArray)
#building the dictionary for manipulation
def buildDictionary(d,file):
count = 1
for row in file:
if count == 1:
d.cList = row
d.hdata = dict(zip(d.cList,row))
else:
d.data = dict(zip(d.cList,row))
count +=1
d.superDict[row[0]] = d.data
return d
#reverse key and value of dictionary to sort by values
def swapKeyValue(d,v,v1,k):
nv = [item for item in v.values() if filterName(item,v1,k)]
nv.insert(0,k)
if d.has_key(v1):
d[v1].append(nv)
else:
d[v1] = nv
return d
#write a header at the top of each CSV file
def writeHeader(writer,d,name):
filteredHdata = [item for item in d.hdata.values() if filterName(item,name,d.cList[0])]
filteredHdata.insert(0,d.cList[0])
filteredHdata.append(name)
writer.writerow(filteredHdata)
def main():
parser = optparse.OptionParser(usage="%prog [-h,-v] filename",version="%prog v0.9")
parser.add_option("-v","--verbose",action="store_true",dest="verbose",help="Print debug messages to stdout")
(options,args) = parser.parse_args() #by default sys.argv[1:]
if len(args) != 1:
parser.error("Incorrect number of arguments")
#read a CSV file
file = csv.reader(open(args[0], "r"),delimiter=';',quoting=csv.QUOTE_NONE)
d = DictClass() #create dictClass object
d = buildDictionary(d,file) #build the dictionary for later processing
for name in d.cList: #name = each category
ntotalDict = {} #store dictionary of numeric values
ctotalDict = {} #store dictionary of character values
ndict = 1
for k,v in d.superDict.items():
for k1,v1 in v.items():
if k1 == name and isNumeric(v1):
ntotalDict = swapKeyValue(ntotalDict,v,v1,k)
elif k1 == name and not isNumeric(v1):
ctotalDict = swapKeyValue(ctotalDict,v,v1,k)
#sorting the keys (numeric or non-numeric)
if ntotalDict:
keys = ntotalDict.keys()
keys.sort(cmpTwoNums)
elif ctotalDict:
ndict = 0
keys = ctotalDict.keys()
keys.sort()
#write a CSV file for each category
writer = csv.writer(open(name + '.csv',"w"),delimiter=',',quoting=csv.QUOTE_MINIMAL)
print "Category: %s" % name
writeHeader(writer,d,name)
#break up arrays so it is better formatted for Excel import
for k in keys:
if ndict == 1:
nArray = ntotalDict.get(k)
array2Str2CSV(nArray,k,writer,options.verbose)
elif ndict == 0:
cArray = ctotalDict.get(k)
array2Str2CSV(cArray,k,writer,options.verbose)
ctotalDict = {}
ntotalDict = {}
if __name__ == '__main__':
main()