sortByValue.py #8

#!/usr/bin/python
#
#Copyright (c) 2010, Perforce Software, Inc.  All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# 
# 1.  Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 
# 2.  Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL PERFORCE SOFTWARE, INC. BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#*******************************************************************************
# Author: Stephen Moon
# Date: 1/27/10
# Last Modified: 2/08/10
#
# Summary: Uses Python CSV module to import a CSV file and then generates 
#          CSV files after sorting each column numerically and alphabetically.
#          A bit sophisticated dictionary of dictionary example.
#
#*******************************************************************************

import sys,optparse,csv,string

#class to encapsulate dictionary and array
class DictClass(object):

  #initializes the data structure
  def __init__(self):
    self.data = {} #dictionary of category data
    self.hdata = {} #dictionary of category header
    self.superDict = {} #contains self.data for each company for example
    self.cList = [] #list of category

#does a numeric comparison
def cmpTwoNums(a,b):
  if int(a) <= int(b):
    return 1
  else:
    return -1

#is a number?
def isNumeric(value):
  return str(value).replace(".","").replace("-","").isdigit()

#filter duplicate names
def filterName(name,v,k):
  if name == k:
    return
  elif name == v:
    return
  else:
    return name

#first break up array into strings and then output them to CSV file
def array2Str2CSV(dArray,k,writer,verbose):
 firstArray = [] 
 for i in dArray:
   if(type(i) == list):
     subArray= i
     subArray.append(k)
     writer.writerow(subArray)
     if verbose:
       for j in i:
         print "list: %s, %s" % (j,type(j))
   else:
     firstArray.append(i)
     if verbose:
       print "str: %s, %s" % (i,type(i))
 firstArray.append(k)
 writer.writerow(firstArray)

#building the dictionary for manipulation
def buildDictionary(d,file):
  count = 1
  for row in file:
    if count == 1:
      d.cList = row
      d.hdata = dict(zip(d.cList,row))
    else:
      d.data = dict(zip(d.cList,row))
    count +=1
    d.superDict[row[0]] = d.data
  return d

#reverse key and value of dictionary to sort by values
def swapKeyValue(d,v,v1,k):
  nv = [item for item in v.values() if filterName(item,v1,k)]
  nv.insert(0,k) 
  if d.has_key(v1):
    d[v1].append(nv)
  else:
    d[v1] = nv
  return d

#write a header at the top of each CSV file
def writeHeader(writer,d,name):
  filteredHdata = [item for item in d.hdata.values() if filterName(item,name,d.cList[0])]
  filteredHdata.insert(0,d.cList[0])
  filteredHdata.append(name)
  writer.writerow(filteredHdata)

def main():

  parser = optparse.OptionParser(usage="%prog [-h,-v] filename",version="%prog v0.9")
  parser.add_option("-v","--verbose",action="store_true",dest="verbose",help="Print debug messages to stdout") 
  (options,args) = parser.parse_args() #by default sys.argv[1:]

  if len(args) != 1:
    parser.error("Incorrect number of arguments")

  #read a CSV file
  file = csv.reader(open(args[0], "r"),delimiter=';',quoting=csv.QUOTE_NONE)

  d = DictClass() #create dictClass object
  d = buildDictionary(d,file) #build the dictionary for later processing

  for name in d.cList: #name = each category
    ntotalDict = {} #store dictionary of numeric values
    ctotalDict = {} #store dictionary of character values
    ndict = 1
    for k,v in d.superDict.items():
      for k1,v1 in v.items():
        if k1 == name and isNumeric(v1):
          ntotalDict = swapKeyValue(ntotalDict,v,v1,k)
        elif k1 == name and not isNumeric(v1):
          ctotalDict = swapKeyValue(ctotalDict,v,v1,k)

    #sorting the keys (numeric or non-numeric) 
    if ntotalDict: 
      keys = ntotalDict.keys()
      keys.sort(cmpTwoNums)
    elif ctotalDict:
      ndict = 0
      keys = ctotalDict.keys()
      keys.sort()
    
    #write a CSV file for each category
    writer = csv.writer(open(name + '.csv',"w"),delimiter=',',quoting=csv.QUOTE_MINIMAL)

    print "Category: %s" % name
    writeHeader(writer,d,name)

    #break up arrays so it is better formatted for Excel import
    for k in keys:
      if ndict == 1:
        nArray = ntotalDict.get(k)
        array2Str2CSV(nArray,k,writer,options.verbose)
      elif ndict == 0:
        cArray = ctotalDict.get(k)
        array2Str2CSV(cArray,k,writer,options.verbose)
    ctotalDict = {}
    ntotalDict = {}

if __name__ == '__main__':
  main()

#	Change	User	Description
#9	8645	Stephen Moon	removing not up-to-date scripts
#8	7573	Stephen Moon	optparse added to accept a filename from the command line and print usage info
#7	7572	Stephen Moon	removed control characters right before she-bang line
#6	7571	Stephen Moon	refactored and added comments
#5	7570	Stephen Moon	Put the arrays in one line into a batch of strings in its own line
#4	7561	Stephen Moon	output strings rather than arrays
#3	7557	Stephen Moon	redundant information filtered out from the sorted CSV outputs
#2	7556	Stephen Moon	A bit more sophisticated dictionary of dictionary example added, used CSV module to read and write files
#1	7547	Stephen Moon	dictionary of dictionary example where you sort by keys