# sortByValue.py #5

#!/usr/bin/python
#
#Copyright (c) 2009, Perforce Software, Inc.  All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# 
# 1.  Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 
# 2.  Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL PERFORCE SOFTWARE, INC. BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#*******************************************************************************
# Author: Stephen Moon
# Date: 1/25/10
#
# Summary: Uses Python CSV module to import a CSV file and then generates 
#          CSV files after sorting each column numerically and alphabetically.
#          A slightly more sophisticated dictionary-of-dictionaries example.
#

import csv,string

def cmpTwoNums(a,b):
  """Compare two numeric values for a largest-first (descending) sort.

  a, b: numbers or numeric strings such as "42", "-7" or "3.5".

  Returns 1 when a is smaller than b (a sorts after b), -1 when a is
  larger than b (a sorts before b) and 0 when both are equal, as expected
  from a cmp-style comparison function.

  Raises ValueError when a string argument is not a number.
  """
  def toNumber(value):
    # Integers are tried first so large whole numbers keep full precision;
    # decimal strings such as "3.5" fall back to float instead of failing.
    try:
      return int(value)
    except ValueError:
      return float(value)

  left = toNumber(a)
  right = toNumber(b)
  if left < right:
    return 1
  elif left > right:
    return -1
  return 0

def isNumeric(value):
  """Return True when value looks like a plain decimal number.

  value: any object; it is converted with str() before being checked.

  Accepted forms are digits with an optional single leading "-" and at
  most one "." (for example "42", "-7", "3.5", ".5"). Text that merely
  contains digits mixed with dots or dashes, such as "1.2.3", "555-1234"
  or "2010-01-25", is not numeric.
  """
  text = str(value)
  # Only one minus sign is allowed, and only in the leading position.
  if text.startswith("-"):
    text = text[1:]
  # More than one decimal point cannot be a number.
  if text.count(".") > 1:
    return False
  return text.replace(".","",1).isdigit()

def filterName(name,v,k):
  """Return name unless it equals k or v, in which case return None.

  Intended as a filter predicate: a None result (or an empty name, which
  is returned unchanged and is falsy) makes the caller drop the item.
  """
  isRedundant = (name == k) or (name == v)
  if isRedundant:
    return None
  return name

# Load company_data.csv (semicolon-delimited, no quoting).
#   cList     - column names taken from the first non-empty row
#   hdata     - the header row as a dict mapping each column name to itself
#   superDict - dictionary of dictionaries: first-column value of each data
#               row -> {column name: value} for that row
hdata = {}
superDict = {}
cList = []

# The with-block closes the input file as soon as it has been read.
with open("company_data.csv", "r") as csvFile:
  reader = csv.reader(csvFile,delimiter=';',quoting=csv.QUOTE_NONE)
  for row in reader:
    if not row:
      continue  # blank line: there is no first column to use as the key
    if not cList:
      # First non-empty row is the header; it is not stored as a data record.
      cList = row
      hdata = dict(zip(cList,row))
    else:
      superDict[row[0]] = dict(zip(cList,row))

#print superDict
for name in cList:
  ntotalDict = {}
  ctotalDict = {}
  ndict = 1
  for k,v in superDict.items():
    for k1,v1 in v.items():
      if k1 == name and isNumeric(v1):
        nv = [item for item in v.values() if filterName(item,v1,k)]
        #nv = ','.join([item for item in v.values() if filterName(item,v1,k)])
        nv.insert(0,k) 
        #nk1 = ','.join([k,nv])
        if ntotalDict.has_key(v1):
          #print "v: %s" % v
          ntotalDict[v1].append(nv)
        else:
          #print "v: %s" % v
          ntotalDict[v1] = nv
      elif k1 == name and not isNumeric(v1):
        nv = [item for item in v.values() if filterName(item,v1,k)]
        #nv = ','.join([item for item in v.values() if filterName(item,v1,k)])
        nv.insert(0,k)
        #nk1 = ','.join([k,nv])
        if ctotalDict.has_key(v1):
          ctotalDict[v1].append(nv)
        else:
          ctotalDict[v1] = nv
 
  if ntotalDict: 
    keys = ntotalDict.keys()
    keys.sort(cmpTwoNums)
  elif ctotalDict:
    ndict = 0
    keys = ctotalDict.keys()
    keys.sort()

  writer = csv.writer(open(name + '.csv',"w"),delimiter=',',quoting=csv.QUOTE_MINIMAL)

  print "Category: %s" % name

  filteredHdata = [item for item in hdata.values() if filterName(item,name,cList[0])]
  filteredHdata.insert(0,cList[0])
  filteredHdata.append(name)
  writer.writerow(filteredHdata)

  for k in keys:
    if ndict == 1:
      #print "Duplicate key count: %s, value: %s" % (ntotalDict.get(k),k)
      nDict = ntotalDict.get(k)
      nFirstDict = [] 
      for i in nDict:
         if(type(i) == list):
            nSubDict = i
            nSubDict.append(k)
            writer.writerow(nSubDict)
            for j in i:
               print "num yes: %s, %s" % (j,type(j))
         else:
            nFirstDict.append(i)
            print "num no: %s, %s" % (i,type(i))
      nFirstDict.append(k)
      writer.writerow(nFirstDict)
    elif ndict == 0:
      #print "Duplicate key count: %s, value: %s" % (ctotalDict.get(k),k)
      cDict = ctotalDict.get(k)
      cFirstDict = [] 
      for i in cDict:
         if(type(i) == list):
            cSubDict = i
            cSubDict.append(k)
            writer.writerow(cSubDict)
            for j in i:
               print "char yes: %s, %s" % (j,type(j))
         else:
            cFirstDict.append(i)
            print "char no: %s, %s" % (i,type(i))
      cFirstDict.append(k)
      writer.writerow(cFirstDict)
  ctotalDict = {}
  ntotalDict = {}

#	Change	User	Description
#9	8645	Stephen Moon	removing not up-to-date scripts
#8	7573	Stephen Moon	optparse added to accept a filename from the command line and print usage info
#7	7572	Stephen Moon	removed control characters right before she-bang line
#6	7571	Stephen Moon	refactored and added comments
#5	7570	Stephen Moon	Put the arrays in one line into a batch of strings in its own line
#4	7561	Stephen Moon	output strings rather than arrays
#3	7557	Stephen Moon	redundant information filtered out from the sorted CSV outputs
#2	7556	Stephen Moon	A bit more sophisticated dictionary of dictionary example added, used CSV module to read and write files
#1	7547	Stephen Moon	dictionary of dictionary example where you sort by keys