#! /usr/bin/env python2
# Source: //guest/robert_cowham/p4benchmark/main/createfiles.py (revision #6)
#
# Utility for creating a tree of random text and binary test files for benchmarks
#
# Copyright (C) 2017, Robert Cowham, Perforce
#

from __future__ import print_function

import logging
import os
import P4
import platform
import math
import sys
import random
import string
import subprocess
from argparse import ArgumentParser
import yaml
from faker import Faker

# True when the interpreter is Python 3 or later.
python3 = sys.version_info.major >= 3

# Nominal line length used to convert a target text size into a line count.
LINE_LENGTH = 80
# Size in bytes of each random chunk written to binary files.
BLOCK_SIZE = 4096

# Module-wide logger and fake-text source.
logger = logging.getLogger("createfiles")
fake = Faker()

# Random generators: use numpy when it is installed, otherwise fall back to
# the (much slower) standard-library random module. Both branches define the
# same two functions with the same contracts.
try:
    import numpy as np

    def create_random(size):
        """Return `size` random byte values, each in the range 1..254.

        The result is `bytes` on Python 3 and `bytearray` on Python 2.
        """
        # randint's upper bound is exclusive, so 255 yields values up to 254.
        # It replaces the deprecated numpy.random.random_integers, and the
        # uint8 dtype lets the whole buffer be converted in one step.
        raw = np.random.randint(1, 255, size, dtype=np.uint8).tobytes()
        return raw if python3 else bytearray(raw)

    def generator(size, eol="\n"):
        """Return `size - 1` random ASCII letters/digits followed by `eol`."""
        alphabet = list(string.ascii_letters + string.digits)
        # Clamp at zero so size <= 0 yields just `eol`, matching the
        # non-numpy fallback instead of raising on a negative count.
        count = max(size - 1, 0)
        return "".join(np.random.choice(alphabet, count)) + eol

except ImportError:
    print("No numpy installed, falling back to standard Python random. Prepare to wait ...", file=sys.stderr)

    def create_random(size):
        """Return `size` random byte values, each in the range 1..254.

        The result is `bytes` on Python 3 and `bytearray` on Python 2.
        """
        values = (random.randint(1, 254) for _ in range(size))
        return bytes(values) if python3 else bytearray(values)

    def generator(size, eol="\n"):
        """Return `size - 1` random ASCII letters/digits followed by `eol`."""
        alphabet = string.ascii_letters + string.digits
        return "".join(random.choice(alphabet) for _ in range(size - 1)) + eol

def create_file(fileSize, filename, binary=False):
    """Write a file of roughly `fileSize` bytes to `filename`.

    This is an approximation for data generation: the size written is not
    exact. Binary files are built from whole BLOCK_SIZE chunks, so a binary
    request smaller than one chunk produces an empty file.

    Args:
        fileSize: Target size in bytes.
        filename: Path of the file to create; an existing file is overwritten.
        binary: When true, write random bytes in binary mode; otherwise
            write generated text in text mode.
    """
    logger.debug("create_file '%s' binary: %s", filename, binary)
    mode = "wb" if binary else "w"
    with open(filename, mode) as f:
        if binary:
            blocks = int(fileSize / BLOCK_SIZE)
            # Each random chunk is written dup_count times in a row, so the
            # file contains repeated content.
            dup_count = random.randint(1, 4)
            # Floor division keeps the range() argument an int on Python 3,
            # where "/" would produce a float and raise TypeError.
            for _group in range(blocks // dup_count):
                chunk = os.urandom(BLOCK_SIZE)
                for _copy in range(dup_count):
                    f.write(chunk)
        else:
            # One fake text passage is written per nominal line.
            lines = int(fileSize / LINE_LENGTH)
            for _line in range(lines):
                f.write(fake.text())

class FileCreator:
    """Pick random file names across a directory tree and optionally create them.

    `options` must provide the attributes read by run(): `rootdir`,
    `levels`, `create`, `max` and `size`.
    """

    def __init__(self, options):
        self.options = options

    def getFileName(self):
        "Return the base file name (currently the fixed string 'test')"
        return "test"

    def getDirs(self, levels):
        """Return a list of relative directories from which to select.

        Args:
            levels: Number of directories at each nesting level, outermost
                first, e.g. [5, 10]. A single int is treated as one level.
                Must not be empty.

        Returns:
            Paths such as "03/07", one per leaf directory, with every
            component formatted as a zero-padded two-digit number.
        """
        if isinstance(levels, int):
            levels = [levels]
        names = ["%02d" % x for x in range(levels[0])]
        if len(levels) == 1:
            return names
        # Compute the sub-tree once rather than once per top-level entry.
        subdirs = self.getDirs(levels[1:])
        return ["%s/%s" % (name, sub) for name in names for sub in subdirs]

    def run(self):
        """Print `options.max` random file paths and, if `options.create`, write them.

        Each file goes into a randomly chosen leaf directory and is randomly
        either binary (".dat") or text (".txt"), with a random target size
        between 100 and twice `options.size`.
        """
        opts = self.options
        # A missing rootdir (None) means the current directory; os.path.join
        # would otherwise fail on None.
        root = opts.rootdir if opts.rootdir is not None else os.curdir
        dirs = [os.path.join(root, sub) for sub in self.getDirs(opts.levels)]
        if opts.create:
            for path in dirs:
                if not os.path.isdir(path):
                    os.makedirs(path)
        # Sizes are drawn uniformly up to twice the requested average.
        maxsize = opts.size * 2
        for _ in range(opts.max):
            target_dir = random.choice(dirs)
            isBinary = random.choice([True, False])
            ext = ".dat" if isBinary else ".txt"
            filename = os.path.join(target_dir, "%s%s" % (generator(20, eol=""), ext))
            print("File: %s" % filename)
            if opts.create:
                create_file(random.randint(100, maxsize), filename, binary=isBinary)

def main():
    """Parse the command line and run a FileCreator with the resulting options.

    Invalid arguments are reported through argparse, which prints a usage
    message and exits with status 2; `-h` prints help and exits with 0.
    """
    parser = ArgumentParser(add_help=True)
    parser.add_argument('-m', '--max', type=int, help="Number of files to create", default=100)
    # The default must be a list: nargs='+' yields a list when the option is
    # given, and the code below treats levels as a sequence.
    parser.add_argument('-l', '--levels', type=int, nargs='+', help="Directories to create at each level, e.g. -l 5 10", default=[5])
    parser.add_argument('-s', '--size', type=int, help="Average size of files", default=100000)
    parser.add_argument('-d', '--rootdir', help="Directory where to start (default: current directory)", default=os.curdir)
    parser.add_argument('-c', '--create', help="Create the files as specified instead of just printing names", action='store_true', default=False)

    # No try/except here: argparse already reports bad input and exits, and a
    # bare except would also swallow that SystemExit and hide real errors.
    options = parser.parse_args()
    if not options.levels:
        parser.error("At least one level must be specified")
    if any(level < 1 for level in options.levels):
        # A level of zero or less would leave no directories to choose from.
        parser.error("Each level must be at least 1")

    fc = FileCreator(options)
    fc.run()

# Run the command-line entry point only when executed as a script, not when
# this file is imported as a module.
if __name__ == '__main__':
    main()
# Change User Description Committed
#10 24711 Robert Cowham Restructure and tidy up
#9 24629 Robert Cowham Allow textonly or binaryonly and use os.urandom for performance
#8 24628 Robert Cowham Use mimesis for much faster text sentence faking
#7 22132 Robert Cowham Basic test
#6 22003 Robert Cowham Latest state - with p4python and no syncing
#5 21913 Robert Cowham Tweaked to allow to scale more easily with levels parameter
#4 21756 Robert Cowham Add 3rd level
#3 21734 Robert Cowham Fix avg size
#2 21728 Robert Cowham Make executable
#1 21727 Robert Cowham Utility program