/*
* usage: perfmerge checkpoint.1 checkpoint.2 ... checkpoint.n > checkpoint.new
*
* WARNING: THIS APPLICATION IS PROVIDED AS IS. Please contact
* support@perforce.com for more information if you are using this application
* for anything other than experimentation.
*
* Merge 2 or more checkpoint files into one checkpoint. Changes and jobs
* are renumbered. The result is written to stdout - redirect this to a file!
*
* Credits: This work was derived directly from:
* ftp://ftp.perforce.com/perforce/r00.2/tools/server/perfmerge.pl
*
* Requirements:
* - no pathnames may overlap between checkpoints!
* - checkpoints must all be from the same version of the server!
* - checkpoints must not contain changes or jobs with timestamps that are
* "in the future" on the machine of execution.
* - The max line length for all checkpoints is MAXBUFFER (see below).
* - The max number of checkpoints that can be merged is MAXFILES (see below).
*
* Known issues:
* - When restoring from the merged checkpoint, the db.* files may grow
* larger than expected. This is most likely due to the ordering of this
* program's output. Currently, the best fix for this is to:
* 1. Let the restore complete.
* 2. Generate a new checkpoint.
* 3. Delete the db.* files and restore from that new checkpoint.
* - Private counters are lost (only change and jobs are set properly).
* - The journal counter is reset to zero (0) after the restore.
*
* Caveat:
* - This code was designed for speed. It's not necessarily very pretty. It
* is commented to some extent, however.
* - Constants are always on the left of comparisons. This confuses many,
* but has saved me a lot of time over the years.
*
* WARNING: THIS APPLICATION IS PROVIDED AS IS. Please contact
* support@perforce.com for more information if you are using this application
* for anything other than experimentation.
*
* Contributed by David Markley (david@hextris.com)
*/
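/*
* Overview: the program makes two passes over its input. Pass one collects
* every change and job (number and date) and builds a date-sorted mapping
* to new, sequential numbers. Pass two re-reads each checkpoint and writes
* it to stdout with the numbers substituted, preceded by fresh
* @db.counters@ records for the change and job counters.
*/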
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <errno.h>
/* Maximum number of files that can be supplied on the command line. */
#define MAXFILES 64
/* Maximum length of a single line in any of the checkpoint files. */
#define MAXBUFFER 4096
/* Maximum number of tokens that can be generated from a single line. */
#define MAXTOKENS 2048
/*
* Apparently, NetBSD doesn't handle a NULL value being passed into its atol
* function. This compensates for that shortcoming.
*/
#ifdef __NetBSD__
#define ATOL(val) ((val)?atol(val):0)
#else
#define ATOL(val) atol(val)
#endif
/* Constants used for the type value within the following structure. */
#define CHANGE 1
#define JOB 2
/* This structure is used to keep the mappings for changes and jobs */
typedef struct __chngmap {
short ck; /* Checkpoint file this came from. Index into argv. */
short type; /* This is either a CHANGE or a JOB. */
long time; /* The time for this item. */
long change; /* The original change number. */
long new_change; /* The new change number. */
struct __chngmap *prev; /* Previous chngmap in linked list. */
struct __chngmap *next; /* Next chngmap in linked list. */
} chngmap;
/*
* Tokenizes the line in the buffer and returns the number of tokens found.
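*
* Example (hypothetical line): "@pv@ 0 @db.change@ 42 42 @client@ @user@ 1 @a change@"
* splits into "@pv@", "0", "@db.change@", "42", "42", "@client@", "@user@",
* "1" and "@a change@". Spaces only separate tokens outside of '@'-delimited
* fields, which keep their delimiters, embedded spaces and "@@" escapes.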
*/
long tokenize(char *tok[MAXTOKENS], char buf[MAXBUFFER]) {
int cnt = 0;
int len = strlen(buf);
int pos = 0;
int in_token = 1;
char fc;
memset(tok, 0, sizeof(char *)*MAXTOKENS);
tok[cnt++] = buf+pos;
fc = buf[pos];
for (pos = 0; pos < len; pos++) {
switch (buf[pos]) {
case ' ':
if (in_token && '@' != fc) {
buf[pos] = '\0';
in_token = 0;
continue;
}
break;
case '\r': case '\n':
if (in_token) {
buf[pos] = '\0';
in_token = 0;
continue;
}
break;
case '@':
if (! in_token) {
in_token = 1;
tok[cnt++] = buf+pos;
fc = '@';
continue;
} else if (' ' == buf[pos+1]) {
buf[++pos] = '\0';
in_token = 0;
}
break;
default:
if (! in_token) {
in_token = 1;
tok[cnt++] = buf+pos;
fc = buf[pos];
}
break;
}
}
return cnt;
}
/*
* Private memory allocation function. Fails horribly if we run out of
* memory. What else should we do?
*/
void *palloc(size_t size) {
void *mem;
/* Allocate space. */
if (NULL == (mem = (void *)malloc(size))) {
fprintf(stderr, "Out of memory...arrrrgh!!!\n");
exit(1);
}
memset(mem, 0, size);
return mem;
}
/*
* Here's the tricky part...
*
* Each change and job has a time associated with it. That time is used
* as an index for this combination hash table/insertion sort algorithm
* (call it what you want). It divides all the times into buckets based upon
* the upper 16 bits. They are then further divided into the 256 lists that
* each bucket contains by bits 8-15. They are then inserted into this list
* at their appropriate location (insertion sort).
*
* This is relatively fast. ;-)
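*
* For example, a (hypothetical) timestamp of 987654321 (0x3ADE68B1) lands
* in bucket n1 = 0x3ADE (15070) and, within it, list n2 = 0x68 (104).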
*/
void insert_sort(void *sortbydate[], chngmap *map) {
chngmap *tmap;
long n1 = (map->time >> 16);
long n2 = ((map->time & 0xFF00) >> 8);
void **bucket = sortbydate[n1];
if (NULL == bucket) {
bucket = sortbydate[n1] = palloc(sizeof(void *)*256);
}
if (NULL == bucket[n2]) {
bucket[n2] = map;
} else {
tmap = bucket[n2];
if (tmap->time <= map->time) {
bucket[n2] = map;
map->next = tmap;
return;
}
while (NULL != tmap->next) {
if (tmap->next->time <= map->time) {
map->next = tmap->next;
tmap->next = map;
return;
}
tmap = tmap->next;
}
tmap->next = map;
}
}
/*
* Walks through the sortbydate buckets and lists and renumbers each change and
* job accordingly. It also creates lookup lists for jobs and changes for
* each input file. These lookup lists make the replacement go very fast.
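*
* For example (hypothetical numbers): if change 57 from the second checkpoint
* on the command line comes out 312th in the date-ordered walk, then after
* this call changebyfile[2][57]->new_change == 312.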
*/
void renumber(void *sortbydate[], long maxbuckets,
chngmap **changebyfile[MAXFILES],
chngmap **jobbyfile[MAXFILES])
{
int change = 0;
int job = 0;
void **bucket;
chngmap *tmap;
int i, j;
for (i = 0; i < maxbuckets; i++) {
if (NULL == (bucket = sortbydate[i])) continue;
for (j = 0; j < 256; j++) {
if (NULL == (tmap = bucket[j])) continue;
while (NULL != tmap) {
if (CHANGE == tmap->type) {
tmap->new_change = ++change;
changebyfile[tmap->ck][tmap->change] = tmap;
} else if (JOB == tmap->type) {
tmap->new_change = ++job;
jobbyfile[tmap->ck][tmap->change] = tmap;
}
tmap = tmap->next;
}
}
}
}
/*
* We all know what this is.
*/
int main(int argc, char **argv) {
int i, j;
/* Tokenization */
char *tok[MAXTOKENS], *tmptok1 = NULL, *tmptok2 = NULL;
int tcnt;
/* File input */
FILE *in;
char buf[MAXBUFFER];
char buf2[MAXBUFFER];
chngmap *mapping;
/* Counts for changes and jobs for each file. The value at the
* index of zero (0) contains the total overall. */
long changes[MAXFILES];
long jobs[MAXFILES];
long maxchange;
long maxjob;
/* Lookup lists of mappings for each file. These are used during the replace. */
chngmap **changebyfile[MAXFILES];
chngmap **jobbyfile[MAXFILES];
/* The bucket/list container for all the mappings. */
void **sortbydate;
long maxbuckets = (time(NULL) >> 16)+10;
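/* sortbydate is indexed by (timestamp >> 16), so sizing it from the current
* time (plus 10 buckets, roughly a week of slack) is why timestamps "in the
* future" are disallowed in the requirements above. */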
/* Check the arguments. Fail if they're not correct. */
if (1 >= argc) {
fprintf(stderr, "Usage: perfmerge <checkpoint+>\n");
exit(1);
}
if (MAXFILES < argc) {
fprintf(stderr, "Sorry. There is a hardcoded limit to the number of files that can be joined.\nThat limit is %d. Do you really need to merge that many!?", MAXFILES-1);
exit(1);
}
/* Clear and initialize our storage */
memset(changes, 0, sizeof(long)*MAXFILES);
memset(jobs, 0, sizeof(long)*MAXFILES);
sortbydate = palloc(sizeof(void *)*maxbuckets);
/* Loop through the files and grab the jobs and changes. */
for (i = 1; i < argc; i++) {
int type;
maxchange = 0;
maxjob = 0;
/* Open the next checkpoint file. */
if (NULL == (in = fopen(argv[i],"r"))) {
perror(argv[i]);
exit(errno);
}
/* Grab the change and job numbers. */
while (NULL != fgets(buf, MAXBUFFER, in)) {
if ('c' == buf[11]) {
if (0 == strncmp(buf,"@pv@ 0 @db.change@",18)) {
type = CHANGE;
} else {
continue;
}
} else if ('j' == buf[11]) {
if (0 == strncmp(buf,"@pv@ 0 @db.jobs@",16)) {
type = JOB;
} else {
continue;
}
} else {
continue;
}
/* split the line into tokens */
memset(tok, 0, sizeof(char *)*MAXTOKENS);
tcnt = 0;
tok[tcnt++] = strtok(buf, " ");
while (8 > tcnt && NULL != (tok[tcnt++] = strtok(NULL, " ")));
mapping = (chngmap *)palloc(sizeof(chngmap));
mapping->ck = i;
if (CHANGE == (mapping->type = type)) {
long c1 = ATOL(tok[3]);
long c2 = ATOL(tok[4]);
long tm = ATOL(tok[7]);
if (c1 > maxchange) maxchange = c1;
if (c2 > maxchange) maxchange = c2;
changes[i]++;
changes[0]++;
mapping->change = c1;
mapping->time = tm;
insert_sort(sortbydate, mapping);
/* Handle the case where the changes were re-numbered. */
if (c1 != c2) {
changes[i]++;
changes[0]++;
mapping = (chngmap *)palloc(sizeof(chngmap));
mapping->ck = i;
mapping->type = CHANGE;
mapping->change = c2;
mapping->time = tm;
insert_sort(sortbydate, mapping);
}
} else if (JOB == (mapping->type = type)) {
long c1 = ATOL(tok[3]);
if (c1 > maxjob) maxjob = c1;
jobs[i]++;
jobs[0]++;
mapping->change = c1;
mapping->time = ATOL(tok[5]);
insert_sort(sortbydate, mapping);
} else {
free(mapping);
}
}
fclose(in);
/* The arrays are indexed directly by change/job number, hence the +1. */
changebyfile[i] = palloc(sizeof(chngmap *)*(maxchange+1));
jobbyfile[i] = palloc(sizeof(chngmap *)*(maxjob+1));
}
/* Renumber all the changes. */
renumber(sortbydate, maxbuckets, changebyfile, jobbyfile);
/* Write new counters. */
printf("@pv@ 0 @db.counters@ @change@ %d\n", changes[0]);
printf("@pv@ 0 @db.counters@ @jobs@ %d\n", jobs[0]);
/* Now read the checkpoint files, translating as we go. */
for (i = 1; i < argc; i++) {
int first = 1, action, special, change, job;
/* Open the next checkpoint file. */
if (NULL == (in = fopen(argv[i],"r"))) {
perror(argv[i]);
exit(errno);
}
/* Read each line, rewriting change numbers as we go. */
while (NULL != fgets(buf, MAXBUFFER, in)) {
memcpy(buf2, buf, sizeof(char)*MAXBUFFER);
/* split the line into tokens */
tcnt = tokenize(tok, buf);
if (first && (0 == strncmp(tok[0],"@pv@",4))) {
char tbuf1[20];
char tbuf2[20];
chngmap *cmap;
action = 0;
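/*
* The action value packs the dispatch info apparently carried over from
* perfmerge.pl: the high byte selects the handler below ("special"),
* bits 4-7 hold the job field number and bits 0-3 the change field
* number, counted from the table name in tok[2].
*/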
switch (tok[2][4]) {
case 'd':
if (0 == strncmp(tok[2],"@db.desc",8)) {
action = 0x501;
} else if (0 == strncmp(tok[2],"@db.domain",10)) {
action = 0x100;
}
break;
case 'v':
if (0 == strncmp(tok[2],"@db.view",8)) action = 0x200;
break;
case 'i':
if (0 == strncmp(tok[2],"@db.integ",9)) action = 0x509;
break;
case 'r':
if (0 == strncmp(tok[2],"@db.revcx",9)) {
action = 0x501;
} else if (0 == strncmp(tok[2],"@db.rev",7)) {
action = 0x505;
}
break;
case 'w':
if (0 == strncmp(tok[2],"@db.working",11)) action = 0x509;
break;
case 'c':
if (0 == strncmp(tok[2],"@db.change",10)) {
action = 0x300;
} else if (0 == strncmp(tok[2],"@db.counters",12)) {
action = 0x400;
}
break;
case 'j':
if (0 == strncmp(tok[2],"@db.job",7) ||
0 == strncmp(tok[2],"@db.jobpend",11) ||
0 == strncmp(tok[2],"@db.jobdesc",11)) action = 0x510;
break;
case 'f':
if (0 == strncmp(tok[2],"@db.fix",7) ||
0 == strncmp(tok[2],"@db.fixrev",10)) action = 0x512;
break;
default:
break;
}
special = action >> 8;
if ( 1 == special ) {
/*
* Looks like nothing is being done in this section in
* perfmerge.pl. I'll ignore it too. Hashtables are no fun in C!
*/
/*
// For db.domain, skip duplicates
if( $domained{ $_[3] } ) {
// printf("Skipping domain %s already seen.\n", tok[3]);
} else {
$domained{ $_[3] } = 1;
}
*/
} else if ( 2 == special ) {
/*
* Looks like nothing is being done in this section in
* perfmerge.pl. I'll ignore it too. Hashtables are no fun in C!
*/
/*
// For db.view, use only the first set we see.
if( $viewed{ $_[3] } && $viewed{ $_[3] } ne $ck ) {
// printf("Skipping view %s already seen.\n", tok[3]);
} else {
$viewed{ $_[3] } = $ck;
}
*/
} else if ( 3 == special ) {
// db.change: renumber the change number (tok[3]) and its key (tok[4]).
if (NULL != (cmap = changebyfile[i][ATOL(tok[3])])) {
sprintf(tbuf1, "%d", cmap->new_change);
tok[3] = tbuf1;
} else {
//fprintf(stderr, "NO CHANGE: %s\n", tok[3]);
}
if (NULL != (cmap = changebyfile[i][ATOL(tok[4])])) {
sprintf(tbuf2, "%d", cmap->new_change);
tok[4] = tbuf2;
} else {
//fprintf(stderr, "NO CHANGE: %s\n", tok[4]);
}
} else if( 4 == special ) {
// elide -- we'll output new counters.
continue;
} else if ( 5 == special ) {
job = ( action >> 4 ) & 0x0F;
change = ( action ) & 0x0F;
// Renumber changes
if ( change ) {
// Field numbers count from the table name (tok[2]); add 2 for the tok[] index.
change += 2;
if (NULL != (cmap = changebyfile[i][ATOL(tok[change])])) {
sprintf(tbuf1, "%d", cmap->new_change);
tok[change] = tbuf1;
} else {
//fprintf(stderr, "NO CHANGE: %s\n", tok[change]);
}
}
// Renumber jobs
/* The perl script didn't do this. Why not? I dunno. Well...
* I'll defer to its infinite wisdom.
*/
/*
if( job ) {
++job;
++job;
sprintf(tbuf1, "%d", changebyjob[i][ATOL(tok[change])]);
tok[job] = tbuf1;
}
*/
}
// Print out the modified tokens.
j = 0;
tcnt--;
do {
fputs(tok[j++], stdout);
fputc(' ', stdout);
} while (j < tcnt);
fputs(tok[j], stdout);
fputc('\n', stdout);
} else {
// Print out the line as it was read in.
fputs(buf2, stdout);
}
// If this line has an odd number of '@'s, an @-quoted field (such as a
// multi-line change description) opens or closes here, so toggle 'first'.
tcnt = 0;
for (j = 0; j < MAXBUFFER && '\0' != buf2[j]; j++) { if ('@' == buf2[j]) { tcnt++; } }
if (tcnt % 2) { first = 1 - first; }
}
fclose(in);
}
fflush(stdout);
fclose(stdout);
return 0;
}