/*
* usage: perfmerge checkpoint.1 checkpoint.2 ... checkpoint.n > checkpoint.new
*
* WARNING: THIS APPLICATION IS PROVIDED AS IS. Please contact
* support@perforce.com for more information if you are using this application
* for anything other than experimentation.
*
* Merge 2 or more checkpoint files into one checkpoint. Changes and jobs
* are renumbered. The result is written to stdout - redirect this to a file!
*
* Credits: This work was derived directly from:
* ftp://ftp.perforce.com/perforce/r00.2/tools/server/perfmerge.pl
*
* Requirements:
* - no pathnames may overlap between checkpoints!
* - checkpoints must all be from the same version of the server!
* - checkpoints must not contain changes or jobs with timestamps that are
* "in the future" on the machine of execution.
* - The max line length for all checkpoints is MAXBUFFER (see below).
* - The max number of checkpoints that can be merged is MAXFILES (see below).
*
* Known issues:
* - When restoring from the merged checkpoint, the db.* files may grow
* larger than expected. This is most likely due to the ordering of this
* program's output. Currently, the best fix for this is to:
* 1. Let the restore complete.
* 2. Generate a new checkpoint.
* 3. Delete the db.* files and restore from that new checkpoint.
* - Private counters are lost (only change and jobs are set properly).
* - The journal counter is reset to zero (0) after the restore.
*
* Caveat:
* - This code was designed for speed. It's not necessarily very pretty. It
* is commented to some extent, however.
* - Constants are always on the left of comparisons. This confuses many,
* but has saved me a lot of time over the years.
*
* WARNING: THIS APPLICATION IS PROVIDED AS IS. Please contact
* support@perforce.com for more information if you are using this application
* for anything other than experimentation.
*
* Contributed by David Markley (david@hextris.com)
*/
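/*
* Overview: the program makes two passes over its input. Pass one collects
* every change and job (number and date) and builds a date-sorted mapping
* to new, sequential numbers. Pass two re-reads each checkpoint and writes
* it to stdout with the numbers substituted, preceded by fresh
* @db.counters@ records for the change and job counters.
*/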
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <errno.h>
/* Maximum number of files that can be supplied on the command line. */
#define MAXFILES 64
/* Maximum length of a single line in any of the checkpoint files. */
#define MAXBUFFER 4096
/* Maximum number of tokens that can be generated from a single line. */
#define MAXTOKENS 2048
/*
* Apparently, NetBSD doesn't handle a NULL value being passed into its atol
* function. This compensates for that shortcoming.
*/
#ifdef __NetBSD__
#define ATOL(val) ((val)?atol(val):0)
#else
#define ATOL(val) atol(val)
#endif
/* Constants used for the type value within the following structure. */
#define CHANGE 1
#define JOB 2
/* This structure is used to keep the mappings for changes and jobs */
typedef struct __chngmap {
short ck; /* Checkpoint file this came from. Index into argv. */
short type; /* This is either a CHANGE or a JOB. */
long time; /* The time for this item. */
long change; /* The original change number. */
long new_change; /* The new change number. */
struct __chngmap *prev; /* Previous chngmap in linked list. */
struct __chngmap *next; /* Next chngmap in linked list. */
} chngmap;
/*
* Tokenizes the line in the buffer and returns the number of tokens found.
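*
* Example (hypothetical line): "@pv@ 0 @db.change@ 42 42 @client@ @user@ 1 @a change@"
* splits into "@pv@", "0", "@db.change@", "42", "42", "@client@", "@user@",
* "1" and "@a change@". Spaces only separate tokens outside of '@'-delimited
* fields, which keep their delimiters, embedded spaces and "@@" escapes.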
*/
long tokenize(char *tok[MAXTOKENS], char buf[MAXBUFFER]) {
int cnt = 0;
int len = strlen(buf);
int pos = 0;
int in_token = 1;
char fc;
memset(tok, 0, sizeof(char *)*MAXTOKENS);
tok[cnt++] = buf+pos;
fc = buf[pos];
for (pos = 0; pos < len; pos++) {
switch (buf[pos]) {
case ' ':
if (in_token && '@' != fc) {
buf[pos] = '\0';
in_token = 0;
continue;
}
break;
case '\r': case '\n':
if (in_token) {
buf[pos] = '\0';
in_token = 0;
continue;
}
break;
case '@':
if (! in_token) {
in_token = 1;
tok[cnt++] = buf+pos;
fc = '@';
continue;
} else if (' ' == buf[pos+1]) {
buf[++pos] = '\0';
in_token = 0;
}
break;
default:
if (! in_token) {
in_token = 1;
tok[cnt++] = buf+pos;
fc = buf[pos];
}
break;
}
}
return cnt;
}
/*
* Private memory allocation function. Fails horribly if we run out of
* memory. What else should we do?
*/
void *palloc(size_t size) {
void *mem;
/* Allocate space. */
if (NULL == (mem = (void *)malloc(size))) {
fprintf(stderr, "Out of memory...arrrrgh!!!\n");
exit(1);
}
memset(mem, 0, size);
return mem;
}
/*
* Here's the tricky part...
*
* Each change and job has a time associated with it. That time is used
* as an index for this combination hash table/insertion sort algorithm
* (call it what you want). It divides all the times into buckets based upon
* the upper 16 bits. They are then further divided into the 256 lists that
* each bucket contains by bits 8-15. They are then inserted into this list
* at their appropriate location (insertion sort).
*
* This is relatively fast. ;-)
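*
* For example, a (hypothetical) timestamp of 987654321 (0x3ADE68B1) lands
* in bucket n1 = 0x3ADE (15070) and, within it, list n2 = 0x68 (104).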
*/
void insert_sort(void *sortbydate[], chngmap *map) {
chngmap *tmap;
long n1 = (map->time >> 16);
long n2 = ((map->time & 0xFF00) >> 8);
void **bucket = sortbydate[n1];
if (NULL == bucket) {
bucket = sortbydate[n1] = palloc(sizeof(void *)*256);
}
if (NULL == bucket[n2]) {
bucket[n2] = map;
} else {
tmap = bucket[n2];
if (tmap->time <= map->time) {
bucket[n2] = map;
map->next = tmap;
return;
}
while (NULL != tmap->next) {
if (tmap->next->time <= map->time) {
map->next = tmap->next;
tmap->next = map;
return;
}
tmap = tmap->next;
}
tmap->next = map;
}
}
/*
* Walks through the sortbydate buckets and lists and renumbers each change and
* job accordingly. It also creates lookup lists for jobs and changes for
* each input file. These lookup lists make the replacement go very fast.
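*
* For example (hypothetical numbers): if change 57 from the second checkpoint
* on the command line comes out 312th in the date-ordered walk, then after
* this call changebyfile[2][57]->new_change == 312.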
*/
void renumber(void *sortbydate[], long maxbuckets,
chngmap **changebyfile[MAXFILES],
chngmap **jobbyfile[MAXFILES])
{
int change = 0;
int job = 0;
void **bucket;
chngmap *tmap;
int i, j;
for (i = 0; i < maxbuckets; i++) {
if (NULL == (bucket = sortbydate[i])) continue;
for (j = 0; j < 256; j++) {
if (NULL == (tmap = bucket[j])) continue;
while (NULL != tmap) {
if (CHANGE == tmap->type) {
tmap->new_change = ++change;
changebyfile[tmap->ck][tmap->change] = tmap;
} else if (JOB == tmap->type) {
tmap->new_change = ++job;
jobbyfile[tmap->ck][tmap->change] = tmap;
}
tmap = tmap->next;
}
}
}
}
/*
* We all know what this is.
*/
int main(int argc, char **argv) {
int i, j;
/* Tokenization */
char *tok[MAXTOKENS], *tmptok1 = NULL, *tmptok2 = NULL;
int tcnt;
/* File input */
FILE *in;
char buf[MAXBUFFER];
char buf2[MAXBUFFER];
chngmap *mapping;
/* Counts for changes and jobs for each file. The value at the
* index of zero (0) contains the total overall. */
long changes[MAXFILES];
long jobs[MAXFILES];
long maxchange;
long maxjob;
/* Lookup lists of mappings for each file. These are used during the replace. */
chngmap **changebyfile[MAXFILES];
chngmap **jobbyfile[MAXFILES];
/* The bucket/list container for all the mappings. */
void **sortbydate;
long maxbuckets = (time(NULL) >> 16)+10;
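/* sortbydate is indexed by (timestamp >> 16), so sizing it from the current
* time (plus 10 buckets, roughly a week of slack) is why timestamps "in the
* future" are disallowed in the requirements above. */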
/* Check the arguments. Fail if they're not correct. */
if (1 >= argc) {
fprintf(stderr, "Usage: perfmerge <checkpoint+>\n");
exit(1);
}
if (MAXFILES < argc) {
fprintf(stderr, "Sorry. There is a hardcoded limit to the number of files that can be joined.\nThat limit is %d. Do you really need to merge that many!?", MAXFILES-1);
exit(1);
}
/* Clear and initialize our storage */
memset(changes, 0, sizeof(long)*MAXFILES);
memset(jobs, 0, sizeof(long)*MAXFILES);
sortbydate = palloc(sizeof(void *)*maxbuckets);
/* Loop through the files and grab the jobs and changes. */
for (i = 1; i < argc; i++) {
int type;
maxchange = 0;
maxjob = 0;
/* Open the next checkpoint file. */
if (NULL == (in = fopen(argv[i],"r"))) {
perror(argv[i]);
exit(errno);
}
/* Grab the change and job numbers. */
while (NULL != fgets(buf, MAXBUFFER, in)) {
if ('c' == buf[11]) {
if (0 == strncmp(buf,"@pv@ 0 @db.change@",18)) {
type = CHANGE;
} else {
continue;
}
} else if ('j' == buf[11]) {
if (0 == strncmp(buf,"@pv@ 0 @db.jobs@",16)) {
type = JOB;
} else {
continue;
}
} else {
continue;
}
/* split the line into tokens */
memset(tok, 0, sizeof(char *)*MAXTOKENS);
tcnt = 0;
tok[tcnt++] = strtok(buf, " ");
while (8 > tcnt && NULL != (tok[tcnt++] = strtok(NULL, " ")));
mapping = (chngmap *)palloc(sizeof(chngmap));
mapping->ck = i;
if (CHANGE == (mapping->type = type)) {
long c1 = ATOL(tok[3]);
long c2 = ATOL(tok[4]);
long tm = ATOL(tok[7]);
if (c1 > maxchange) maxchange = c1;
if (c2 > maxchange) maxchange = c2;
changes[i]++;
changes[0]++;
mapping->change = c1;
mapping->time = tm;
insert_sort(sortbydate, mapping);
/* Handle the case where the changes were re-numbered. */
if (c1 != c2) {
changes[i]++;
changes[0]++;
mapping = (chngmap *)palloc(sizeof(chngmap));
mapping->ck = i;
mapping->type = CHANGE;
mapping->change = c2;
mapping->time = tm;
insert_sort(sortbydate, mapping);
}
} else if (JOB == (mapping->type = type)) {
long c1 = ATOL(tok[3]);
if (c1 > maxjob) maxjob = c1;
jobs[i]++;
jobs[0]++;
mapping->change = c1;
mapping->time = ATOL(tok[5]);
insert_sort(sortbydate, mapping);
} else {
free(mapping);
}
}
fclose(in);
/* The arrays are indexed directly by change/job number, hence the +1. */
changebyfile[i] = palloc(sizeof(chngmap *)*(maxchange+1));
jobbyfile[i] = palloc(sizeof(chngmap *)*(maxjob+1));
}
/* Renumber all the changes. */
renumber(sortbydate, maxbuckets, changebyfile, jobbyfile);
/* Write new counters. */
printf("@pv@ 0 @db.counters@ @change@ %d\n", changes[0]);
printf("@pv@ 0 @db.counters@ @jobs@ %d\n", jobs[0]);
/* Now read the checkpoint files, translating as we go. */
for (i = 1; i < argc; i++) {
int first = 1, action, special, change, job;
/* Open the next checkpoint file. */
if (NULL == (in = fopen(argv[i],"r"))) {
perror(argv[i]);
exit(errno);
}
/* Read each line, rewriting change numbers as we go. */
while (NULL != fgets(buf, MAXBUFFER, in)) {
memcpy(buf2, buf, sizeof(char)*MAXBUFFER);
/* split the line into tokens */
tcnt = tokenize(tok, buf);
if (first && (0 == strncmp(tok[0],"@pv@",4))) {
char tbuf1[20];
char tbuf2[20];
chngmap *cmap;
action = 0;
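/*
* The action value packs the dispatch info apparently carried over from
* perfmerge.pl: the high byte selects the handler below ("special"),
* bits 4-7 hold the job field number and bits 0-3 the change field
* number, counted from the table name in tok[2].
*/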
switch (tok[2][4]) {
case 'd':
if (0 == strncmp(tok[2],"@db.desc",8)) {
action = 0x501;
} else if (0 == strncmp(tok[2],"@db.domain",10)) {
action = 0x100;
}
break;
case 'v':
if (0 == strncmp(tok[2],"@db.view",8)) action = 0x200;
break;
case 'i':
if (0 == strncmp(tok[2],"@db.integ",9)) action = 0x509;
break;
case 'r':
if (0 == strncmp(tok[2],"@db.revcx",9)) {
action = 0x501;
} else if (0 == strncmp(tok[2],"@db.rev",7)) {
action = 0x505;
}
break;
case 'w':
if (0 == strncmp(tok[2],"@db.working",11)) action = 0x509;
break;
case 'c':
if (0 == strncmp(tok[2],"@db.change",10)) {
action = 0x300;
} else if (0 == strncmp(tok[2],"@db.counters",12)) {
action = 0x400;
}
break;
case 'j':
if (0 == strncmp(tok[2],"@db.job",7) ||
0 == strncmp(tok[2],"@db.jobpend",11) ||
0 == strncmp(tok[2],"@db.jobdesc",11)) action = 0x510;
break;
case 'f':
if (0 == strncmp(tok[2],"@db.fix",7) ||
0 == strncmp(tok[2],"@db.fixrev",10)) action = 0x512;
break;
default:
break;
}
special = action >> 8;
if ( 1 == special ) {
/*
* Looks like nothing is being done in this section in
* perfmerge.pl. I'll ignore it too. Hashtables are no fun in C!
*/
/*
// For db.domain, skip duplicates
if( $domained{ $_[3] } ) {
// printf("Skipping domain %s already seen.\n", tok[3]);
} else {
$domained{ $_[3] } = 1;
}
*/
} else if ( 2 == special ) {
/*
* Looks like nothing is being done in this section in
* perfmerge.pl. I'll ignore it too. Hashtables are no fun in C!
*/
/*
// For db.view, use only the first set we see.
if( $viewed{ $_[3] } && $viewed{ $_[3] } ne $ck ) {
// printf("Skipping view %s already seen.\n", tok[3]);
} else {
$viewed{ $_[3] } = $ck;
}
*/
} else if ( 3 == special ) {
// db.change: renumber the change number (tok[3]) and its key (tok[4]).
if (NULL != (cmap = changebyfile[i][ATOL(tok[3])])) {
sprintf(tbuf1, "%d", cmap->new_change);
tok[3] = tbuf1;
} else {
//fprintf(stderr, "NO CHANGE: %s\n", tok[3]);
}
if (NULL != (cmap = changebyfile[i][ATOL(tok[4])])) {
sprintf(tbuf2, "%d", cmap->new_change);
tok[4] = tbuf2;
} else {
//fprintf(stderr, "NO CHANGE: %s\n", tok[4]);
}
} else if( 4 == special ) {
// elide -- we'll output new counters.
continue;
} else if ( 5 == special ) {
job = ( action >> 4 ) & 0x0F;
change = ( action ) & 0x0F;
// Renumber changes
if ( change ) {
// Field numbers count from the table name (tok[2]); add 2 for the tok[] index.
change += 2;
if (NULL != (cmap = changebyfile[i][ATOL(tok[change])])) {
sprintf(tbuf1, "%d", cmap->new_change);
tok[change] = tbuf1;
} else {
//fprintf(stderr, "NO CHANGE: %s\n", tok[change]);
}
}
// Renumber jobs
/* The perl script didn't do this. Why not? I dunno. Well...
* I'll defer to its infinite wisdom.
*/
/*
if( job ) {
++job;
++job;
sprintf(tbuf1, "%d", changebyjob[i][ATOL(tok[change])]);
tok[job] = tbuf1;
}
*/
}
// Print out the modified tokens.
j = 0;
tcnt--;
do {
fputs(tok[j++], stdout);
fputc(' ', stdout);
} while (j < tcnt);
fputs(tok[j], stdout);
fputc('\n', stdout);
} else {
// Print out the line as it was read in.
fputs(buf2, stdout);
}
// If this line has an odd number of '@'s, an @-quoted field (such as a
// multi-line change description) opens or closes here, so toggle 'first'.
tcnt = 0;
for (j = 0; j < MAXBUFFER && '\0' != buf2[j]; j++) { if ('@' == buf2[j]) { tcnt++; } }
if (tcnt % 2) { first = 1 - first; }
}
fclose(in);
}
fflush(stdout);
fclose(stdout);
return 0;
}