/*
** p4dbm.c
**
** Richard Geiger - Data Domain, Inc.
**
** Copyright 2004, Data Domain, Inc.
**
** Please see the "LICENSE" file for additional terms and conditions
**
*/
/* Stage I: read a p4 journal format file, mirroring the Perforce internal
** metadata into an RDBMS (MySQL for now).
*/
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <ctype.h>
#include <syslog.h>
#include <mysql/mysql.h>
/*
** usage - print the one-line command synopsis on stdout.
**
** Every option that main() acts on is listed, including -d (daemonize).
*/
void usage()
{
    fputs("Usage: p4dbm [-d] [-Dn] [-q] [-h] [-V] < journal\n\n", stdout);
}
/*
** help - print the synopsis (via usage()) followed by a one-line
** description of each option, on stdout.
*/
void help()
{
    usage();
    fputs("-d run as a daemon\n"
          "-Dn debug level n\n"
          "-q quiet\n"
          "-h help\n"
          "-V print version\n\n", stdout);
}
/* Gratuitous Global Abuse... */
int n_input_lines = 0;  /* bumped after every successful fgets() of the journal */
int n_input_recs = 0;   /* bumped by main() after each record is inserted */
char hostname[120];     /* filled in by gethostname() at start-up */
int debug = 0;          /* set from the -D option */
char Revision[] = "$Revision: #1 $";
char Date[] = "$Date: 2004/02/11 $";
char *Revision_p;       /* main() points these at the interesting part of */
char *Date_p;           /* Revision[] / Date[] for the -V output */

/*
** Size limits.  Every macro that expands to an expression is wrapped in
** parentheses so it can be used safely inside a larger expression
** (e.g. "n / BUFSIZE").
*/
#define JOPSIZE 16      /* size of the op field of a journal entry */
#define JTABSIZE 16     /* size of a table name, including the NUL */
#define BUFSIZE (32*1024)
char buffer[BUFSIZE];   /* the current chunk of journal text read by fgets() */
#define MAXFLDSIZE (256*1024*1024) /* Maximum size of a column value */
#define MAXQRYSIZE (256*1024*1024) /* Maximum size of a query string */
#define MAXTABLE 33     /* capacity of tables[] */
#define MAXCOLS 16      /* most columns any one table may have */
#define MAXGENS 16
/*
** Description of one Perforce table that this program mirrors.
** Entries are created by set_tabent() and looked up by get_tabent().
*/
struct table
{
char name[JTABSIZE]; /* table name, as copied in by set_tabent() */
int gen; /* value passed to set_tabent(); not read back in the code visible here */
int ncols; /* number of column values fgetjent() reads for each record */
int is_date[MAXCOLS]; /* nonzero => insert_jent() wraps that column in FROM_UNIXTIME() */
};
/* All known tables; populated by init_tables(). */
struct table tables[MAXTABLE];
/* Number of tables[] slots in use. */
int ntables = 0;
/*
** set_tabent - initialize slot i of tables[].
**
**   i     slot to fill; must lie inside tables[]
**   name  table name; must fit, NUL included, in the entry's name field
**   gen   stored in the entry's gen field
**   ncol  number of columns per record; may not exceed the size of the
**         entry's is_date[] array
**
** All of the entry's is_date flags are cleared.  An argument that would
** overrun a fixed-size array is a programming error: it is reported on
** stderr and the program exits with status 1 instead of corrupting memory.
*/
void set_tabent(int i, char *name, int gen, int ncol)
{
    const int max_tables = (int)(sizeof tables / sizeof tables[0]);
    const int max_cols =
        (int)(sizeof tables[0].is_date / sizeof tables[0].is_date[0]);
    int c;

    if (i < 0 || i >= max_tables)
    {
        fprintf(stderr, "set_tabent: slot %d out of range for \"%s\".\n", i, name);
        exit(1);
    }
    if (strlen(name) >= sizeof tables[i].name)
    {
        fprintf(stderr, "set_tabent: table name \"%s\" is too long.\n", name);
        exit(1);
    }
    if (ncol < 0 || ncol > max_cols)
    {
        fprintf(stderr, "set_tabent: bad column count %d for \"%s\".\n", ncol, name);
        exit(1);
    }

    strcpy(tables[i].name, name);   /* length verified above */
    tables[i].gen = gen;
    tables[i].ncols = ncol;
    for (c = 0; c < max_cols; c++)
        tables[i].is_date[c] = 0;
}
/*
** get_tabent - find the tables[] entry whose name equals `name`.
**
** The first ntables slots are scanned in order, so entries nearer the
** front are found sooner.  Returns a pointer to the matching entry.
** If nothing matches, a message is written to stderr and the program
** exits with status 1; the function never returns NULL.
*/
struct table *get_tabent(char *name)
{
    int idx = 0;

    while (idx < ntables)
    {
        if (!strcmp(tables[idx].name, name))
            return tables + idx;
        idx++;
    }

    fprintf(stderr, "get_tabent: no match for \"%s\".\n", name);
    exit (1);
}
/* Try to keep the most heavily used near the front, for performance */
/*
** MARK_DATE(c) - flag column c of the most recently added table as a date
** column.  Wrapped in do/while(0), with its argument parenthesized, so it
** behaves as a single statement wherever it is used.
*/
#define MARK_DATE(c) do { tables[ntables-1].is_date[(c)] = 1; } while (0)
/*
** init_tables - register every table this program knows about.
**
** Each set_tabent() call appends one entry (name, gen, column count) to
** tables[] and bumps ntables; the MARK_DATE() calls that follow it on the
** same line apply to that entry.
*/
void init_tables()
{
    set_tabent(ntables++, "have", 1, 4);
    set_tabent(ntables++, "integed", 0, 8);
    set_tabent(ntables++, "rev", 3, 11); MARK_DATE(5); MARK_DATE(6);
    set_tabent(ntables++, "boddate", 0, 3); MARK_DATE(2);
    set_tabent(ntables++, "bodtext", 0, 3);
    set_tabent(ntables++, "change", 0, 7); MARK_DATE(4);
    set_tabent(ntables++, "changex", 0, 7); MARK_DATE(4);
    set_tabent(ntables++, "counters", 0, 2);
    set_tabent(ntables++, "depot", 0, 4);
    set_tabent(ntables++, "desc", 0, 2);
    set_tabent(ntables++, "domain", 3, 12); MARK_DATE(7); MARK_DATE(8);
    set_tabent(ntables++, "fix", 1, 6); MARK_DATE(2);
    set_tabent(ntables++, "fixrev", 1, 6); MARK_DATE(2);
    set_tabent(ntables++, "group", 3, 5);
    set_tabent(ntables++, "integ", 0, 9);
    set_tabent(ntables++, "ixdate", 0, 3); MARK_DATE(0);
    set_tabent(ntables++, "ixtext", 0, 3);
    set_tabent(ntables++, "job", 0, 5); MARK_DATE(2);
    set_tabent(ntables++, "jobpend", 0, 5); MARK_DATE(2);
    set_tabent(ntables++, "jobdesc", 0, 2);
    set_tabent(ntables++, "label", 0, 3);
    set_tabent(ntables++, "locks", 1, 5);
    set_tabent(ntables++, "logger", 0, 3);
    set_tabent(ntables++, "message", 1, 3);
    set_tabent(ntables++, "protect", 2, 7);
    set_tabent(ntables++, "resolve", 0, 9);
    set_tabent(ntables++, "revcx", 0, 4);
    set_tabent(ntables++, "review", 0, 5);
    set_tabent(ntables++, "trigger", 0, 5);
    set_tabent(ntables++, "user", 2, 7); MARK_DATE(3); MARK_DATE(4);
    set_tabent(ntables++, "view", 0, 5);
    set_tabent(ntables++, "working", 2, 12); MARK_DATE(10);
    set_tabent(ntables++, "monitor", 0, 6);
}
/* Not documented in the schema doc; a sample db.monitor journal record: */
/* @pv@ 0 @db.monitor@ 440 @rmg@ @user-admin@ @checkpoint@ 1067938224 0 */
/*
** One parsed journal record, as built by fgetjent() and released by
** free_jent().
*/
struct jent
{
char op[JOPSIZE]; /* first field of the record (e.g. "pv") */
int gen; /* second field, converted with atoi() */
char table[JTABSIZE]; /* third field with its first three characters removed */
char *vals[MAXCOLS]; /* one malloc'ed string per column; only the first ncols (from the table's entry) are set */
};
/*
** getjfield - extract the next field of a journal record.
**
**   to      destination; receives the field as a NUL-terminated string.
**           No size is passed in, so the caller must supply a buffer large
**           enough for the field.
**   from    in/out cursor into the current chunk of journal text; left
**           pointing just past the field.  When a quoted field runs off
**           the end of the chunk, the next chunk is read into the global
**           `buffer` and *from is re-pointed at it.
**   stream  where further journal text is read from
**
** A field is either an unquoted run of digits or text delimited by '@',
** inside which "@@" stands for a literal '@' and which may span lines.
**
** Returns the number of bytes stored in `to`, terminating NUL included.
** Exits with status 1 if the field starts with anything other than a
** digit or '@', or if EOF is hit inside a quoted field.
*/
int getjfield(char *to, char **from, FILE *stream)
{
    char *tstart = to;

    /*
    ** The <ctype.h> classifiers are only defined for values representable
    ** as unsigned char (or EOF); plain char may be negative, hence the
    ** casts.
    */
    while (isspace((unsigned char)**from)) (*from)++;

    if (isdigit((unsigned char)**from))
    {
        /* Perforce never splits number fields with newlines */
        while (isdigit((unsigned char)**from))
            *to++ = *(*from)++;
        *to++ = '\0';
        while (isspace((unsigned char)**from)) (*from)++;
        return (int)(to - tstart);
    }

    if (**from != '@')
    {
        fprintf(stderr, "internal error: expected digit or '@', at \"%s\"!\n", *from);
        exit(1);
    }
    (*from)++;

    /* Now scan to the end of the '@'-delimited field */
    while (1)
    {
        /*
        ** First, check to see if we've exhausted this chunk of the journal;
        ** if so, get another (and re-check it, in case it is empty too):
        */
        if (! **from)
        {
            if (! fgets(buffer, BUFSIZE, stream))
            {
                fprintf(stderr, "unexpected EOF looking for '@'!\n");
                exit(1);
            }
            n_input_lines++;
            *from = buffer;
            continue;
        }

        if (**from == '@')
        {
            if ((*from)[1] == '@')
            {
                *to++ = '@';
                (*from) += 2;
                continue;
            }
            if ((*from)[1] == '\0')
            {
                /*
                ** This '@' is the very last byte fgets() delivered, so the
                ** line was longer than the buffer (or the file has no final
                ** newline).  The second half of an "@@" pair may be the
                ** first byte of the next chunk: peek at it.
                */
                int c = getc(stream);

                if (c == '@')
                {
                    *to++ = '@';
                    (*from)++;      /* now at the NUL; next pass refills */
                    continue;
                }
                if (c != EOF)
                    ungetc(c, stream);
            }
            *to++ = '\0';
            (*from)++;
            return (int)(to - tstart);
        }

        *to++ = *(*from)++;
    }
}
/*
** fgetjent - read and parse the next journal record from `stream`.
**
** The record's op, gen and table fields are read first; the first three
** characters of the table field (the "db." of e.g. "db.monitor") are
** dropped, and the result selects the tables[] entry that says how many
** column values follow.  Each column value is copied into its own
** malloc'ed string.
**
** Returns a malloc'ed entry that the caller releases with free_jent(),
** or NULL when no more input can be read.  Exits with status 1 on
** allocation failure (and, through the helpers it calls, on malformed
** input or an unknown table).
*/
struct jent *fgetjent(FILE *stream)
{
    /*
    ** Scratch space for one column value.  It is MAXFLDSIZE bytes, far too
    ** large for the stack, so it is heap-allocated on first use and kept
    ** for the life of the process.
    */
    static char *bigbuf = NULL;
    struct jent *j = NULL;
    char *bpt;
    char agen[8];
    struct table *t;
    size_t tablen;
    int i;
    int ncols;

    if (! fgets(buffer, BUFSIZE, stream))
        return NULL;
    n_input_lines++;
    bpt = buffer;

    if (bigbuf == NULL && (bigbuf = malloc(MAXFLDSIZE)) == NULL)
        { perror("fgetjent(): malloc0"); exit(1); }

    if ((j = malloc(sizeof *j)) == NULL)
        { perror("fgetjent(): malloc1"); exit(1); }

    (void)getjfield(j->op, &bpt, stream);
    (void)getjfield(agen, &bpt, stream);
    j->gen = atoi(agen);
    (void)getjfield(j->table, &bpt, stream);

    /*
    ** Drop the three-character prefix.  A field shorter than that is left
    ** untouched (rather than read past its terminator) and is rejected by
    ** get_tabent() below.
    */
    tablen = strlen(j->table);
    if (tablen >= 3)
        memmove(j->table, j->table + 3, tablen - 3 + 1);

    t = get_tabent(j->table);
    ncols = t->ncols;
    for (i = 0; i < ncols; i++)
    {
        /* size counts the terminating NUL, so it is the allocation size */
        int size = getjfield(bigbuf, &bpt, stream);

        if (! (j->vals[i] = malloc((size_t)size)))
            { perror("fgetjent(): malloc2"); exit(1); }
        memcpy(j->vals[i], bigbuf, (size_t)size);
    }
    return j;
}
/*
** free_jent - release a journal entry and its column strings.
**
** The entry's table is looked up to learn how many column values it
** holds; each is freed in order, then the entry itself.
*/
void free_jent(struct jent *j)
{
    struct table *tab;
    int col;

    tab = get_tabent(j->table);
    col = 0;
    while (col < tab->ncols)
    {
        free(j->vals[col]);
        col++;
    }
    free(j);
}
/*
** dump_jent - write a one-line rendering of a journal entry to stderr:
** op, table and gen, then every column value in angle brackets, followed
** by a blank line.
*/
void dump_jent(struct jent *j)
{
    struct table *tab;
    int col;

    tab = get_tabent(j->table);
    fprintf(stderr, "%s %s %d ", j->op, j->table, j->gen);
    col = 0;
    while (col < tab->ncols)
    {
        fprintf(stderr, "<%s> ", j->vals[col]);
        col++;
    }
    fputs("\n\n", stderr);
}
/*
** strtac - copy string f to t, terminating NUL included.
**
** Returns a pointer to the NUL just written in t, so that successive
** calls can append without rescanning what is already there.
*/
char *strtac(char *t, char *f)
{
    for (;; t++, f++)
    {
        *t = *f;
        if (*t == '\0')
            return t;
    }
}
/*
** qstrtac - append f to t as a double-quoted literal.
**
** A backslash is written in front of every '"' and '\' found in f.
** Returns a pointer to the terminating NUL written after the closing
** quote.
*/
char *qstrtac(char *t, char *f)
{
    char *out = t;
    char *in;

    *out++ = '"';
    for (in = f; *in != '\0'; in++)
    {
        switch (*in)
        {
        case '"':
        case '\\':
            *out++ = '\\';
            /* fall through: the character itself is still copied */
        default:
            *out++ = *in;
            break;
        }
    }
    *out++ = '"';
    *out = '\0';
    return out;
}
/*
** is_unix_time - true if s is an optionally negative run of decimal
** digits, i.e. safe to splice unquoted into FROM_UNIXTIME().
*/
static int is_unix_time(const char *s)
{
    if (*s == '-')
        s++;
    if (*s == '\0')
        return 0;
    for (; *s != '\0'; s++)
        if (!isdigit((unsigned char)*s))
            return 0;
    return 1;
}

/*
** insert_jent - mirror one journal entry into the database.
**
** Builds "REPLACE _<table> VALUES (...)" with one value per column of the
** entry's table: date columns become FROM_UNIXTIME(<value>), everything
** else a quoted and escaped string.  Records for the "fix" table get two
** extra empty-string values appended.  The statement is echoed to stderr
** and then executed.
**
** The statement is assembled in a heap buffer sized from the actual
** column values, so no fixed limit is imposed here.  Exits with status 1
** if memory cannot be allocated, if a date column holds something other
** than an integer (it is inserted unquoted, so it must not be able to
** carry SQL), or if the statement fails.
*/
void insert_jent(MYSQL *mysql, struct jent *j)
{
    /* Generous bounds on the text added around the column values. */
    enum { FIXED_OVERHEAD = 64, PER_COL_OVERHEAD = 32 };
    int i;
    struct table *t;
    size_t need;
    char *query;
    char *q;

    t = get_tabent(j->table);

    /*
    ** Worst case per column: every character needs an escaping backslash,
    ** plus quotes and separator; FROM_UNIXTIME(...) is shorter than that
    ** allowance.
    */
    need = FIXED_OVERHEAD + strlen(j->table);
    for (i = 0; i < t->ncols; i++)
        need += PER_COL_OVERHEAD + 2 * strlen(j->vals[i]);

    if ((query = malloc(need)) == NULL)
        { perror("insert_jent(): malloc"); exit(1); }
    q = query;

    /* Select the record types we're interested in mirroring... */
    q = strtac(q, "REPLACE _");
    q = strtac(q, j->table);
    q = strtac(q, " VALUES (");
    for (i = 0; i < t->ncols; i++)
    {
        if (i > 0) q = strtac(q, ", ");
        if (t->is_date[i])
        {
            if (!is_unix_time(j->vals[i]))
            {
                fprintf(stderr, "insert_jent(): bad date value <%s> for table %s\n",
                        j->vals[i], j->table);
                exit(1);
            }
            q = strtac(q, "FROM_UNIXTIME(");
            q = strtac(q, j->vals[i]);
            q = strtac(q, ")");
        }
        else
            q = qstrtac(q, j->vals[i]);
    }

    /* Any special treatment for user-added columns here: */
    if (strcmp("fix", j->table) == 0)
        q = strtac(q, ", \"\", \"\"");
    q = strtac(q, ")");

    fprintf(stderr, "> %s\n", query);
    if (mysql_query(mysql, query))
    {
        fprintf(stderr, "insert_jent(): insert failed: <%s>\n", query);
        exit(1);
    }
    free(query);
}
/*
** main - parse options, connect to the database, then mirror every
** journal record read from stdin.
**
** Options: -Dn sets the debug level, -d daemonizes, -q sets the quiet
** flag, -h prints help, -V prints the version; anything else prints the
** usage line and exits with status 1.  Progress is reported on stderr
** every 1000 records and totals are printed at the end.
*/
int main(int argc, char **argv)
{
    int daemonize = 0;
    int quiet = 0;
    int opt;
    char *p;
    struct jent *j;
    MYSQL *mysql;

    /* gethostname() may fail, and need not NUL-terminate a truncated name */
    if (gethostname(hostname, sizeof hostname) != 0)
        hostname[0] = '\0';
    hostname[sizeof hostname - 1] = '\0';

    /* Isolate the bits of revision/date we are interested in: */
    Revision_p = Revision;
    while (*Revision_p && *Revision_p != '#') Revision_p++;
    p = Revision_p;
    while (*p && *p != ' ') p++;
    *p = '\0';

    Date_p = Date;
    while (*Date_p && *Date_p != ' ') Date_p++;
    if (*Date_p) Date_p++;      /* step over the space, never past the NUL */
    p = Date_p;
    while (*p && *p != ' ') p++;
    *p = '\0';

    /* 'c' is accepted by getopt() but has no case, so it ends up in default */
    while ((opt = getopt(argc, argv, "dD:cqhV")) > -1)
    {
        switch (opt)
        {
        case 'D':
            debug = atoi(optarg);
            break;
        case 'd':
            daemonize = 1;
            break;
        case 'q':
            quiet = 1;
            break;
        case 'h':
            help();
            exit(0);
        case 'V':
            printf("revision %s (%s)\n", Revision_p, Date_p);
            exit(0);
        default:
            usage();
            exit(1);
        }
    }

    openlog("p4dbm", LOG_PID|LOG_PERROR, LOG_USER);
    init_tables();

    // if (! quiet)
    //   syslog(LOG_INFO, "starting p4dbm\n");

    if (daemonize && daemon(1, 1) != 0)
    {
        perror("main(): daemon");
        exit(1);
    }

    mysql = mysql_init(NULL);
    if (mysql == NULL)
    {
        fprintf(stderr, "main(): mysql_init() failed.\n");
        exit(1);
    }
    /* NOTE(review): host, user, empty password and database are hard-coded */
    if (! mysql_real_connect(mysql, "localhost", "root", "", "p4dbm", 0, NULL, 0))
    {
        fprintf(stderr, "main(): mysql_connect() failed.\n");
        exit(1);
    }

    while ((j = fgetjent(stdin)))
    {
        // dump_jent(j);
        insert_jent(mysql, j);
        free_jent(j);
        if ((++n_input_recs % 1000) == 0)
            fprintf(stderr, "\r%d", n_input_recs);
    }

    fprintf(stderr, "\n\nn_input_lines = %d\n", n_input_lines);
    fprintf(stderr, "n_input_recs = %d\n", n_input_recs);
    mysql_close(mysql);
    exit(0);
}