hcache.c #8

/*
 * This file has been donated to Jam.
 */

# include "jam.h"
# include "lists.h"
# include "parse.h"
# include "rules.h"
# include "regexp.h"
# include "headers.h"
# include "newstr.h"
# include "hash.h"
# include "hcache.h"
# include "variable.h"
# include "search.h"

# include <limits.h>

#ifdef OPT_HEADER_CACHE_EXT

/*
 * Craig W. McPheeters, Alias|Wavefront.  Nov/2001.
 * Jan/2002.  Extensions by Matt Armstrong.
 *    See the file README.header_scan_cache for details on the extensions.
 * Jan/2002.  Modification to extensions by Craig.
 *
 * hcache.c, hcache.h - handle caching of #includes in source files
 *
 * Create a cache of files scanned for headers.	 When starting jam,
 * look for the cache file and load it if present.  When finished the
 * binding phase, create a new header cache.  The cache contains
 * files, their timestamps and the header files found in their scan.
 * During the binding phase of jam, look in the header cache first for
 * the headers contained in a file.  If the cache is present and
 * valid, use its contents.  This can result in dramatic speedups on
 * large projects (eg. 3min -> 1min startup on one project.)
 *
 * External routines:
 *    hcache_init() - read and parse the header cache file (e.g. .jamdeps)
 *                    named by the HCACHEFILE variable.
 *    hcache_done() - write a new header cache file
 *    hcache() - return list of headers on target.  Use cache or do a scan.
 *    
 * The dependency file format is an ascii file.  The first line holds the
 * format version, written as @version 1@.  After that there is 1 line per
 * target, and each line has the following fields:
 * @boundname@ timestamp age num @file@ @file@ num @hdrscan@ @hdrscan@ ... \n
 *   where the first number is the number of headers, and the second is the
 *   number of elements in the hdrscan list.
 *
 * Filenames may contain any ascii or non-ascii characters.  If they
 * contain the '@' or '#' characters, they are quoted on output and
 * their quoting is handled on input.  Often the '\' character is used
 * for quoting, but as that is so common in NT pathnames, the '#'
 * character is used instead. Both '@' and '#' are characters
 * disallowed in Perforce filenames - and they should be rare in other
 * SCM systems hopefully.  CWM.
 */

/*
 * One header cache entry: the headers found in a single bound file.
 *
 * Entries are stored in hcachehash and additionally chained through
 * 'next' so that hcache_done() can write them out by walking hcachelist.
 *
 * NOTE(review): entries are looked up by filling in only 'boundname'
 * before calling hashcheck()/hashenter(), so the hash package presumably
 * treats the leading string pointer as the key - confirm against hash.h
 * before reordering the fields.
 */
struct hcachedata {
    char		*boundname;	/* bound path of the scanned file */
    time_t		time;		/* timestamp of the file when it was scanned */
    LIST		*includes;	/* headers found by the scan */
    LIST		*hdrscan; /* the HDRSCAN value for this target */
    int			age;	  /* if too old, we'll remove it from cache */
    struct hcachedata	*next;		/* next entry on hcachelist */
} ;

typedef struct hcachedata HCACHEDATA ;


/* Table of HCACHEDATA entries; created by hcache_init(). */
static struct hash *hcachehash = 0;
/* Head of the list of cache entries (linked via ->next) that hcache_done() writes out. */
static HCACHEDATA  *hcachelist = 0; 

/* Statistics, reported by hcache_done() when DEBUG_HEADER is on. */
static int queries = 0;	/* number of calls to hcache() */
static int hits = 0;	/* number of those calls answered from the cache */

/*
 * Version string stored (quoted with '@') on the first line of the cache
 * file.  hcache_init() rejects a file whose first line does not match.
 */
#define CACHE_FILE_VERSION "version 1"

/*
 * cache_name() - return the path of the header cache file, or 0.
 *
 * The path comes from the first element of the HCACHEFILE variable,
 * bound through search() with that target's settings in effect.  Once
 * a name has been resolved it is remembered in a static, so later
 * changes to HCACHEFILE have no effect.  While HCACHEFILE is unset
 * nothing is remembered and 0 is returned.
 */
static char*
cache_name(void)
{
    static char *resolved = 0;
    LIST *setting;
    TARGET *target;

    /* Resolved on an earlier call: reuse it. */
    if( resolved )
	return resolved;

    setting = var_get( "HCACHEFILE" );
    if( !setting )
	return resolved;

    target = bindtarget( setting->string );

    pushsettings( target->settings );
    target->boundname = search( target->name, &target->time );
    popsettings( target->settings );

    resolved = copystr( target->boundname );
    return resolved;
}

/*
 * cache_maxage() - return the age beyond which cache entries are purged.
 *
 * The value is taken from the first element of the HCACHEMAXAGE
 * variable; it defaults to 100 when the variable is unset, and negative
 * settings are clamped to 0.
 *
 * A result of 0 means entries are never purged (aging is disabled).
 */
static int
cache_maxage(void)
{
    LIST *setting = var_get( "HCACHEMAXAGE" );
    int maxage;

    if( !setting )
	return 100;

    maxage = atoi( setting->string );

    return maxage < 0 ? 0 : maxage;
}

/*
 * skip_spaces() - consume a run of ' ' characters from f.
 *
 * Returns the first character read that is not a space (it has been
 * consumed from the stream), or EOF if the stream ends first.  Only the
 * space character is skipped; tabs and newlines are returned as-is.
 */
static int
skip_spaces( FILE *f )
{
    int c;

    do {
	c = fgetc( f );
    } while( c == ' ' );

    return c;
}

/*
 * read_string() - read one '@'-delimited string from the cache file.
 *
 * Leading spaces are skipped.  The string must start and end with '@';
 * inside it, '#' quotes the character that follows (this is how '@' and
 * '#' themselves are stored by write_string()).
 *
 * Returns the string as returned by newstr(), so it need not be freed.
 * Returns 0 if the next token does not start with '@', if the file ends
 * before the closing '@', or if the string does not fit in MAXJPATH
 * bytes including its terminating NUL.
 */
static char *
read_string( FILE *f )
{
    int ch, i = 0;
    char filename[ MAXJPATH ];

    ch = skip_spaces( f );
    if( ch != '@' )
	return 0;

    ch = fgetc( f );
    /* Stop one short of the buffer size so the terminator always fits. */
    while( ch != '@' && ch != EOF && i < MAXJPATH - 1 ) {
	if( ch == '#' ) { /* Quote: take the next character literally */
	    ch = fgetc( f );
	    if( ch == EOF )
		return 0;
	}
	filename[ i++ ] = ch;
	ch = fgetc( f );
    }

    /* Either the file ended or the name was too long for the buffer. */
    if( ch != '@' )
	return 0;

    filename[ i ] = 0;
    return newstr( filename );
}

/*
 * read_int() - read a non-negative decimal integer from the cache file.
 *
 * Leading spaces are skipped, then every consecutive digit is consumed.
 * The character following the digits (normally the separating space) is
 * consumed as well.  If no digit is present the result is 0 and the
 * first non-space character has still been consumed.
 *
 * The digits are accumulated directly instead of being copied into a
 * fixed-size buffer, so an over-long digit run in a damaged cache file
 * cannot overrun anything; values too large for an int saturate at
 * INT_MAX.
 */
static int
read_int( FILE *f )
{
    int ch, value = 0;

    ch = skip_spaces( f );
    while( ch >= '0' && ch <= '9' ) {
	int digit = ch - '0';

	if( value > ( INT_MAX - digit ) / 10 )
	    value = INT_MAX;	/* would overflow: saturate */
	else
	    value = value * 10 + digit;

	ch = fgetc( f );
    }

    return value;
}

/*
 * write_string() - write s to the cache file as an '@'-delimited token.
 *
 * Any '@' or '#' inside s is preceded by the quote character '#'.  The
 * closing '@' is followed by a single space that separates the token
 * from the next field.
 */
static void 
write_string( FILE *f, const char *s )
{
    const char *p;

    fputc( '@', f );

    for( p = s; *p != 0; ++p ) {
	if( *p == '@' || *p == '#' )
	    fputc( '#', f ); /* Quote */
	fputc( *p, f );
    }

    fputs( "@ ", f );
}

/*
 * write_int() - write i to the cache file in decimal, followed by the
 * single space that separates it from the next field.
 */
static void
write_int( FILE *f, int i )
{
    const int value = i;

    fprintf( f, "%d ", value );
}

/*
 * hcache_init() - create the header cache and load it from the cache file.
 *
 * The hash table is always created.  If cache_name() yields no file
 * name, or the file cannot be opened, the cache simply starts empty.
 *
 * The file must begin with the CACHE_FILE_VERSION line; after that each
 * line describes one entry (see the format description at the top of
 * this file).  Every entry read has its age increased by one.  Entries
 * are appended to hcachelist in file order.
 *
 * If the file is malformed a warning is printed and reading stops;
 * entries that were already loaded stay in the cache.
 */
void
hcache_init(void)
{
    HCACHEDATA	cachedata, *c, *last = 0;
    FILE	*f;
    int		bad_cache = 1, ch;
    char	*version, *hcachename;
    /* Lists of the entry being parsed, until the entry is in the hash */
    LIST	*includes = 0, *hdrscan = 0;

    hcachehash = hashinit( sizeof( HCACHEDATA ), "hcache" );

    if( ! (hcachename = cache_name()) )
	return;

    if( ! (f = fopen( hcachename, "rb" )) )
	return;

    version = read_string( f );
    ch = fgetc( f );
    if (!version || strcmp( version, CACHE_FILE_VERSION ) || ch != '\n' ) {
	goto bail;
    }
    
    for(;;) {
	int i, count;

	c = &cachedata;

	c->boundname = read_string( f );
	if( !c->boundname ) /* Test for eof */
	    break;
	
	c->time = read_int( f );
	c->age = read_int( f ) + 1; /* we're getting older... */

	/* headers */
	count = read_int( f );
	for( i = 0; i < count; ++i ) {
	    char *s = read_string( f );
	    if( !s )
		goto bail;
	    includes = list_new( includes, s );
	}

	/* hdrscan */
	count = read_int( f );
	for( i = 0; i < count; ++i ) {
	    char *s = read_string( f );
	    if( !s )
		goto bail;
	    hdrscan = list_new( hdrscan, s );
	}

	/* Read the newline */
	ch = skip_spaces( f );
	if( ch != '\n' )
	    goto bail;

	c->includes = includes;
	c->hdrscan = hdrscan;

	if( !hashenter( hcachehash, (HASHDATA **)&c ) ) {
	    printf( "jam: can't insert header cache item, bailing on %s\n",
		    hcachename );
	    goto bail;
	}

	/* The lists now belong to the entry stored in the hash. */
	includes = 0;
	hdrscan = 0;

	/* Append, so the list keeps the order of the file. */
	c->next = 0;
	if( last )
	    last->next = c;
	else
	    hcachelist = c;
	last = c;
    }

    bad_cache = 0;

    if( DEBUG_HEADER )
	printf( "hcache read from file %s\n", hcachename );

 bail:
    /* If it's bad, no worries, it'll be overwritten in hcache_done() */
    if( bad_cache ) {
	printf( "jam: warning: the cache was invalid\n" );

	/* Release the lists of the entry that was being parsed. */
	if( includes )
	    list_free( includes );
	if( hdrscan )
	    list_free( hdrscan );
    }

    fclose( f );
}

/*
 * hcache_done() - write the header cache back to the cache file.
 *
 * Does nothing if the cache was never initialised, if cache_name()
 * yields no file name, or if the file cannot be opened for writing.
 *
 * The version line is written first, then one line per entry on
 * hcachelist.  With a maximum age of 0 every entry is written with its
 * age reset to 0; otherwise entries older than the maximum age are left
 * out of the file.  When DEBUG_HEADER is on, the number of entries
 * written and the hit rate of hcache() are reported.
 */
void
hcache_done(void)
{
    FILE	*out;
    HCACHEDATA	*entry;
    LIST	*item;
    char	*hcachename;
    int		written = 0;
    int		maxage;

    if( !hcachehash )
	return;

    hcachename = cache_name();
    if( !hcachename )
	return;

    out = fopen( hcachename, "wb" );
    if( !out )
	return;

    maxage = cache_maxage();

    /* The version line comes first. */
    fprintf( out, "@%s@\n", CACHE_FILE_VERSION );

    for( entry = hcachelist; entry; entry = entry->next ) {
	if( maxage == 0 )
	    entry->age = 0;	/* aging disabled */
	else if( entry->age > maxage )
	    continue;		/* too old: drop it from the file */

	write_string( out, entry->boundname );
	write_int( out, entry->time );
	write_int( out, entry->age );

	/* Header count, then the headers. */
	write_int( out, list_length( entry->includes ) );
	for( item = entry->includes; item; item = list_next( item ) )
	    write_string( out, item->string );

	/* HDRSCAN count, then the HDRSCAN elements. */
	write_int( out, list_length( entry->hdrscan ) );
	for( item = entry->hdrscan; item; item = list_next( item ) )
	    write_string( out, item->string );

	fputc( '\n', out );
	++written;
    }

    if( DEBUG_HEADER )
	printf( "hcache written to %s.	 %d dependencies, %.0f%% hit rate\n",
	       hcachename, written,
	       queries ? 100.0 * hits / queries : 0 );

    fclose( out );
}

/*
 * hcache() - return the list of headers included by target t.
 *
 * The cache entry for t->boundname is used when it exists, its
 * timestamp equals t->time and its stored HDRSCAN list matches hdrscan
 * element for element (the strings are compared by pointer).  In that
 * case the entry's age is reset to 0 and a copy of its header list is
 * returned.
 *
 * Otherwise the file is scanned with headers1( 0, t->boundname, rec, re )
 * and the result is recorded in the cache entry, which is created and
 * put at the head of hcachelist if there was none.  An out of date
 * entry has its old lists freed first.
 *
 * Every call is counted in 'queries', every cache answer in 'hits'.
 */
LIST *
hcache( TARGET *t, int rec, regexp *re[], LIST *hdrscan )
{
    HCACHEDATA	key, *entry = &key;
    LIST	*found;

    ++queries;

    entry->boundname = t->boundname;

    if( !hashcheck( hcachehash, (HASHDATA **) &entry ) )
    {
	/* Not cached yet: create the entry and link it into the list. */
	if( hashenter( hcachehash, (HASHDATA **)&entry ) ) {
	    entry->boundname = newstr( entry->boundname );
	    entry->next = hcachelist;
	    hcachelist = entry;
	}
    }
    else
    {
	int up_to_date = 0;

	if( entry->time == t->time )
	{
	    LIST *want = hdrscan, *have = entry->hdrscan;

	    /* Walk both lists while their elements agree. */
	    while( want && have && want->string == have->string ) {
		want = list_next( want );
		have = list_next( have );
	    }

	    /* They match only if both ran out at the same time. */
	    up_to_date = !want && !have;
	}

	if( up_to_date ) {
	    if( DEBUG_HEADER )
		printf( "using header cache for %s\n", t->boundname );
	    entry->age = 0; /* The entry has been used, it's young again */
	    ++hits;
	    return list_copy( 0, entry->includes );
	}

	if( DEBUG_HEADER )
	    printf( "header cache out of date for %s\n", t->boundname );
	list_free( entry->includes );
	list_free( entry->hdrscan );
	entry->includes = 0;
	entry->hdrscan = 0;
    }

    /* 'entry' is the cache entry, and it needs a fresh scan. */

    found = headers1( 0, t->boundname, rec, re );

    entry->time = t->time;
    entry->age = 0;
    entry->includes = list_copy( 0, found );
    entry->hdrscan = list_copy( 0, hdrscan );

    return found;
}

#endif

#	Change	User	Description
#8	1250	Craig Mcpheeters	Incorporating some changes after a review by Matt I've put the initial version in the cache file back. I'm starting it at version 1. Matt's version is 4, but there shouldn't be a confict as the format of the version string is slightly different, his program won't interpret this version/file as valid Restored the initial age to 0 from 1. With the version in the file, I no longer need to worry about reading old cache files from my earlier versions of Jam Tabified the file, so its a little closer to the Jam coding standards A few tweaks, here and there
#7	1232	Craig Mcpheeters	Clarified the error on invalid cache - there is no need to delete it
#6	1231	Craig Mcpheeters	Oops, forgot about NT. Changed the quote character from '\' to '#'.
#5	1230	Craig Mcpheeters	This contains alterations to Matt's earlier modifications. This version has the benefits of Matt's version, with the cache being smaller and easier to parse by external programs. I've gone back to the format where there is one line per boundname in the cache file. My original implementation had fixed size buffers. This version benefits from Matt's rewriting of the input loops so more care is taken on error handling and in building the lists name by name. Matt had used a '%d\t%s\n' format for writing out strings which contain arbitrary characters. The only special character in my original implementation was the '@' character. If its seen, its now quoted with a '\', and this is handled on input. This produces slightly smaller output Matt's version could handle very large filenames, this version is restricted to the internal jam filename length, this is used elsewhere in the program (MAXJPATH) The initial age of a cache line is now 1 rather than 0. This allows us to catch some errors by atoi() returning 0 when given a non-integer I modified the global cache list handling, so the order its unchanged between two invocations of Jam. This allows easier diff'ing to see what's going on
#4	1229	Craig Mcpheeters	Modified the Jamfile to use the header cache if the feature is on A couple minor fixes, in cache_name() and hcache_init() hcache_init(). If the cache is bad, unlink the file. Avoid use of stderr, have all output go to stdout hcache(), re-organized the do/do-not use logic to avoid duplicated code
#3	1228	Craig Mcpheeters	Syntax changes to bring both my original implementation, and Matt's changes in line with what I think are the Jam coding standards.
#2	1227	Craig Mcpheeters	These are the enhancements that Matt Armstrong made to the header cache code. See their original copies in: //guest/matt_armstrong/jam/hdrscan_cache The file headers.c has been modified to incorporate the other extensions from my branch, the other three files are unaltered in this return.
#1	1226	Craig Mcpheeters	Created a branch which will be used to integrate changes from Matt Armstrong's copy of Jam.
//guest/craig_mcpheeters/jam/src/hcache.c
#2	1101	Craig Mcpheeters	Added a percent done extension Updated the hcache code to be ansi now that Jam is
#1	1023	Craig Mcpheeters	Integration from //guest/craig_mcpheeters/work/jam/src/... This return incorporates all of the Alias\|Wavefront extensions to Jam, into an area which is a proper branch of the Jam mainline. An integration of these files into the Jam mainline will show all of the differences. There are several extensions to Jam in this return. Look at the new file Jamfile.config for an explanation of the extensions, and how to compile them into your own copy of Jam. If you want to build a copy of Jam with all of the extensions, do this: jam -sAllOptions=1 Read the config file for more info. The extensions range from minor output tweaks and simple fixes to more major things like a header cache, serialization of output from multiple jobs, dynamic command block sizing These are all offered without warranty, etc.
//guest/craig_mcpheeters/work/jam/src/hcache.c
#2	785	Craig Mcpheeters	Integration from //guest/craig_mcpheeters/jam/src/... This work area now contains both the Alias\|Wavefront changes, and the latest integrations from the mainline. There are some changes in this area which shouldn't be merged back into the mainline. As I merge this branch back into my jam/src/... branch, I'll leave out a few of the changes.
#1	782	Craig Mcpheeters	Initial return of the Alias\|Wavefront mods to jam 2.2. I made a stab at a configuration system - see the file Jamfile.config. Most of the mods are now enclosed within #ifdef blocks, which kind of pollutes the code, but may make it easier to accept or reject some of these changes. Some of these #ifdefs could disappear completely if they are accepted into the mainline This return implements the following extensions: * header cache * dynamic command block size, allowing for large commands * slightly improved warnings and errors * serial output from jam - nice when working with multiple jobs * an extension to variable modifiers: $(>:/) and $(>:\\) * ability to ignore header dependencies for a build (jam -p) * new debug level, -d+10, which outputs the dependency graph * added no-care support to internal nodes. if they fail, dependents are built anyway * time stamps on output * a few minor output modifications * a fix for nt batch file names conflicing when more than one jam runs at a time Each of the above can be enabled/disabled on the command line. For example, to turn on the HeaderCache code: jam -sHeaderCache=1 that is, build jam first, then use that jam to build a new one with the options you want. Some of these changes may have been made in the mainline already, my next step will be to integrate the mainline changes into these ones This return isn't yet ready for prime-time