expand.c #2

/*
 * Copyright 1993, 1995 Christopher Seiwald.
 *
 * This file is part of Jam - see jam.c for Copyright information.
 */

# include "jam.h"
# include "lists.h"
# include "variable.h"
# include "expand.h"
# include "filesys.h"
# include "newstr.h"
# include "regexp.h"

/*
 * expand.c - expand a buffer, given variable values
 *
 * External routines:
 *
 *     var_expand() - variable-expand input string into list of strings
 *
 * Internal routines:
 *
 *     var_edit() - copy input target name to output, performing : modifiers
 *     var_mods() - parse : modifiers into FILENAME structure
 *
 * 01/25/94 (seiwald) - $(X)$(UNDEF) was expanding like plain $(X)
 * 04/13/94 (seiwald) - added shorthand L0 for null list pointer
 *
 * Changed by pjh@unisoft.com to support:
 * :X modifier that converts the suffix character into
 * the file delimiter.
 * :E which applies a regular expression substitution
 * on the string.
 */

/* Forward declarations of the file-local helpers defined further down. */

static void	var_edit();

/* var_mods() returns int (it used to return void); see its definition. */
static int	var_mods();

/*
 * Placeholder characters.  While var_expand() copies the text between
 * "$(" and ")" it replaces every ':', '[' and ']' with these values, so
 * that the modifier and subscript separators written in the Jamfile can
 * be told apart from the same characters arriving in variable values.
 */

# define MAGIC_COLON	'\001'
# define MAGIC_LEFT	'\002'
# define MAGIC_RIGHT	'\003'

/*
 * var_expand() - variable-expand input string into list of strings
 *
 * Would just copy input to output, performing variable expansion, 
 * except that since variables can contain multiple values the result
 * of variable expansion may contain multiple values (a list).  Properly
 * performs "product" operations that occur in "$(var1)xxx$(var2)" or
 * even "$($(var2))".
 *
 * Parameters:
 *	l		list handed to list_new()/list_copy() for each result
 *	in, end		the text to expand is [in, end)
 *	lol		supplies the values of $(<), $(>) and $(1)..$(9)
 *	cancopyin	non-zero if 'in' may be given to copystr() rather
 *			than newstr() when there is nothing to expand
 *
 * Returns the final value of l after every result has been added.
 *
 * NOTE(review): nothing here checks that the expanded text fits in
 * MAXSYM bytes; out_buf and varname are filled with unbounded copies.
 */

LIST *
var_expand( l, in, end, lol, cancopyin )
LIST	*l;
char	*in;
char	*end;
LOL	*lol;
int	cancopyin;
{
	char out_buf[ MAXSYM ];
	char *out = out_buf;
	char *inp = in;
	char *ov;		/* for temp copy of variable in outbuf */
	int depth;

	/* The precision of %.*s must be an int; end - in is a ptrdiff_t. */

	if( DEBUG_VAREXP )
	    printf( "expand '%.*s'\n", (int)( end - in ), in );

	/*
	 * This gets a lot of cases: $(<) and $(>).  The length is tested
	 * first so that no byte at or beyond 'end' is looked at.
	 */

	if( end - in == 4 && in[0] == '$' && in[1] == '(' && in[3] == ')' )
	{
	    switch( in[2] )
	    {
	    case '1':
	    case '<':
		return list_copy( l, lol_get( lol, 0 ) );

	    case '2':
	    case '>':
		return list_copy( l, lol_get( lol, 1 ) );
	    }
	}

	/*
	 * Just try simple copy of in to out.  A '$' that is the very last
	 * input character cannot start a variable, so *in is only tested
	 * while it is still inside [in, end).
	 */

	while( in < end )
	    if( ( *out++ = *in++ ) == '$' && in < end && *in == '(' ) 
		goto expand;

	/* No variables expanded - just add copy of input string to list. */

	/* Cancopyin is an optimization: if the input was already a list */
	/* item, we can use the copystr() to put it on the new list. */
	/* Otherwise, we use the slower newstr(). */

	*out = '\0';

	if( cancopyin )
	    return list_new( l, copystr( inp ) );
	else
	    return list_new( l, newstr( out_buf ) );

    expand:
	/*
	 * Input so far (ignore blanks):
	 *
	 *	stuff-in-outbuf $(variable) remainder
	 *			 ^	             ^
	 *			 in		     end
	 * Output so far:
	 *
	 *	stuff-in-outbuf $
	 *	^	         ^
	 *	out_buf          out
	 *
	 *
	 * We just copied the $ of $(...), so back up one on the output.
	 * We now find the matching close paren, copying the variable and
	 * modifiers between the $( and ) temporarily into out_buf, so that
	 * we can replace :'s with MAGIC_COLON.  This is necessary to avoid
	 * being confused by modifier values that are variables containing
	 * :'s.  Ugly.  [ and ] get the same treatment for subscripts.
	 */

	depth = 1;
	out--, in++;
	ov = out;

	while( in < end && depth )
	{
	    switch( *ov++ = *in++ )
	    {
	    case '(': depth++; break;
	    case ')': depth--; break;
	    case ':': ov[-1] = MAGIC_COLON; break;
	    case '[': ov[-1] = MAGIC_LEFT; break;
	    case ']': ov[-1] = MAGIC_RIGHT; break;
	    }
	}

	/*
	 * Copied ) - back up.
	 *
	 * NOTE(review): if the input ended before the matching ) was
	 * found, this drops the last character that was copied instead.
	 */

	ov--;

	/*
	 * Input so far (ignore blanks):
	 *
	 *	stuff-in-outbuf $(variable) remainder
	 *			            ^        ^
	 *			            in       end
	 * Output so far:
	 *
	 *	stuff-in-outbuf variable
	 *	^	        ^       ^
	 *	out_buf         out	ov
	 *
	 * Later we will overwrite 'variable' in out_buf, but we'll be
	 * done with it by then.  'variable' may be a multi-element list, 
	 * so may each value for '$(variable element)', and so may 'remainder'.
	 * Thus we produce a product of three lists.
	 */

	{
	    LIST *variables = 0;
	    LIST *remainder = 0;
	    LIST *vars;

	    /* Recursively expand variable name & rest of input */

	    if( out < ov )
		variables = var_expand( L0, out, ov, lol, 0 );
	    if( in < end )
		remainder = var_expand( L0, in, end, lol, 0 );

	    /* Now produce the result chain */

	    /* For each variable name */

	    for( vars = variables; vars; vars = list_next( vars ) )
	    {
		LIST *value;
		char *colon;
		char *bracket;
		char varname[ MAXSYM ];
		int i, sub1, sub2;

		/* Look for a : modifier in the variable name */
		/* Must copy into varname so we can modify it */

		strcpy( varname, vars->string );

		if( colon = strchr( varname, MAGIC_COLON ) )
		    *colon = '\0';

		/* Look for a [n] or [n-m] subscript before the modifiers */

		if( bracket = strchr( varname, MAGIC_LEFT ) )
		{
		    char *dash;

		    if( dash = strchr( bracket + 1, '-' ) )
		    {
			*dash = '\0';
			sub1 = atoi( bracket + 1 );
			sub2 = atoi( dash + 1 );
		    }
		    else
		    {
			sub1 = sub2 = atoi( bracket + 1 );
		    }

		    *bracket = '\0';
		}
		else
		{
		    sub1 = sub2 = 0;	/* not needed */
		}

		/* Get variable value, specially handling $(<), $(>), $(n) */
		
		if( varname[0] == '<' && !varname[1] )
		{
		    value = lol_get( lol, 0 );
		}
		else if( varname[0] == '>' && !varname[1] )
		{
		    value = lol_get( lol, 1 );
		}
		else if( varname[0] >= '1' && varname[0] <= '9' && !varname[1] )
		{
		    value = lol_get( lol, varname[0] - '1' );
		}
		else 
		{
		    value = var_get( varname );
		}

		/* The fast path: $(x) - just copy the variable value. */

		if( out == out_buf && !bracket && !colon && in == end )
		{
		    l = list_copy( l, value );
		    continue;
		}

		/* For each variable value (i is its 1-based position) */

		for( i = 1; value; i++, value = list_next( value ) )
		{
		    LIST *rem;
		    char *out1;

		    /* Skip members not in subscript; a sub2 of 0 */
		    /* means there is no upper limit */

		    if( bracket && ( i < sub1 || sub2 && i > sub2 ) )
			continue;

		    /* Apply : mods, if present */

		    if( colon )
			var_edit( value->string, colon + 1, out );
		    else
			strcpy( out, value->string );

		    /* If no remainder, append result to output chain. */

		    if( in == end )
		    {
			l = list_new( l, newstr( out_buf ) );
			continue;
		    }

		    /* Remember the end of the variable expansion so */
		    /* we can just tack on each instance of 'remainder' */

		    out1 = out + strlen( out );

		    /* For each remainder, or just once if no remainder, */
		    /* append the complete string to the output chain */

		    for( rem = remainder; rem; rem = list_next( rem ) )
		    {
			strcpy( out1, rem->string );
			l = list_new( l, newstr( out_buf ) );
		    }
		}
	    }

	    /* variables & remainder were gifts from var_expand */
	    /* and must be freed */

	    if( variables )
		list_free( variables );
	    if( remainder)
		list_free( remainder );

	    if( DEBUG_VAREXP )
	    {
		printf( "expanded to " );
		list_print( l );
		printf( "\n" );
	    }

	    return l;
	}
}

/*
 * var_edit() - copy input target name to output, performing : modifiers
 */

/*
 * VAR_ACTS - the modifier actions that are not filename component
 * replacements.  var_mods() zeroes the structure and then sets the
 * members it finds modifiers for; var_edit() carries them out.
 */
typedef struct {
	char	downshift;	/* :L -- downshift result */
	char	upshift;	/* :U -- upshift result */
	char	dotzap;		/* :X -- convert every '.' to DELIM */
	char	parent;		/* :P -- go to parent directory */
	char	*subs;		/* :E=value -- start of value inside the */
				/* modifier string, or 0 if no :E= given */
	int	subslen;	/* number of characters of subs to use */
} VAR_ACTS ;

/*
 * DELIM - the character the :X modifier writes in place of each '.':
 * '/' when the `unix` macro is defined, '\\' otherwise.
 */
# ifdef unix
# define DELIM '/'
# else
# define DELIM '\\'
# endif

/* Forward declaration: the :E substitution entry point, defined below. */
static int re_substitute();

/*
 * var_edit() - apply the : modifiers in 'mods' to 'in', result in 'out'
 *
 *	in	NUL-terminated value to edit
 *	mods	NUL-terminated modifier text, with MAGIC_COLON between
 *		modifiers
 *	out	where the NUL-terminated result is written
 *
 * If var_mods() reports a filename modifier, 'in' is taken apart with
 * file_parse(), the requested components are replaced, and the name is
 * put back together with file_build().  If a :E= substitution was given
 * it is applied to that rebuilt name, or to 'in' itself when there was
 * no filename modifier (so that, for example, a trailing "/" survives).
 * Finally at most one of :U, :L and :X is applied, in that order of
 * preference.
 *
 * NOTE(review): re_substitute() is told that 'out' has MAXSYM bytes of
 * room, but var_expand() may pass a pointer part-way into its buffer.
 */

static void
var_edit( in, mods, out )
char	*in;
char	*mods;
char	*out;
{
	FILENAME old, new;
	VAR_ACTS acts;
	int fileparsed = 0;

	/*
	 * Scratch space for the rebuilt filename handed to the regex
	 * code.  It lives at function scope because re_ptr keeps pointing
	 * at it after the block that fills it in has been left.
	 */

	char re_buf[ MAXSYM ];

	/* Parse apart modifiers, putting them into "new" and "acts" */

	if( fileparsed = var_mods( mods, &new, &acts ) )
	{
	    /* Parse apart original filename, putting parts into "old" */

	    file_parse( in, &old );

	    /* Replace any old with new */

	    if( new.f_grist.ptr )
		old.f_grist = new.f_grist;

	    if( new.f_root.ptr )
		old.f_root = new.f_root;

	    if( new.f_dir.ptr )
		old.f_dir = new.f_dir;

	    if( new.f_base.ptr )
		old.f_base = new.f_base;

	    if( new.f_suffix.ptr )
		old.f_suffix = new.f_suffix;

	    if( new.f_member.ptr )
		old.f_member = new.f_member;

	    /* If requested, modify old to point to parent */

	    if( acts.parent )
		file_parent( &old );
	}

	if( acts.subs )
	{
	    /* Regex substitution: choose its input, then run it */

	    char *re_ptr;

	    if( fileparsed )
	    {
		file_build( &old, re_buf, 0 );
		re_ptr = re_buf;
	    }
	    else
	    {
		/* just doing a re substitution */
		re_ptr = in;
	    }

	    /* Non-zero means it failed, so copy the input anyway */

	    if( re_substitute( re_ptr, out, MAXSYM, acts.subs, acts.subslen ) )
		strcpy( out, re_ptr );
	}
	else if( fileparsed )
	{
	    /* Put filename back together */

	    file_build( &old, out, 0 );
	}
	else
	{
	    /* No filename modifier and no regex: pass the value through */

	    strcpy( out, in );
	}

	/*
	 * Handle upshifting, downshifting and '.' conversion now.  The
	 * <ctype.h> routines need a value in unsigned char range, hence
	 * the casts.
	 */

	if( acts.upshift )
	{
	    for( ; *out; ++out )
		*out = toupper( (unsigned char)*out );
	}
	else if( acts.downshift )
	{
	    for( ; *out; ++out )
		*out = tolower( (unsigned char)*out );
	}
	else if( acts.dotzap )
	{
	    for( ; *out; ++out )
		if( *out == '.' )
		    *out = DELIM;
	}
}

/*
 * var_mods() - parse : modifiers into FILENAME structure
 *
 * The : modifiers in a $(varname:modifier) currently support replacing
 * or omitting elements of a filename, and so they are parsed into a 
 * FILENAME structure (which contains pointers into the original string).
 *
 * Modifiers of the form "X=value" replace the component X with
 * the given value.  Modifiers without the "=value" cause everything 
 * but the component X to be omitted.  X is one of:
 *
 *	G <grist>
 *	D directory name
 *	B base name
 *	S .suffix
 *	M (member)
 *	R root directory - prepended to whole path
 *	X change the suffix into the delimiter.
 *	E apply a regular expression.
 *
 * This routine sets:
 *
 *	f->f_xxx.ptr = 0
 *	f->f_xxx.len = 0
 *		-> leave the original component xxx
 *
 *	f->f_xxx.ptr = string
 *	f->f_xxx.len = strlen( string )
 *		-> replace component xxx with string
 *
 *	f->f_xxx.ptr = ""
 *	f->f_xxx.len = 0
 *		-> omit component xxx
 *
 * var_edit() above and file_build() obligingly follow this convention.
 */

/*
 * Returns 1 once any of the modifiers L U P X G R D B S M has been seen,
 * and 0 otherwise (only :E, nothing at all, or an unknown first letter).
 * A letter that is not recognised ends the parse silently.
 */
static int
var_mods( mods, f, acts )
char		*mods;
FILENAME	*f;
VAR_ACTS	*acts;
{
	/* Position of a letter in this string is its index in f->part[] */
	char *flags = "GRDBSM";
	int sawfilemod = 0;
	int cleared = 0;

	memset( (char *)f, 0, sizeof( *f ) );
	memset( (char *)acts, 0, sizeof( *acts ) );

	while( *mods )
	{
	    struct filepart *part = NULL;	/* stays NULL for :E */
	    char *hit;
	    char *stop;
	    int isaction = 1;
	    int len;

	    /* The single-letter actions never take a value */

	    switch( *mods )
	    {
	    case 'L': acts->downshift = 1; break;
	    case 'U': acts->upshift = 1; break;
	    case 'P': acts->parent = 1; break;
	    case 'X': acts->dotzap = 1; break;
	    default:  isaction = 0; break;
	    }

	    if( isaction )
	    {
		sawfilemod = 1;
		mods++;
		continue;
	    }

	    /* Otherwise it is :E or one of the file component letters */

	    if( *mods == 'E' )
	    {
		mods++;
	    }
	    else
	    {
		hit = strchr( flags, *mods++ );

		if( !hit )
		    break;	/* should complain, but so what... */

		sawfilemod = 1;
		part = &f->part[ hit - flags ];
	    }

	    if( *mods != '=' )
	    {
		/* Bare letter - turn everything but this component off. */
		/* A bare E asks for nothing. */

		int i;

		if( !part )
		    continue;

		if( !cleared )
		{
		    cleared = 1;

		    for( i = 0; i < 6; i++ )
		    {
			f->part[ i ].len = 0;
			f->part[ i ].ptr = "";
		    }
		}

		part->ptr = 0;
		continue;
	    }

	    /* letter=value - the value runs up to the next modifier */
	    /* separator, or to the end of the string */

	    mods++;
	    stop = strchr( mods, MAGIC_COLON );

	    if( stop )
		len = stop - mods;
	    else
		len = strlen( mods );

	    if( part )
	    {
		part->ptr = mods;
		part->len = len;
	    }
	    else
	    {
		acts->subs = mods;
		acts->subslen = len;
	    }

	    /* Step over the value, and over the separator if any */

	    mods += len;

	    if( stop )
		mods++;
	}

	return sawfilemod;
}

/*
** findelim() - copy [start, end) to 'out' up to the next unescaped delim
**
** A backslash directly followed by the delimiter stands for a literal
** delimiter: the backslash is dropped and the delimiter is copied.  Any
** other backslash is copied unchanged.
**
** On finding the delimiter, NUL-terminates 'out' and returns a pointer
** to the delimiter in the input.  Returns 0 if the input runs out first,
** or if 'outlen' characters have been written without reaching it (in
** which case 'out' is left unterminated).
*/


static char *
findelim(out, outlen, start, end, delim)
char *out;
int outlen;
char *start;
char *end;
int delim;
{
	char *limit = out + outlen;
	char *src = start;

	while(src < end && out < limit)
	{
		if(*src == '\\')
		{
			/* skip the backslash only if it escapes the delimiter */
			if(src + 1 < end && src[1] == delim)
				src++;
		}
		else if(*src == delim)
		{
			*out = '\0';
			return(src);
		}
		*out++ = *src++;
	}
	return(0);
}


/*
** re_dosub() - perform the re matching and substitution
**
**	in	NUL-terminated string to search
**	out	where the NUL-terminated result is written
**	olen	number of bytes available at 'out'
**	regstr	NUL-terminated regular expression
**	repstr	NUL-terminated replacement string
**	g	if true, repeat the replacement for every match (a
**		global substitute); otherwise only the first match
**
** In the replacement string '&' stands for the whole match and \0 .. \9
** for the corresponding sub-expression match; a backslash in front of
** any other character makes that character literal.
**
** The most recently compiled expression is kept between calls, so that
** applying the same regstr again does not compile it again.
**
** Returns 0 on success, 3 if regstr does not compile, and 4 if the
** result does not fit in 'out' (whose contents are then incomplete).
**/
static int
re_dosub(in, out, olen, regstr, repstr, g)
char *in;
char *out;
int olen;
char *regstr;
char *repstr;
int g;
{
	regexp *re;
	char *rep;
	int n;
	char *repb;	/* start of the literal run not yet copied out */
	char *badout;
	static char *saveregstr = 0;
	static regexp *savere = 0;


	badout = out + olen;
	/*
	** do we have a saved re ?
	** If so see if the string is the same.
	*/


	if(saveregstr && (strcmp(saveregstr, regstr) == 0))
	{
		re = savere;
	}
	else
	{
		/*
		** have new string, compile it
		*/
		if((re = regcomp(regstr)) == NULL)
		{
			printf("regcomp failed\n");
			return(3);
		}
		/*
		** the regcomp worked, so store the result.
		*/
		if(saveregstr)
		{
			freestr(saveregstr);
			free(savere);
		}
		saveregstr = newstr(regstr);
		savere = re;
	}
	while(regexec(re, in))
	{
		int esc = 0;


		n = re->startp[0] - in;
		/*
		** If we don't have enough room to copy
		** out the result, then fail quietly.
		*/
		if(out + n >= badout)
		{
			return(4);
		}
		strncpy(out, in, n);
		/* move out to where to write next */
		out += n;
		/* move in to where to match next */
		in = re->endp[0];


		/*
		** now output the replacement string.
		*/


		for(repb = rep = repstr;*rep;rep++)
		{
			int ch;
			ch = *rep;
			if(esc)
			{
				esc = 0;
				/*
				** Look for sub expression matches,
				** like \1 or \2.  One that took no
				** part in the match contributes nothing.
				*/
				if(ch >= '0' && ch <= '9')
				{
					int x = ch - '0';
					if(re->startp[x] && re->endp[x])
					{
						n = re->endp[x] - re->startp[x];
						if(out + n >= badout)
						{
							return(4);
						}
						strncpy(out, re->startp[x],n);
						out += n;
					}
					repb = rep + 1; /* skip this char */
				}
				/*
				** Any other escaped char stays in the
				** literal run, which starts at repb.
				*/
				continue;
			}
			/*
			** whole pattern replacement.
			*/
			if(ch == '&')
			{
				n = rep - repb;
				if(n)
				{
					if(out + n >= badout)
					{
						return(4);
					}
					strncpy(out, repb, n);
					out+=n;
				}
				n = re->endp[0] - re->startp[0];
				if(out + n >= badout)
				{
					return(4);
				}
				strncpy(out, re->startp[0],n);
				out += n;
				repb = rep + 1;
				continue;
			}
			if(ch == '\\')
			{
				n = rep - repb;
				if(n)
				{
					if(out + n >= badout)
					{
						return(4);
					}
					strncpy(out, repb, n);
					out += n;
				}
				esc = 1;
				repb = rep + 1;
				continue;
			}
		}
		/*
		** copy any bytes left
		*/
		n = rep - repb;
		if(n)
		{
			if(out + n >= badout)
			{
				return(4);
			}
			strncpy(out, repb, n);
			out += n;
		}
		/*
		** need global repeat?
		*/
		if(!g)
			break;
		/*
		** An empty match consumed no input, so searching again
		** from the same place would find it again for ever.
		** Pass one input character through and carry on from
		** the next, or stop if there is no input left.
		*/
		if(re->endp[0] == re->startp[0])
		{
			if(*in == '\0')
				break;
			if(out + 1 >= badout)
			{
				return(4);
			}
			*out++ = *in++;
		}
	}
	/* finish off the last bit */
	if(out + strlen(in) >= badout)
		return(4);
	strcpy(out, in);
	return(0);
}



/*
** re_substitute() - apply the value of an :E= modifier to a string
**
**	instring	NUL-terminated string to edit
**	out, outlen	result buffer and its size
**	sub, sublen	the text after "E=" and its length
**
** The modifier text has the form <d>re<d>rep<d> optionally followed by
** a single 'g', where <d> is whatever character comes first.  're' is
** the regular expression and 'rep' the replacement; the 'g' asks for
** every match to be replaced rather than only the first.  Each of 're'
** and 'rep' is copied into a buffer of RESIZE bytes.
**
** Returns 1 if 're' cannot be extracted (closing delimiter missing, or
** too long), 2 likewise for 'rep', and otherwise whatever re_dosub()
** returns, which is 0 on success.
**
** NOTE(review): var_expand() appears to have already turned every '['
** and ']' in the modifier text into MAGIC_LEFT / MAGIC_RIGHT, and the
** text stops at the first ':', so the expression seen here may not be
** exactly what was written - confirm before relying on those characters.
*/


#define RESIZE 80
static int
re_substitute(instring, out, outlen, sub, sublen)
char *instring;
char *out;
int outlen;
char *sub;
int sublen;
{
	char pattern[RESIZE];
	char replacement[RESIZE];
	char *limit = sub + sublen;
	char *pos;
	char sep;
	int global;

	/* The first character is the delimiter; 're' starts after it. */
	sep = *sub;
	pos = findelim(pattern, RESIZE, sub + 1, limit, sep);
	if(pos == 0)
		return(1);

	/* 'rep' starts after the delimiter that closed 're'. */
	pos = findelim(replacement, RESIZE, pos + 1, limit, sep);
	if(pos == 0)
		return(2);

	/* Exactly one character after the last delimiter, and it is 'g'? */
	global = (limit - pos == 2) && (limit[-1] == 'g');

	return(re_dosub(instring, out, outlen, pattern, replacement, global));
}
#	Change	User	Description
#2	389	Eric Scouten	Submit changes from Paul Haffenden <pjh@unisoft.com> for doing regexp parsing on filename expressions.
#1	388	Eric Scouten	Populate es-jam branch.
//guest/perforce_software/jam/src/expand.c
#1	2	laura	Add Jam/MR 2.2 source