/*
* Copyright 1993, 1995 Christopher Seiwald.
*
* This file is part of Jam - see jam.c for Copyright information.
*/
# include "jam.h"
# include "lists.h"
# include "variable.h"
# include "expand.h"
# include "filesys.h"
# include "newstr.h"
# include "regexp.h"
/*
* expand.c - expand a buffer, given variable values
*
* External routines:
*
* var_expand() - variable-expand input string into list of strings
*
* Internal routines:
*
* var_edit() - copy input target name to output, performing : modifiers
* var_mods() - parse : modifiers into FILENAME structure
*
* 01/25/94 (seiwald) - $(X)$(UNDEF) was expanding like plain $(X)
* 04/13/94 (seiwald) - added shorthand L0 for null list pointer
*
* Changed by pjh@unisoft.com to support:
* :X modifier that converts the suffix character into
* the file delimiter.
* :E which applies a regular expression substitution
* on the string.
*/
/* Forward declarations of internal routines defined later in this file. */
static void var_edit();
/* var_mods() returns int: non-zero once any modifier other than :E has been recognized. */
static int var_mods();
/*
 * Placeholder bytes that var_expand() writes in place of ':', '[' and ']'
 * while copying the text between "$(" and its matching ")".  The code in
 * this file then searches for these placeholders instead of the literal
 * characters when it looks for modifiers and subscripts.
 */
# define MAGIC_COLON '\001'
# define MAGIC_LEFT '\002'
# define MAGIC_RIGHT '\003'
/*
 * var_expand() - variable-expand input string into list of strings
 *
 * Would just copy input to output, performing variable expansion,
 * except that since variables can contain multiple values the result
 * of variable expansion may contain multiple values (a list).  Properly
 * performs "product" operations that occur in "$(var1)xxx$(var2)" or
 * even "$($(var2))".
 *
 * Parameters:
 *	l		list the results are appended to (may be L0)
 *	in, end		the input text is the bytes [in, end)
 *	lol		argument lists consulted for $(<), $(>), $(1)..$(9)
 *	cancopyin	non-zero if 'in' is already a list item, so the
 *			unexpanded string can be added with copystr()
 *			instead of the slower newstr()
 *
 * Returns l with the expansion appended.
 *
 * NOTE(review): out_buf and varname are fixed MAXSYM buffers filled with
 * unchecked copies; input that expands to something longer will overrun
 * them.  Left unchanged here.
 */
LIST *
var_expand( l, in, end, lol, cancopyin )
LIST *l;
char *in;
char *end;
LOL *lol;
int cancopyin;
{
    char out_buf[ MAXSYM ];
    char *out = out_buf;
    char *inp = in;
    char *ov;           /* for temp copy of variable in outbuf */
    int depth;

    /* The precision argument of %.*s must be an int, not a ptrdiff_t. */
    if( DEBUG_VAREXP )
        printf( "expand '%.*s'\n", (int)( end - in ), in );

    /*
     * This gets a lot of cases: $(<) and $(>).
     * The length test keeps us from reading in[3] on a shorter input.
     */
    if( end - in == 4 && in[0] == '$' && in[1] == '(' && in[3] == ')' )
    {
        switch( in[2] )
        {
        case '1':
        case '<':
            return list_copy( l, lol_get( lol, 0 ) );

        case '2':
        case '>':
            return list_copy( l, lol_get( lol, 1 ) );
        }
    }

    /* Just try simple copy of in to out. */
    while( in < end )
        if( ( *out++ = *in++ ) == '$' && *in == '(' )
            goto expand;

    /* No variables expanded - just add copy of input string to list. */
    /* Cancopyin is an optimization: if the input was already a list */
    /* item, we can use the copystr() to put it on the new list. */
    /* Otherwise, we use the slower newstr(). */
    *out = '\0';

    if( cancopyin )
        return list_new( l, copystr( inp ) );
    else
        return list_new( l, newstr( out_buf ) );

expand:
    /*
     * Input so far (ignore blanks):
     *
     *	stuff-in-outbuf $(variable) remainder
     *			 ^		     ^
     *			 in		     end
     * Output so far:
     *
     *	stuff-in-outbuf $
     *	^		 ^
     *	out_buf		 out
     *
     * We just copied the $ of $(...), so back up one on the output.
     * We now find the matching close paren, copying the variable and
     * modifiers between the $( and ) temporarily into out_buf, so that
     * we can replace :'s with MAGIC_COLON.  This is necessary to avoid
     * being confused by modifier values that are variables containing
     * :'s.  Ugly.
     */
    depth = 1;
    out--, in++;
    ov = out;

    while( in < end && depth )
    {
        switch( *ov++ = *in++ )
        {
        case '(': depth++; break;
        case ')': depth--; break;
        case ':': ov[-1] = MAGIC_COLON; break;
        case '[': ov[-1] = MAGIC_LEFT; break;
        case ']': ov[-1] = MAGIC_RIGHT; break;
        }
    }

    /*
     * Copied ) - back up.  Only do so when the matching ) was really
     * found; otherwise we would drop a character of the name, or step
     * in front of out_buf when nothing at all followed the "$(".
     */
    if( !depth )
        ov--;

    /*
     * Input so far (ignore blanks):
     *
     *	stuff-in-outbuf $(variable) remainder
     *				    ^	     ^
     *				    in	     end
     * Output so far:
     *
     *	stuff-in-outbuf variable
     *	^		^	^
     *	out_buf		out	ov
     *
     * Later we will overwrite 'variable' in out_buf, but we'll be
     * done with it by then.  'variable' may be a multi-element list,
     * so may each value for '$(variable element)', and so may 'remainder'.
     * Thus we produce a product of three lists.
     */
    {
        LIST *variables = 0;
        LIST *remainder = 0;
        LIST *vars;

        /* Recursively expand variable name & rest of input */
        if( out < ov )
            variables = var_expand( L0, out, ov, lol, 0 );
        if( in < end )
            remainder = var_expand( L0, in, end, lol, 0 );

        /* Now produce the result chain */
        /* For each variable name */
        for( vars = variables; vars; vars = list_next( vars ) )
        {
            LIST *value;
            char *colon;
            char *bracket;
            char varname[ MAXSYM ];
            int i, sub1, sub2;

            /* Look for a : modifier in the variable name */
            /* Must copy into varname so we can modify it */
            strcpy( varname, vars->string );

            if( ( colon = strchr( varname, MAGIC_COLON ) ) )
                *colon = '\0';

            /* Look for a [n] or [n-m] subscript ahead of any modifier */
            if( ( bracket = strchr( varname, MAGIC_LEFT ) ) )
            {
                char *dash;

                if( ( dash = strchr( bracket + 1, '-' ) ) )
                {
                    *dash = '\0';
                    sub1 = atoi( bracket + 1 );
                    sub2 = atoi( dash + 1 );
                }
                else
                {
                    sub1 = sub2 = atoi( bracket + 1 );
                }

                *bracket = '\0';
            }
            else
            {
                sub1 = sub2 = 0;    /* not needed */
            }

            /* Get variable value, specially handling $(<), $(>), $(n) */
            if( varname[0] == '<' && !varname[1] )
            {
                value = lol_get( lol, 0 );
            }
            else if( varname[0] == '>' && !varname[1] )
            {
                value = lol_get( lol, 1 );
            }
            else if( varname[0] >= '1' && varname[0] <= '9' && !varname[1] )
            {
                value = lol_get( lol, varname[0] - '1' );
            }
            else
            {
                value = var_get( varname );
            }

            /* The fast path: $(x) - just copy the variable value. */
            if( out == out_buf && !bracket && !colon && in == end )
            {
                l = list_copy( l, value );
                continue;
            }

            /* For each variable value */
            for( i = 1; value; i++, value = list_next( value ) )
            {
                LIST *rem;
                char *out1;

                /* Skip members not in subscript; sub2 == 0 means "to the end" */
                if( bracket && ( i < sub1 || ( sub2 && i > sub2 ) ) )
                    continue;

                /* Apply : mods, if present */
                if( colon )
                    var_edit( value->string, colon + 1, out );
                else
                    strcpy( out, value->string );

                /* If no remainder, append result to output chain. */
                if( in == end )
                {
                    l = list_new( l, newstr( out_buf ) );
                    continue;
                }

                /* Remember the end of the variable expansion so */
                /* we can just tack on each instance of 'remainder' */
                out1 = out + strlen( out );

                /* For each remainder, or just once if no remainder, */
                /* append the complete string to the output chain */
                for( rem = remainder; rem; rem = list_next( rem ) )
                {
                    strcpy( out1, rem->string );
                    l = list_new( l, newstr( out_buf ) );
                }
            }
        }

        /* variables & remainder were gifts from var_expand */
        /* and must be freed */
        if( variables )
            list_free( variables );
        if( remainder )
            list_free( remainder );

        if( DEBUG_VAREXP )
        {
            printf( "expanded to " );
            list_print( l );
            printf( "\n" );
        }

        return l;
    }
}
/*
* var_edit() - copy input target name to output, performing : modifiers
*/
/* Structure changed */
typedef struct {
char downshift; /* :L -- downshift result */
char upshift; /* :U -- upshift result */
char dotzap; /* :X -- convert . to / */
char parent; /* :P -- go to parent directory */
char *subs; /* :E -- regular expression */
int subslen;
} VAR_ACTS ;
/* added */
# ifdef unix
# define DELIM '/'
# else
# define DELIM '\\'
# endif
/* added */
static int re_substitute();
/*
 * var_edit() - apply the ':' modifiers in 'mods' to the value 'in'
 *
 *	in	the value being edited
 *	mods	the modifier text that followed the first MAGIC_COLON
 *	out	where the edited, NUL-terminated result is written
 *
 * Steps, in order:
 *  1. var_mods() parses the modifiers.  If it reports a file modifier,
 *     'in' is split with file_parse(), the requested parts are replaced
 *     and :P is applied.
 *  2. If an :E substitution was given it is run over the rebuilt name
 *     (or over 'in' untouched when no file modifier was seen, so that
 *     e.g. a trailing "/" survives).  If the substitution fails, the
 *     unsubstituted text is copied instead.
 *  3. Exactly one of :U, :L, :X (in that priority) is applied to the
 *     result.
 */
static void
var_edit( in, mods, out )
char *in;
char *mods;
char *out;
{
    FILENAME old, new;
    VAR_ACTS acts;
    char re_buf[ MAXSYM ];  /* function scope: re_ptr may point here */
                            /* until after re_substitute() returns */
    int fileparsed;

    /* Parse apart modifiers, putting them into "new" and "acts" */
    fileparsed = var_mods( mods, &new, &acts );

    if( fileparsed )
    {
        /* Parse apart original filename, putting parts into "old" */
        file_parse( in, &old );

        /* Replace any old with new */
        if( new.f_grist.ptr )
            old.f_grist = new.f_grist;
        if( new.f_root.ptr )
            old.f_root = new.f_root;
        if( new.f_dir.ptr )
            old.f_dir = new.f_dir;
        if( new.f_base.ptr )
            old.f_base = new.f_base;
        if( new.f_suffix.ptr )
            old.f_suffix = new.f_suffix;
        if( new.f_member.ptr )
            old.f_member = new.f_member;

        /* If requested, modify old to point to parent */
        if( acts.parent )
            file_parent( &old );
    }

    if( acts.subs )
    {
        /* Regex substitution: source is the rebuilt name, or 'in' as is */
        char *re_ptr = in;

        if( fileparsed )
        {
            file_build( &old, re_buf, 0 );
            re_ptr = re_buf;
        }

        /* Non-zero means the substitution failed, so copy stuff anyway */
        if( re_substitute( re_ptr, out, MAXSYM, acts.subs, acts.subslen ) )
            strcpy( out, re_ptr );
    }
    else if( fileparsed )
    {
        /* Put filename back together */
        file_build( &old, out, 0 );
    }
    else
    {
        /* No modifier was recognised at all: pass the value through */
        strcpy( out, in );
    }

    /*
     * Handle upshifting, downshifting and dot conversion now.
     * The <ctype.h> routines need an unsigned char value, so cast
     * before calling them.
     */
    if( acts.upshift )
    {
        for( ; *out; ++out )
            *out = (char)toupper( (unsigned char)*out );
    }
    else if( acts.downshift )
    {
        for( ; *out; ++out )
            *out = (char)tolower( (unsigned char)*out );
    }
    else if( acts.dotzap )
    {
        for( ; *out; ++out )
            if( *out == '.' )
                *out = DELIM;
    }
}
/*
* var_mods() - parse : modifiers into FILENAME structure
*
* The : modifiers in a $(varname:modifier) currently support replacing
* or omitting elements of a filename, and so they are parsed into a
* FILENAME structure (which contains pointers into the original string).
*
* Modifiers of the form "X=value" replace the component X with
* the given value. Modifiers without the "=value" cause everything
* but the component X to be omitted. X is one of:
*
* G <grist>
* D directory name
* B base name
* S .suffix
* M (member)
* R root directory - prepended to whole path
* X change the suffix into the delimiter.
* E apply a regular expression.
*
* This routine sets:
*
* f->f_xxx.ptr = 0
* f->f_xxx.len = 0
* -> leave the original component xxx
*
* f->f_xxx.ptr = string
* f->f_xxx.len = strlen( string )
* -> replace component xxx with string
*
* f->f_xxx.ptr = ""
* f->f_xxx.len = 0
* -> omit component xxx
*
* var_edit() above and file_build() obligingly follow this convention.
*/
/* routine changed */
/*
 * Besides filling in *f, var_mods() records the :L :U :P :X and :E
 * requests in *acts.  It returns non-zero if any modifier other than
 * :E was recognized, and stops parsing at the first unknown letter.
 */
static int
var_mods( mods, f, acts )
char *mods;
FILENAME *f;
VAR_ACTS *acts;
{
    char *flags = "GRDBSM";
    int filemodseen = 0;
    int havezeroed = 0;

    memset( (char *)f, 0, sizeof( *f ) );
    memset( (char *)acts, 0, sizeof( *acts ) );

    while( *mods )
    {
        struct filepart *fp = NULL;     /* stays NULL for :E */
        char *value;
        char *stop;
        int len;
        int k;

        /* The single-letter actions never take a value */
        switch( *mods )
        {
        case 'L':
            acts->downshift = 1;
            filemodseen = 1;
            ++mods;
            continue;

        case 'U':
            acts->upshift = 1;
            filemodseen = 1;
            ++mods;
            continue;

        case 'P':
            acts->parent = 1;
            filemodseen = 1;
            ++mods;
            continue;

        case 'X':
            acts->dotzap = 1;
            filemodseen = 1;
            ++mods;
            continue;
        }

        if( *mods == 'E' )
        {
            /* A regular expression: no file component involved */
            ++mods;
        }
        else
        {
            /* Now handle the file component flags */
            char *fl = strchr( flags, *mods++ );

            if( !fl )
                return( filemodseen );  /* should complain, but so what... */

            filemodseen = 1;
            fp = &f->part[ fl - flags ];
        }

        if( *mods != '=' )
        {
            /* :X - turn everything but X off; a bare :E does nothing */
            if( fp == NULL )
                continue;

            if( !havezeroed++ )
                for( k = 0; k < 6; k++ )
                {
                    f->part[ k ].len = 0;
                    f->part[ k ].ptr = "";
                }

            fp->ptr = 0;
            continue;
        }

        /* :X=value - the value runs to the next MAGIC_COLON or the end */
        value = ++mods;
        stop = strchr( value, MAGIC_COLON );
        len = stop ? (int)( stop - value ) : (int)strlen( value );

        if( fp )
        {
            fp->ptr = value;
            fp->len = len;
        }
        else
        {
            acts->subs = value;
            acts->subslen = len;
        }

        mods = stop ? stop + 1 : value + len;
    }

    return( filemodseen );
}
/*
** findelim() - copy [start, end) to 'out' up to the next unescaped delim.
**
** A backslash immediately followed by the delimiter is dropped and the
** delimiter is copied as an ordinary character; any other backslash is
** copied unchanged.
**
** If the delimiter is found, 'out' is NUL-terminated and the return
** value points at the delimiter in the input.  Returns 0 if the input
** runs out first or 'outlen' bytes have been written without finding
** it; in that case 'out' is not NUL-terminated.
*/
static char *
findelim(out, outlen, start, end, delim)
char *out;
int outlen;
char *start;
char *end;
int delim;
{
    char *limit = out + outlen;

    for(; start < end && out < limit; *out++ = *start++)
    {
        if(*start != '\\')
        {
            if(*start == delim)
            {
                *out = 0;
                return(start);
            }
            continue;
        }
        /* escaped delimiter: step over the backslash */
        if(start + 1 < end && start[1] == delim)
            start++;
    }
    return(0);
}
/*
** re_append() - bounded copy of n bytes from src to *outp.
** Advances *outp and returns 0, or returns non-zero (copying nothing)
** when the bytes plus a later NUL would not fit before 'badout'.
*/
static int
re_append(outp, badout, src, n)
char **outp;
char *badout;
char *src;
int n;
{
    if(*outp + n >= badout)
        return(1);
    if(n > 0)
    {
        memcpy(*outp, src, n);
        *outp += n;
    }
    return(0);
}

/*
** re_dosub() - perform the re matching and substitution.
**
**	in	NUL-terminated string to search
**	out	buffer of 'olen' bytes receiving the NUL-terminated result
**	regstr	the regular expression
**	repstr	the replacement: '&' inserts the whole match, "\N" (N a
**		digit) inserts sub-expression N, and a backslash before
**		any other character makes that character literal
**	g	if true, repeat the replacement (a global substitute)
**
** Returns 0 on success, 3 if the expression does not compile and 4 if
** the result does not fit in 'out' (whose contents are then incomplete).
**
** The most recently compiled expression is cached in function-static
** storage and reused while 'regstr' stays the same.
*/
static int
re_dosub(in, out, olen, regstr, repstr, g)
char *in;
char *out;
int olen;
char *regstr;
char *repstr;
int g;
{
    regexp *re;
    char *rep;
    char *repb;     /* start of replacement text not yet copied out */
    char *badout = out + olen;
    static char *saveregstr = 0;
    static regexp *savere = 0;

    /*
    ** do we have a saved re ?
    ** If so see if the string is the same.
    */
    if(saveregstr && (strcmp(saveregstr, regstr) == 0))
    {
        re = savere;
    }
    else
    {
        /* have new string, compile it */
        if((re = regcomp(regstr)) == NULL)
        {
            printf("regcomp failed\n");
            return(3);
        }
        /* the regcomp worked, so replace the cached result */
        if(saveregstr)
        {
            freestr(saveregstr);
            free(savere);
        }
        saveregstr = newstr(regstr);
        savere = re;
    }

    while(regexec(re, in))
    {
        int esc = 0;
        int emptymatch = (re->endp[0] == re->startp[0]);

        /* copy the text in front of the match, fail quietly if no room */
        if(re_append(&out, badout, in, (int)(re->startp[0] - in)))
            return(4);

        /* move in to where to match next */
        in = re->endp[0];

        /* now output the replacement string */
        for(repb = rep = repstr; *rep; rep++)
        {
            int ch = *rep;

            if(esc)
            {
                esc = 0;
                /* Look for sub expression matches, like \1 or \2 */
                if(ch >= '0' && ch <= '9')
                {
                    int x = ch - '0';

                    /*
                    ** Only copy a group that has a recorded span
                    ** (presumably left NULL when the group did not
                    ** take part in the match).
                    */
                    if(re->startp[x] && re->endp[x])
                    {
                        if(re_append(&out, badout, re->startp[x],
                            (int)(re->endp[x] - re->startp[x])))
                            return(4);
                    }
                    repb = rep + 1;     /* skip this char */
                }
                /* any other escaped char stays in the pending literal */
                continue;
            }

            if(ch == '&' || ch == '\\')
            {
                /* flush the literal text collected so far */
                if(re_append(&out, badout, repb, (int)(rep - repb)))
                    return(4);

                if(ch == '&')
                {
                    /* whole pattern replacement */
                    if(re_append(&out, badout, re->startp[0],
                        (int)(re->endp[0] - re->startp[0])))
                        return(4);
                }
                else
                {
                    esc = 1;
                }
                repb = rep + 1;
            }
        }

        /* copy any bytes left */
        if(re_append(&out, badout, repb, (int)(rep - repb)))
            return(4);

        /* need global repeat? */
        if(!g)
            break;

        /*
        ** An empty match consumed nothing, so matching again at the
        ** same place would repeat forever.  Pass one input character
        ** through unchanged, or stop at the end of the input.
        */
        if(emptymatch)
        {
            if(!*in)
                break;
            if(re_append(&out, badout, in, 1))
                return(4);
            in++;
        }
    }

    /* finish off the last bit */
    if(out + strlen(in) >= badout)
        return(4);
    strcpy(out, in);
    return(0);
}
/*
** re_substitute() - split the text after ":E=" and run the substitution.
**
** The text has the form
**
**	<d>re<d>rep<d>[g]
**
** where <d>, the first character, can be any character and serves as
** the delimiter; re is the regular expression and rep the replacement.
** Both are copied into RESIZE-byte NUL-terminated buffers.  A single
** trailing 'g' after the last delimiter asks for a global substitute.
**
** Returns 1 if the expression part has no closing delimiter or does
** not fit, 2 likewise for the replacement part, and otherwise whatever
** re_dosub() returns (zero means success).
*/
#define RESIZE 80
static int
re_substitute(instring, out, outlen, sub, sublen)
char *instring;
char *out;
int outlen;
char *sub;
int sublen;
{
    char pattern[RESIZE];
    char replacement[RESIZE];
    char *limit = sub + sublen;
    char sep = *sub;        /* first char names the delimiter */
    char *at;
    int global;

    /* the expression runs from after the delimiter to the next one */
    at = findelim(pattern, RESIZE, sub + 1, limit, sep);
    if(!at)
        return(1);

    /* the replacement follows, up to a third delimiter */
    at = findelim(replacement, RESIZE, at + 1, limit, sep);
    if(!at)
        return(2);

    /* exactly one character may follow the last delimiter: 'g' */
    global = (limit - at == 2) && (limit[-1] == 'g');

    return(re_dosub(instring, out, outlen, pattern, replacement, global));
}