/*
 * Copyright 1993-2002 Christopher Seiwald and Perforce Software, Inc.
 *
 * This file is part of Jam - see jam.c for Copyright information.
 */

/*
 * scan.c - the jam yacc scanner
 *
 * 12/26/93 (seiwald) - bump buf in yylex to 10240 - yuk.
 * 09/16/94 (seiwald) - check for overflows, unmatched {}'s, etc.
 *                      Also handle tokens abutting EOF by remembering
 *                      to return EOF no matter how many times yylex()
 *                      reinvokes yyline().
 * 02/11/95 (seiwald) - honor only punctuation keywords if SCAN_PUNCT.
 * 07/27/95 (seiwald) - Include jamgram.h after scan.h, so that YYSTYPE is
 *                      defined before Linux's yacc tries to redefine it.
 * 01/10/01 (seiwald) - \ can now escape any whitespace char
 * 11/04/02 (seiwald) - const-ing for string literals
 */

# include "jam.h"
# include "lists.h"
# include "parse.h"
# include "scan.h"
# include "jamgram.h"
# include "jambase.h"
# include "newstr.h"

/* putmessage() is variadic, so <stdarg.h> is needed whenever it is built. */
#if defined( GRAPHISOFT_LEXSCAN ) || defined( GRAPHISOFT_JAM )
# include <stdarg.h>
#endif

#ifdef GRAPHISOFT_LEXSCAN

/***********************************************************************
separators:
    "(", ")" "[", "]" "{", "}" ":",";" "!","!=", "?","?=", "&&", "||",
    "=", " ", "\t", "\n", "\r", EOF
todo: + should be too, but C++ is used in jamfile...
***********************************************************************/

/*
 * tokenseparator[ch] is non-zero when ch is a token separator.  Within this
 * file the values 1 and 2 are handled identically; the value 3 (used for '+')
 * starts a token of its own but does NOT terminate a word being scanned and
 * does not trigger the compatibility warning.
 */
char tokenseparator[ 256 ];

/*
 * tokenseparatornext[ch] is the character that forms a two-character token
 * when it follows the separator ch (0 if there is none).
 */
char tokenseparatornext[ 256 ];

#endif

#ifdef GRAPHISOFT_MPW_FIX
#if defined (macintosh)
# include "CursorCtl.h"
static int totalline = 0;   /* yyline() calls so far; paces SpinCursor() */
#endif
#endif

/* Keyword table generated from the grammar; terminated by a { 0, 0 } entry. */
struct keyword {
    const char *word;
    int type;
} keywords[] = {
# include "jamgramtab.h"
    { 0, 0 }
} ;

#ifdef GRAPHISOFT_FIX_NONNATIVENEWLINE

#define XFILEBUFSIZE 4096

/*
 * XFILE - a FILE wrapper with its own read buffer, used by xfgets() to
 * recognize "\n", "\r" and two-character line endings alike.
 */
typedef struct XFILE {
    char    buffer[ XFILEBUFSIZE ];
    size_t  start;      /* index of the next unread byte in buffer */
    size_t  end;        /* number of valid bytes in buffer */
    FILE    *f;
} XFILE;

/*
 * xfopen() - open a file for xfgets(); returns NULL if fopen() or the
 * allocation fails (the FILE is closed again in the latter case).
 */
static XFILE *
xfopen( const char *file, const char *mode )
{
    XFILE *result;
    FILE *f = fopen( file, mode );

    if( f == NULL )
        return NULL;

    result = (XFILE *)malloc( sizeof( *result ) );

    if( result == NULL )
    {
        fclose( f );
        return NULL;
    }

    result->f = f;
    result->start = 0;
    result->end = 0;
    return result;
}

/*
 * xfill() - refill the XFILE buffer from the underlying stream.
 * Returns non-zero if at least one byte is now available.
 */
static int
xfill( XFILE *f )
{
    f->start = 0;
    f->end = fread( f->buffer, sizeof( char ), XFILEBUFSIZE, f->f );
    return f->end != 0;
}

/*
 * xfgets() - fgets() work-alike that accepts any newline convention
 *
 * Reads at most size-1 characters into buf and always NUL-terminates it.
 * A line ends at '\n' or '\r'; a two-character ending (CR LF on platforms
 * where '\n' is LF, the reverse where '\n' is 0x0D) is consumed as one.
 * The terminator is always stored as a single '\n'.
 *
 * Returns buf when at least one character was stored, NULL at end of file
 * (or when size is too small to hold any character).  A final line lacking
 * a newline is returned rather than discarded.
 */
static char *
xfgets( char *buf, size_t size, XFILE *f )
{
    /* Which byte comes first in a two-character line ending. */
    const char first = ( '\n' == 0x0D ) ? '\n' : '\r';
    const char second = ( '\n' == 0x0D ) ? '\r' : '\n';
    size_t n = 0;       /* characters stored in buf so far */

    if( size == 0 )
        return NULL;

    /* Always keep one byte in reserve for the terminating NUL. */

    while( n + 1 < size )
    {
        char c;

        if( f->start == f->end && !xfill( f ) )
            break;      /* EOF (or read error) */

        c = f->buffer[ f->start++ ];

        if( c == '\n' || c == '\r' )
        {
            /* Swallow the second half of a two-character ending, */
            /* which may sit at the start of the next buffer load. */

            if( c == first )
            {
                if( f->start == f->end )
                    xfill( f );

                if( f->start < f->end && f->buffer[ f->start ] == second )
                    f->start++;
            }

            buf[ n++ ] = '\n';
            buf[ n ] = 0;
            return buf;
        }

        buf[ n++ ] = c;
    }

    buf[ n ] = 0;
    return n ? buf : NULL;
}

/*
 * xfclose() - close the underlying stream and release the XFILE.
 */
static void
xfclose( XFILE *f )
{
    fclose( f->f );
    free( (void *)f );
}

#endif

struct include {
    struct include  *next;      /* next serial include file */
    const char      *string;    /* pointer into current line */
    char            **strings;  /* for yyfparse() -- text to parse */
#ifdef GRAPHISOFT_FIX_NONNATIVENEWLINE
    XFILE           *file;      /* for yyfparse() -- file being read */
#else
    FILE            *file;      /* for yyfparse() -- file being read */
#endif
    const char      *fname;     /* for yyfparse() -- file name */
    int             line;       /* line counter for error messages */
    char            buf[ 512 ]; /* for yyfparse() -- line buffer */
} ;

static struct include *incp = 0;    /* current file; head of chain */

static int scanmode = SCAN_NORMAL;
static int anyerrors = 0;
static char *symdump( YYSTYPE *s );

#ifdef GRAPHISOFT_LEXSCAN
static int lexscanmode = LEXSCANMODE_COMPATIBLE;
#endif

# define BIGGEST_TOKEN 10240    /* no single token can be larger */

#ifdef GRAPHISOFT_LEXSCAN

/*
 * initscan() - set up the token separator tables and the lexical scan mode
 *
 * inlexscanmode is stored in lexscanmode and compared by yylex() against
 * LEXSCANMODE_OLD and LEXSCANMODE_COMPATIBLE.
 */
void
initscan( int inlexscanmode )
{
    int i;

    for( i = 0; i < 256; i++ )
    {
        tokenseparator[ i ] = 0;
        tokenseparatornext[ i ] = 0;
    }

    tokenseparator[ (unsigned char)'[' ] = 1;
    tokenseparator[ (unsigned char)']' ] = 1;
    tokenseparator[ (unsigned char)'(' ] = 1;
    tokenseparator[ (unsigned char)')' ] = 1;
    tokenseparator[ (unsigned char)'{' ] = 1;
    tokenseparator[ (unsigned char)'}' ] = 1;
    tokenseparator[ (unsigned char)'!' ] = 1;   /* Note: != should be parsed!! */
    tokenseparator[ (unsigned char)'?' ] = 1;   /* Note: ?= should be parsed!! */
    tokenseparator[ (unsigned char)'=' ] = 1;   /* Note: !=, ?= should be parsed! */
    tokenseparator[ (unsigned char)'+' ] = 3;   /* Note: + is not a valid token, but "+=" is!! */
                                                /* This is problematic.. += shall be a separator, but ++ shouldn't */
    tokenseparator[ (unsigned char)':' ] = 1;
    tokenseparator[ (unsigned char)';' ] = 1;
    tokenseparator[ (unsigned char)' ' ] = 1;
    tokenseparator[ (unsigned char)'\t' ] = 1;
    tokenseparator[ (unsigned char)'\n' ] = 1;
    tokenseparator[ (unsigned char)'\r' ] = 1;
    tokenseparator[ (unsigned char)'<' ] = 1;   /* Note: <= should be parsed!! */
    tokenseparator[ (unsigned char)'>' ] = 1;   /* Note: >= should be parsed!! */
    tokenseparator[ (unsigned char)'&' ] = 2;   /* Note: & is not a valid token, but "&&" is!! */
    tokenseparator[ (unsigned char)'|' ] = 2;   /* Note: | is not a valid token, but "||" is!! */

    tokenseparatornext[ (unsigned char)'?' ] = '=';
    tokenseparatornext[ (unsigned char)'!' ] = '=';
    tokenseparatornext[ (unsigned char)'+' ] = '=';
    tokenseparatornext[ (unsigned char)'<' ] = '=';
    tokenseparatornext[ (unsigned char)'>' ] = '=';
    tokenseparatornext[ (unsigned char)'&' ] = '&';
    tokenseparatornext[ (unsigned char)'|' ] = '|';

    lexscanmode = inlexscanmode;
}

#endif

/*
 * Set parser mode: normal, string, or keyword
 */

void
yymode( int n )
{
    scanmode = n;
}

typedef enum { message_warning = 0, message_error = 1 } message_type;

#ifdef GRAPHISOFT_JAM

#if defined (_MSC_VER)
#define JAM_CDECL __cdecl
#else
#define JAM_CDECL
#endif

/*
 * putmessage() - print a printf-style diagnostic with a file/line prefix
 *
 * Warnings go to stdout, errors to stderr.  The prefix format depends on
 * the platform macros OS_MAC / NT.  When file is NULL (no input file is
 * active) the prefix is omitted instead of handing NULL to "%s".
 * A newline is appended after the message.
 */
void JAM_CDECL
putmessage( message_type type, const char* file, int line, const char* message, ... )
{
    FILE *stream = ( type == message_warning ) ? stdout : stderr;
    va_list params;

    if( file )
    {
#ifdef OS_MAC
        fprintf( stream, "File '%s' ; line %d \n# ", file, line );
#elif defined (NT)
        fprintf( stream, "%s(%d) : ", file, line );
#else
        fprintf( stream, "%s: line %d: ", file, line );
#endif
    }

    va_start( params, message );
    vfprintf( stream, message, params );
    va_end( params );

    fprintf( stream, "\n" );
}

#endif

/*
 * yyerror() - report a parse error at the current token and count it
 */
void
yyerror( const char *s )
{
#ifdef GRAPHISOFT_JAM
    if( incp )
        putmessage( message_error, incp->fname, incp->line, "%s at %s", s, symdump( &yylval ) );
    else
        putmessage( message_error, 0, 0, "%s at %s", s, symdump( &yylval ) );
#else
    if( incp )
        printf( "%s: line %d: ", incp->fname, incp->line );

    printf( "%s at %s\n", s, symdump( &yylval ) );
#endif

    ++anyerrors;
}

#ifdef GRAPHISOFT_JAM

/*
 * yywarning() - report a warning at the current token (not counted as error)
 */
void
yywarning( const char *s )
{
    /* incp is NULL once all input is consumed; don't dereference it then. */

    if( incp )
        putmessage( message_warning, incp->fname, incp->line, "%s at %s", s, symdump( &yylval ) );
    else
        putmessage( message_warning, 0, 0, "%s at %s", s, symdump( &yylval ) );
}

#endif

/*
 * yyanyerrors() - non-zero if yyerror() has been called at least once
 */
int
yyanyerrors()
{
    return anyerrors != 0;
}

/*
 * yyfparse() - push a new input source onto the include chain
 *
 * s is the file name; "+" selects the built-in jambase text.  The file
 * itself is opened lazily by yyline().
 */
void
yyfparse( const char *s )
{
    struct include *i = (struct include *)malloc( sizeof( *i ) );

    /* Without the include record nothing can be scanned; give up cleanly */
    /* rather than dereferencing NULL below. */

    if( !i )
    {
        perror( "malloc" );
        exit( 1 );
    }

    /* Push this onto the incp chain. */

    i->string = "";
    i->strings = 0;
    i->file = 0;
    i->fname = copystr( s );
    i->line = 0;
    i->next = incp;
    incp = i;

    /* If the filename is "+", it means use the internal jambase. */

    if( !strcmp( s, "+" ) )
        i->strings = jambase;
}

/*
 * yyline() - read new line and return first character
 *
 * Fabricates a continuous stream of characters across include files,
 * returning EOF at the end of each include (which is popped and freed).
 */

int
yyline()
{
    struct include *i = incp;

    if( !incp )
        return EOF;

#ifdef GRAPHISOFT_MPW_FIX
#if defined (macintosh)
    if( !( ++totalline % 1000 ) )
        SpinCursor( 1 );
#endif
#endif

    /* Once we start reading from the input stream, we reset the */
    /* include insertion point so that the next include file becomes */
    /* the head of the list. */

    /* If there is more data in this line, return it. */

    if( *i->string )
        return *i->string++;

    /* If we're reading from an internal string list, go to the */
    /* next string. */

    if( i->strings )
    {
        if( !*i->strings )
            goto next;

        i->line++;
        i->string = *(i->strings++);
        return *i->string++;
    }

    /* If necessary, open the file */

    if( !i->file )
    {
#ifdef GRAPHISOFT_FIX_NONNATIVENEWLINE
        /* Note: there is no XFILE for stdin, so "-" yields no input here. */
        XFILE *f = NULL;

        if( strcmp( i->fname, "-" ) && !( f = xfopen( i->fname, "r" ) ) )
            perror( i->fname );
#else
        FILE *f = stdin;

        if( strcmp( i->fname, "-" ) && !( f = fopen( i->fname, "r" ) ) )
            perror( i->fname );
#endif

        i->file = f;
    }

    /* If there's another line in this file, start it. */

#ifdef GRAPHISOFT_FIX_NONNATIVENEWLINE
    if( i->file && xfgets( i->buf, sizeof( i->buf ), i->file ) )
#else
    if( i->file && fgets( i->buf, sizeof( i->buf ), i->file ) )
#endif
    {
        i->line++;
        i->string = i->buf;
        return *i->string++;
    }

next:
    /* This include is done. */
    /* Free it up and return EOF so yyparse() returns to parse_file(). */

    incp = i->next;

    /* Close file, free name */

#ifdef GRAPHISOFT_FIX_NONNATIVENEWLINE
    if( i->file )
        xfclose( i->file );
#else
    if( i->file && i->file != stdin )
        fclose( i->file );
#endif

    freestr( i->fname );
    free( (char *)i );

    return EOF;
}

/*
 * yylex() - set yylval to current token; return its type
 *
 * Macros to move things along:
 *
 *  yychar() - return and advance character; invalid after EOF
 *  yyprev() - back up one character; invalid before yychar()
 *
 * yychar() returns a continuous stream of characters, until it hits
 * the EOF of the current include file.
 *
 * Characters are plain chars widened to int, so they are converted to
 * unsigned char before being used as a table index or handed to the
 * <ctype.h> classifiers.
 *
 * NOTE(review): the GRAPHISOFT_LEXSCAN code calls putmessage(), which is
 * only defined under GRAPHISOFT_JAM -- the two macros appear to be meant
 * to be defined together; confirm against the build settings.
 */

# define yychar() ( *incp->string ? *incp->string++ : yyline() )
# define yyprev() ( incp->string-- )

int
yylex()
{
    int c;
    char buf[ BIGGEST_TOKEN ];
    char *b = buf;

    if( !incp )
        goto eof;

    /* Get first character (whitespace or of token) */

    c = yychar();

    if( scanmode == SCAN_STRING )
    {
        /* If scanning for a string (action's {}'s), look for the */
        /* closing brace.  We handle matching braces, if they match! */

        int nest = 1;

        while( c != EOF && b < buf + sizeof( buf ) )
        {
            if( c == '{' )
                nest++;

            if( c == '}' && !--nest )
                break;

            *b++ = c;

            c = yychar();
        }

        /* We ate the ending brace -- regurgitate it. */

        if( c != EOF )
            yyprev();

        /* Check obvious errors. */

        if( b == buf + sizeof( buf ) )
        {
            yyerror( "action block too big" );
            goto eof;
        }

        if( nest )
        {
            yyerror( "unmatched {} in action block" );
            goto eof;
        }

        *b = 0;
        yylval.type = STRING;
        yylval.string = newstr( buf );
    }
    else
    {
        struct keyword *k;
        int inquote = 0;
#ifdef GRAPHISOFT_LEXSCAN
        int invariable = 0;         /* Nesting depth of open $(..) variable expressions. */
        int wasdollar = 0;          /* Last character was a '$' */
        int istokenseparator = 0;   /* Look-ahead character ends the current word */
#endif
        int notkeyword;

        /* Eat white space */

        for( ;; )
        {
            /* Skip past white space */

            while( c != EOF && isspace( (unsigned char)c ) )
                c = yychar();

            /* Not a comment?  Swallow up comment line. */

            if( c != '#' )
                break;

            while( ( c = yychar() ) != EOF && c != '\n' && c != '\r' )
                ;
        }

        /* c now points to the first character of a token. */

        if( c == EOF )
            goto eof;

        /* While scanning the word, disqualify it for (expensive) */
        /* keyword lookup when we can: $anything, "anything", \anything */

        notkeyword = c == '$';

#ifdef GRAPHISOFT_LEXSCAN

        /* See if this is a single char token from tokenseparator */

        if( lexscanmode != LEXSCANMODE_OLD && tokenseparator[ (unsigned char)c ] )
        {
            unsigned char oldc = (unsigned char)c;

            *b++ = c;
            c = yychar();

            /* Parse tokens starting with a token separator: */
            /* ?=, <=, >=, &&, ||   (a 0 entry means "no second char"). */

            if( tokenseparatornext[ oldc ] && c == tokenseparatornext[ oldc ] )
            {
                *b++ = c;
                c = yychar();
            }

            if( lexscanmode == LEXSCANMODE_COMPATIBLE )
            {
                if( c != EOF && !isspace( (unsigned char)c ) && tokenseparator[ oldc ] != 3 )
                {
                    if( incp )
                        putmessage( message_warning, incp->fname, incp->line,
                                    "Use space before: %c, or put quotes around the string", *(b-1) );
                }
            }
        }
        else
        {
            /* look for a token separator to delimit word */
            /* "'s get stripped but preserve white space */
            /* \ protects next character */

            while( c != EOF && b < buf + sizeof( buf ) && ( inquote || !istokenseparator ) )
            {
                int isdollar = 0;

                if( c == '"' )
                {
                    /* begin or end " */
                    inquote = !inquote;
                    notkeyword = 1;
                }
                else if( c != '\\' )
                {
                    /* Track $( .. ) nesting so separators inside a */
                    /* variable expression do not split the word. */

                    if( !inquote )
                    {
                        if( c == '$' )
                        {
                            isdollar = 1;
                        }
                        else if( c == '(' )
                        {
                            if( wasdollar && !invariable )
                            {
                                invariable = 1;
                            }
                            else if( invariable )
                            {
                                invariable++;
                            }
                        }
                        else if( c == ')' && invariable )
                        {
                            invariable--;
                        }
                    }

                    /* normal char */
                    *b++ = c;
                }
                else if( ( c = yychar() ) != EOF )
                {
                    /* \c */
                    *b++ = c;
                    notkeyword = 1;
                }
                else
                {
                    /* \EOF */
                    break;
                }

                wasdollar = isdollar;
                c = yychar();

                istokenseparator = ( c == EOF ) ||
                    ( !invariable && !inquote && !( wasdollar && c == '(' ) &&
                      tokenseparator[ (unsigned char)c ] &&
                      tokenseparator[ (unsigned char)c ] != 3 );

                /* incompatibility in new and old lexscanmode: */
                /* a non-blank separator ends the word only in the new mode. */
                /* EOF is excluded so it is never reported as a character. */

                if( c != EOF && !inquote && istokenseparator && !isspace( (unsigned char)c ) )
                {
                    if( lexscanmode == LEXSCANMODE_COMPATIBLE )
                    {
                        if( incp )
                            putmessage( message_warning, incp->fname, incp->line,
                                        "Use space before: %c, or put quotes around the string", c );
                    }

                    if( lexscanmode == LEXSCANMODE_COMPATIBLE || lexscanmode == LEXSCANMODE_OLD )
                    {
                        istokenseparator = 0;
                    }
                }
            }
        }

#else /* !GRAPHISOFT_LEXSCAN */

        /* look for white space to delimit word */
        /* "'s get stripped but preserve white space */
        /* \ protects next character */

        while( c != EOF && b < buf + sizeof( buf ) &&
               ( inquote || !isspace( (unsigned char)c ) ) )
        {
            if( c == '"' )
            {
                /* begin or end " */
                inquote = !inquote;
                notkeyword = 1;
            }
            else if( c != '\\' )
            {
                /* normal char */
                *b++ = c;
            }
            else if( ( c = yychar() ) != EOF )
            {
                /* \c */
                *b++ = c;
                notkeyword = 1;
            }
            else
            {
                /* \EOF */
                break;
            }

            c = yychar();
        }

#endif /* GRAPHISOFT_LEXSCAN */

        /* Check obvious errors. */

        if( b == buf + sizeof( buf ) )
        {
            yyerror( "string too big" );
            goto eof;
        }

        if( inquote )
        {
            yyerror( "unmatched \" in string" );
            goto eof;
        }

        /* We looked ahead a character - back up. */

        if( c != EOF )
            yyprev();

        /* scan token table */
        /* don't scan if it's obviously not a keyword or if its */
        /* an alphabetic when were looking for punctuation */

        *b = 0;
        yylval.type = ARG;

        if( !notkeyword && !( isalpha( (unsigned char)*buf ) && scanmode == SCAN_PUNCT ) )
        {
            for( k = keywords; k->word; k++ )
                if( *buf == *k->word && !strcmp( k->word, buf ) )
                {
                    yylval.type = k->type;
                    yylval.string = k->word;    /* used by symdump */
                    break;
                }
        }

        if( yylval.type == ARG )
            yylval.string = newstr( buf );
    }

    if( DEBUG_SCAN )
        printf( "scan %s\n", symdump( &yylval ) );

    return yylval.type;

eof:
    yylval.type = EOF;
    return yylval.type;
}

/*
 * symdump() - describe a token for diagnostics
 *
 * Returns a pointer to a static buffer that is overwritten by each call.
 */
static char *
symdump( YYSTYPE *s )
{
    static char buf[ BIGGEST_TOKEN + 20 ];

    switch( s->type )
    {
    case EOF:
        sprintf( buf, "EOF" );
        break;
    case 0:
        sprintf( buf, "unknown symbol %s", s->string );
        break;
    case ARG:
        sprintf( buf, "argument %s", s->string );
        break;
    case STRING:
        sprintf( buf, "string \"%s\"", s->string );
        break;
    default:
        sprintf( buf, "keyword %s", s->string );
        break;
    }

    return buf;
}
# | Change | User | Description | Committed | |
---|---|---|---|---|---|
#9 | 2985 | Miklos Fazekas | Scan.c bugfix | ||
#8 | 2983 | Miklos Fazekas | Fixed error in handling cr/lf problem | ||
#7 | 2642 | Miklos Fazekas | Sync to 2.5rc2 | ||
#6 | 2579 | Miklos Fazekas | GSJam to 2.5rc1 integration | ||
#5 | 2578 | Miklos Fazekas | Integrate new lexical scanner code to GSJam | ||
#4 | 2539 | Miklos Fazekas | Updated sources | ||
#3 | 2519 | Miklos Fazekas | Sync to 2.5rc1 | ||
#2 | 1395 | Miklos Fazekas | Merge with main jam | ||
#1 | 1212 | Miklos Fazekas | Created a Jam branch | ||
//guest/perforce_software/jam/src/scan.c | |||||
#2 | 486 | Perforce staff | Jam 2.3. See RELNOTES for a list of changes from 2.2.x. Just about every source file was touched when jam got ANSI-fied. | ||
#1 | 2 | laura | Add Jam/MR 2.2 source |