/*
// $Id: //guest/paul_dymecki/mondrian/src/main/mondrian/olap/Scanner.java#1 $
// This software is subject to the terms of the Common Public License
// Agreement, available at the following URL:
// http://www.opensource.org/licenses/cpl.html.
// (C) Copyright 1998-2002 Kana Software, Inc. and others.
// All Rights Reserved.
// You must accept the terms of that agreement to use this software.
//
// jhyde, 20 January, 1999
*/
package mondrian.olap;
import java_cup.runtime.*;
import java.util.*;
/**
* Lexical analyzer for MDX.
*/
public class Scanner {
/** single lookahead character */
protected int nextChar;
/** next lookahead character */
private int lookaheadChar = 0;
private Hashtable m_resWordsTable;
private int iMaxResword;
private String m_aResWords[];
protected boolean debug;
private Vector lines; // lines[x] is the start of the x'th line
private int iChar; // number of times advance() has been called
private int iPrevChar; // end of previous token
private int previousSymbol; // previous symbol returned
private boolean inFormula;
/**
 * Creates a scanner. Call {@link #init} before asking for the first token.
 *
 * @param debug whether every symbol returned should be printed to
 *   System.err
 */
Scanner(boolean debug) {
    this.debug = debug;
}
/**
 * Advances the input by one character, storing it in {@link #nextChar}.
 *
 * <p>If <code>lookahead()</code> has already fetched a character, that
 * character is used and the pending lookahead is cleared; otherwise a new
 * character is read with {@link #getChar}. Every line-feed is noted in
 * <code>lines</code> so that positions can later be converted to
 * line/column.
 */
private final void advance()
    throws java.io.IOException
{
    final boolean lookaheadPending = lookaheadChar != 0;
    if (lookaheadPending) {
        // Consume the character which lookahead() already read.
        nextChar = lookaheadChar;
        lookaheadChar = 0;
    } else {
        nextChar = getChar();
    }
    if (nextChar == '\n') {
        // Record the count of advance() calls made before this one.
        lines.addElement(new Integer(iChar));
    }
    ++iChar;
}
/**
 * Peeks at the character after {@link #nextChar} without advancing.
 *
 * <p>The character is read at most once: if a lookahead character is
 * already pending, it is returned again rather than reading (and thereby
 * silently dropping) another character from the input. The pending
 * character is consumed by the next call to <code>advance()</code>.
 *
 * <p>NOTE: 0 is the "nothing pending" marker, so a NUL character in the
 * input cannot be held as lookahead.
 *
 * @return the character following {@link #nextChar}, or -1 at end of input
 */
private final int lookahead()
    throws java.io.IOException
{
    if (lookaheadChar == 0) {
        lookaheadChar = getChar();
    }
    return lookaheadChar;
}
/**
 * Reads the next character of input. This implementation reads from
 * standard input; being protected and non-final, it can be overridden to
 * read from another source.
 *
 * @return the value read, or -1 at end of input
 */
protected int getChar()
    throws java.io.IOException
{
    final int c = System.in.read();
    return c;
}
/**
 * Initializes the scanner: clears the line table and the position
 * counters, builds the reserved-word tables, and reads the first character
 * of input into {@link #nextChar}.
 */
public void init()
    throws java.io.IOException
{
    lines = new Vector();
    iPrevChar = 0;
    iChar = 0;
    initReswords();
    // Prime nextChar so that next_token() has something to look at.
    advance();
}
/**
 * Deduces the line and column (both 0-based) of a symbol. Called by {@link
 * Parser#syntax_error}.
 *
 * <p>Each entry of <code>lines</code> is the position of a line-feed
 * character, so the line which follows it starts one position later.
 * Using the line-feed position itself as the start of the line would make
 * columns on every line but the first come out one too large.
 *
 * @param symbol symbol whose <code>left</code> position is to be located
 * @param loc receives the line in <code>loc[0]</code> and the column in
 *   <code>loc[1]</code>
 */
void getLocation(Symbol symbol, int[] loc)
{
    final int target = symbol.left;
    int line = 0;
    int lineStart = 0;
    final int lineFeedCount = lines.size();
    for (int i = 0; i < lineFeedCount; i++) {
        final int lineFeed = ((Integer) lines.elementAt(i)).intValue();
        if (lineFeed >= target) {
            // The target lies on or before this line-feed, hence on the
            // line we have already reached.
            break;
        }
        line = i + 1;
        lineStart = lineFeed + 1;
    }
    loc[0] = line; // line
    loc[1] = target - lineStart; // column
}
/**
 * Prints a symbol to System.err if debugging is enabled, giving its token
 * code, its reserved-word name (if it has one) and its value (if it has
 * one).
 *
 * @param s symbol about to be returned to the parser
 * @return <code>s</code>, unchanged
 */
private Symbol trace(Symbol s)
{
    if (!debug) {
        return s;
    }
    // Only codes inside the reserved-word table can have a name.
    final String name =
        s.sym < m_aResWords.length ? m_aResWords[s.sym] : null;
    StringBuffer buf = new StringBuffer("Scanner returns #");
    buf.append(s.sym);
    if (name != null) {
        buf.append(":").append(name);
    }
    if (s.value != null) {
        buf.append("(").append(s.value.toString()).append(")");
    }
    System.err.println(buf.toString());
    return s;
}
/**
 * Registers a reserved word, and keeps track of the largest token code
 * registered so far.
 *
 * @param id token code of the reserved word
 * @param s the reserved word, in upper case
 */
private void initResword(int id, String s)
{
    m_resWordsTable.put(s, new Integer(id));
    iMaxResword = Math.max(iMaxResword, id);
}
/**
 * Builds the reserved-word tables: the map from word to token code, and
 * the array which maps a token code back to its word.
 */
private void initReswords()
{
    // This list generated by piping the 'terminal' declaration in mdx.cup
    // through:
    //   grep -v // |
    //   sed -e 's/,//' |
    //   awk '{printf "initResword(%20s,%c%s%c);",$1,34,$1,34}'
    // Words which are commented out are not reserved.
    m_resWordsTable = new Hashtable();
    iMaxResword = 0;
//  initResword(ParserSym.ALL, "ALL");
    initResword(ParserSym.AND, "AND");
    initResword(ParserSym.AS, "AS");
//  initResword(ParserSym.ASC, "ASC");
//  initResword(ParserSym.AXIS, "AXIS");
//  initResword(ParserSym.BACK_COLOR, "BACK_COLOR");
//  initResword(ParserSym.BASC, "BASC");
//  initResword(ParserSym.BDESC, "BDESC");
    initResword(ParserSym.CELL, "CELL");
//  initResword(ParserSym.CELL_ORDINAL, "CELL_ORDINAL");
//  initResword(ParserSym.CHAPTERS, "CHAPTERS");
//  initResword(ParserSym.CHILDREN, "CHILDREN");
//  initResword(ParserSym.COLUMNS, "COLUMNS");
//  initResword(ParserSym.DESC, "DESC");
    initResword(ParserSym.DIMENSION, "DIMENSION");
    initResword(ParserSym.EMPTY, "EMPTY");
//  initResword(ParserSym.FIRSTCHILD, "FIRSTCHILD");
//  initResword(ParserSym.FIRSTSIBLING, "FIRSTSIBLING");
//  initResword(ParserSym.FONT_FLAGS, "FONT_FLAGS");
//  initResword(ParserSym.FONT_NAME, "FONT_NAME");
//  initResword(ParserSym.FONT_SIZE, "FONT_SIZE");
//  initResword(ParserSym.FORE_COLOR, "FORE_COLOR");
    initResword(ParserSym.FORMATTED_VALUE, "FORMATTED_VALUE");
//  initResword(ParserSym.FORMAT_STRING, "FORMAT_STRING");
    initResword(ParserSym.FROM, "FROM");
//  initResword(ParserSym.LAG, "LAG");
//  initResword(ParserSym.LASTCHILD, "LASTCHILD");
//  initResword(ParserSym.LASTSIBLING, "LASTSIBLING");
//  initResword(ParserSym.LEAD, "LEAD");
    initResword(ParserSym.MEMBER, "MEMBER");
//  initResword(ParserSym.MEMBERS, "MEMBERS");
//  initResword(ParserSym.NEXTMEMBER, "NEXTMEMBER");
    initResword(ParserSym.NON, "NON");
    initResword(ParserSym.NOT, "NOT");
    initResword(ParserSym.ON, "ON");
    initResword(ParserSym.OR, "OR");
//  initResword(ParserSym.PAGES, "PAGES");
//  initResword(ParserSym.PARENT, "PARENT");
//  initResword(ParserSym.PREVMEMBER, "PREVMEMBER");
    initResword(ParserSym.PROPERTIES, "PROPERTIES");
//  initResword(ParserSym.RECURSIVE, "RECURSIVE");
//  initResword(ParserSym.ROWS, "ROWS");
//  initResword(ParserSym.SECTIONS, "SECTIONS");
    initResword(ParserSym.SELECT, "SELECT");
    initResword(ParserSym.SET, "SET");
//  initResword(ParserSym.SOLVE_ORDER, "SOLVE_ORDER");
    initResword(ParserSym.VALUE, "VALUE");
    initResword(ParserSym.WHERE, "WHERE");
    initResword(ParserSym.WITH, "WITH");
    initResword(ParserSym.XOR, "XOR");

    // Invert the map, so that a token code can be turned back into a word.
    m_aResWords = new String[iMaxResword + 1];
    for (Enumeration words = m_resWordsTable.keys();
         words.hasMoreElements();) {
        final String word = (String) words.nextElement();
        final Integer code = (Integer) m_resWordsTable.get(word);
        m_aResWords[code.intValue()] = word;
    }
}
/**
 * Returns the name of the reserved word whose token code is
 * <code>i</code>.
 *
 * <p>Codes outside the reserved-word table yield null, the same answer as
 * for a code inside the table which is not a reserved word, rather than an
 * <code>ArrayIndexOutOfBoundsException</code>.
 *
 * @param i token code
 * @return the reserved word, or null if <code>i</code> is not the code of
 *   a reserved word
 */
public String lookupReserved(int i)
{
    if (i < 0 || i >= m_aResWords.length) {
        return null;
    }
    return m_aResWords[i];
}
/**
 * Creates the symbol for the token which has just been scanned, remembers
 * where it ended and what kind it was, and traces it.
 *
 * <p>The scanner always reads one character beyond the token it returns:
 * when this method is called, {@link #nextChar} holds the first character
 * after the token. <code>iChar</code> counts calls to
 * <code>advance()</code>, so it is one greater than the offset of that
 * character, and the token therefore ends at offset
 * <code>iChar - 1</code>. Using <code>iChar</code> itself would make the
 * end of every token, and the start of every token which directly follows
 * another token, one character too large.
 *
 * @param id token code
 * @param o value of the symbol
 * @return the new symbol; its left position is the offset of the token's
 *   first character, and its right position is the offset just after its
 *   last character
 */
private Symbol makeSymbol(int id, Object o)
{
    final int start = iPrevChar;
    final int end = iChar - 1;
    this.iPrevChar = end;
    this.previousSymbol = id;
    return trace(new Symbol(id, start, end, o));
}
/**
 * Creates a NUMBER symbol whose value is
 * <code>mantissa * 10<sup>exponent</sup></code>.
 *
 * <p>For a negative exponent the mantissa is divided by the positive power
 * of ten, because negative powers of ten are not exact in binary:
 * <code>3 * pow(10, -1)</code> is 0.30000000000000004, whereas
 * <code>3 / pow(10, 1)</code> is 0.3.
 *
 * @param mantissa mantissa of the number
 * @param exponent power of ten to scale the mantissa by
 * @return a symbol whose value is a {@link Double}
 */
private Symbol makeNumber(double mantissa, int exponent)
{
    final double d;
    if (mantissa == 0.0 || exponent == 0) {
        // Nothing to scale. This also prevents 0 * infinity, which is NaN,
        // for a zero mantissa with a huge exponent.
        d = mantissa;
    } else if (exponent < 0 && exponent >= -308) {
        // 10^308 is the largest power of ten which is a finite double, so
        // the divisor is finite throughout this range.
        d = mantissa / java.lang.Math.pow(10, -exponent);
    } else {
        d = mantissa * java.lang.Math.pow(10, exponent);
    }
    return makeSymbol(ParserSym.NUMBER, new Double(d));
}
/**
 * Creates a symbol for an identifier.
 *
 * @param s text of the identifier
 * @param quoted whether the identifier was delimited by square brackets
 * @param ampersand whether the opening bracket was preceded by an
 *   ampersand; only significant if <code>quoted</code> is true
 * @return an AMP_QUOTED_ID, QUOTED_ID or ID symbol whose value is
 *   <code>s</code>
 */
private Symbol makeId(String s, boolean quoted, boolean ampersand)
{
    final int type;
    if (!quoted) {
        type = ParserSym.ID;
    } else if (ampersand) {
        type = ParserSym.AMP_QUOTED_ID;
    } else {
        type = ParserSym.QUOTED_ID;
    }
    return makeSymbol(type, s);
}
/**
 * Creates a symbol for a reserved word; the value of the symbol is the
 * word as held in the reserved-word table.
 *
 * @param i token code of the reserved word
 */
private Symbol makeRes(int i)
{
    final String word = m_aResWords[i];
    return makeSymbol(i, word);
}
/**
 * Creates a symbol for a punctuation or other fixed token.
 *
 * @param i token code
 * @param s text of the token, which becomes the value of the symbol
 */
private Symbol makeToken(int i, String s)
{
    return makeSymbol(i, s);
}
/**
 * Creates a STRING symbol.
 *
 * @param s contents of the string literal, which become the value of the
 *   symbol
 */
private Symbol makeString(String s)
{
    return makeSymbol(ParserSym.STRING, s);
}
/**
 * Recognizes and returns the next complete token.
 *
 * <p>On entry {@link #nextChar} is the first character which has not yet
 * been consumed; on exit it is the first character after the token
 * returned. Characters which do not start any token (white space, for
 * instance) are skipped. At end of input an EOF symbol is returned.
 *
 * @return the next symbol
 */
public Symbol next_token()
    throws java.io.IOException
{
    for (;;) {
        if (isAsciiDigit(nextChar)) {
            return scanNumber();
        }
        if (isAsciiLetter(nextChar)) {
            return scanIdentifier();
        }
        switch (nextChar) {
        case '.':
            if (isAsciiDigit(lookahead())) {
                // The '.' is the start of a number, e.g. .1
                return scanNumber();
            }
            advance();
            return makeToken(ParserSym.DOT, ".");

        case '&':
            advance();
            if (nextChar == '[') {
                return scanDelimitedId(true);
            }
            return makeToken(ParserSym.UNKNOWN, "&");

        case '[':
            return scanDelimitedId(false);

        case ':': advance(); return makeToken(ParserSym.COLON, ":");
        case ',': advance(); return makeToken(ParserSym.COMMA, ",");
        case '=': advance(); return makeToken(ParserSym.EQ, "=");

        case '<':
            advance();
            switch (nextChar) {
            case '>': advance(); return makeToken(ParserSym.NE, "<>");
            case '=': advance(); return makeToken(ParserSym.LE, "<=");
            default: return makeToken(ParserSym.LT, "<");
            }

        case '>':
            advance();
            switch (nextChar) {
            case '=': advance(); return makeToken(ParserSym.GE, ">=");
            default: return makeToken(ParserSym.GT, ">");
            }

        case '{': advance(); return makeToken(ParserSym.LBRACE, "{");
        case '(': advance(); return makeToken(ParserSym.LPAREN, "(");
        case '}': advance(); return makeToken(ParserSym.RBRACE, "}");
        case ')': advance(); return makeToken(ParserSym.RPAREN, ")");
        case '+': advance(); return makeToken(ParserSym.PLUS, "+");
        case '-': advance(); return makeToken(ParserSym.MINUS, "-");
        case '*': advance(); return makeToken(ParserSym.ASTERISK, "*");
        case '/': advance(); return makeToken(ParserSym.SOLIDUS, "/");

        case '|':
            advance();
            switch (nextChar) {
            case '|': advance(); return makeToken(ParserSym.CONCAT, "||");
            default: return makeToken(ParserSym.UNKNOWN, "|");
            }

        case '"':
            return scanString('"');

        case '\'':
            if (inFormula) {
                // This quote closes a formula.
                inFormula = false;
                advance();
                return makeToken(ParserSym.QUOTE, "'");
            } else if (previousSymbol == ParserSym.AS) {
                // A quote directly after AS opens a formula, whose
                // contents are scanned as ordinary tokens.
                inFormula = true;
                advance();
                return makeToken(ParserSym.QUOTE, "'");
            }
            return scanString('\'');

        case -1:
            // we're done
            return makeToken(ParserSym.EOF, "EOF");

        default:
            // Ignore everything else. Moving iPrevChar along makes the
            // next token start after the ignored character.
            iPrevChar = iChar;
            advance();
            break;
        }
    }
}

/** Returns whether <code>c</code> is one of the characters 0 to 9. */
private static boolean isAsciiDigit(int c)
{
    return c >= '0' && c <= '9';
}

/** Returns whether <code>c</code> is one of the letters a-z or A-Z. */
private static boolean isAsciiLetter(int c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

/**
 * Scans a number. On entry {@link #nextChar} is a digit, or a '.' which is
 * known to be followed by a digit.
 *
 * <p>Valid examples include 1, 1.2, 0.1, .1, 1e2, 1E2, 1e-2, 1e+2.
 * Invalid examples include e2, 1.2.3, 1e2e3, 1e2.3; for these, the scanner
 * returns the number it has seen so far and leaves the rest for the parser
 * to reject. Signs preceding numbers (e.g. -1, +1E-5) are valid, but are
 * handled by the parser.
 *
 * <p>The characters of the number are collected and converted by
 * {@link Double#parseDouble}, so that the result is the double nearest to
 * the decimal text. Accumulating the digits arithmetically would overflow
 * on long digit sequences and would lose precision (0.3 would come out as
 * 0.30000000000000004).
 */
private Symbol scanNumber()
    throws java.io.IOException
{
    final int leftOfPoint = 0;
    final int rightOfPoint = 1;
    final int inExponent = 2;
    StringBuffer mantissaText = new StringBuffer();
    StringBuffer exponentDigits = new StringBuffer();
    boolean negativeExponent = false;
    int state = leftOfPoint;
    for (;;) {
        if (nextChar == '.') {
            if (state != leftOfPoint) {
                // Error: a point in the exponent (e.g. 1E2.3) or a second
                // point in the mantissa (e.g. 1.2.3). Return what we've
                // got and let the parser raise the error.
                break;
            }
            state = rightOfPoint;
            mantissaText.append('.');
            advance();
        } else if (nextChar == 'E' || nextChar == 'e') {
            if (state == inExponent) {
                // Error: an 'e' in the exponent (e.g. 1.2e3e4). Return
                // what we've got and let the parser raise the error.
                break;
            }
            state = inExponent;
            advance();
        } else if ((nextChar == '+' || nextChar == '-')
            && state == inExponent
            && exponentDigits.length() == 0)
        {
            // A sign after the 'e'. Only '-' changes the sign of the
            // exponent; '+' leaves it alone.
            if (nextChar == '-') {
                negativeExponent = !negativeExponent;
            }
            advance();
        } else if (isAsciiDigit(nextChar)) {
            if (state == inExponent) {
                exponentDigits.append((char) nextChar);
            } else {
                mantissaText.append((char) nextChar);
            }
            advance();
        } else {
            // Reached end of number.
            break;
        }
    }
    // The mantissa always contains at least one digit. An exponent marker
    // which was not followed by any digit is dropped.
    StringBuffer text = new StringBuffer(mantissaText.toString());
    if (exponentDigits.length() > 0) {
        text.append('e');
        if (negativeExponent) {
            text.append('-');
        }
        text.append(exponentDigits.toString());
    }
    final double value = Double.parseDouble(text.toString());
    // The exponent is already part of the value.
    return makeNumber(value, 0);
}

/**
 * Scans an unquoted identifier or a reserved word. On entry
 * {@link #nextChar} is a letter; the identifier extends over the letters,
 * digits and underscores which follow.
 *
 * <p>Reserved words are recognized regardless of case. The identifier is
 * upper-cased using a fixed locale, because under some default locales
 * (Turkish, for example) the upper case of "i" is not "I", and words such
 * as WITH would not be recognized.
 */
private Symbol scanIdentifier()
    throws java.io.IOException
{
    StringBuffer id = new StringBuffer();
    do {
        id.append((char) nextChar);
        advance();
    } while (isAsciiLetter(nextChar)
        || isAsciiDigit(nextChar)
        || nextChar == '_');
    final String strId = id.toString();
    final Integer code = (Integer) m_resWordsTable.get(
        strId.toUpperCase(Locale.ENGLISH));
    if (code == null) {
        // identifier
        return makeId(strId, false, false);
    }
    // reserved word
    return makeRes(code.intValue());
}

/**
 * Scans an identifier delimited by square brackets. On entry
 * {@link #nextChar} is the opening '['.
 *
 * <p>A ']' which is escaped by doubling is left in the identifier as two
 * brackets. If the input ends before the closing bracket, the identifier
 * read so far is returned.
 *
 * @param ampersand whether the opening bracket was preceded by '&amp;'
 */
private Symbol scanDelimitedId(boolean ampersand)
    throws java.io.IOException
{
    StringBuffer id = new StringBuffer();
    for (;;) {
        advance();
        switch (nextChar) {
        case ']':
            advance();
            if (nextChar == ']') {
                // ] escaped with ] - leave them as they are
                id.append("]]");
                break;
            }
            // end of identifier
            return makeId(id.toString(), true, ampersand);
        case -1:
            return makeId(id.toString(), true, ampersand);
        default:
            id.append((char) nextChar);
        }
    }
}

/**
 * Scans a string literal. On entry {@link #nextChar} is the opening quote.
 *
 * <p>Within the string, the quote character is escaped by doubling it. If
 * the input ends before the closing quote, the string read so far is
 * returned.
 *
 * @param quote the quote character which delimits the string
 */
private Symbol scanString(int quote)
    throws java.io.IOException
{
    StringBuffer buf = new StringBuffer();
    for (;;) {
        advance();
        if (nextChar == quote) {
            advance();
            if (nextChar != quote) {
                // end of string
                return makeString(buf.toString());
            }
            // quote escaped with quote
            buf.append((char) quote);
        } else if (nextChar == -1) {
            return makeString(buf.toString());
        } else {
            buf.append((char) nextChar);
        }
    }
}
}
// End Scanner.java