package journal.reader; import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; import java.io.Reader; import java.io.StringReader; import java.text.ParseException; public class Tokenizer { private BufferedReader reader; private StringReader lineReader = null; private int line = 0; private int column = 0; private boolean atLeftMargin = true; private Token lastToken = null; public Tokenizer(Reader reader) { this.reader = new BufferedReader(reader); } // returns next token or NULL if EOL reached // throws ParseException if error state reached public Token nextToken() throws ParseException, IOException { // if previous token was pushed, return that if (lastToken != null) { Token returnToken = lastToken; lastToken = null; return returnToken; } // read first char // if @, ignore @, keep reading till next @ or EOL (=> error) // otherwise, needs to be number or hex (otherwise => error) // Token token = new Token(column, line, atLeftMargin); while (true) { int ch = readChar(); if (ch == -1) { return null; } else if (ch == '@') { return eatStringToken(token); } else if (isHex(ch) || ch == '-') { return eatNumberToken(token, ch); } else if (ch == '\n') { token.setAtLeftMargin(true); continue; // not a token, just end of line. 
Keep looking } else { throw new ParseException("Could not interpret character " + ch + "(" + (char) ch + ")", line); } } } public void pushToken(Token token) { lastToken = token; } private Token eatNumberToken(Token token, int ch) throws IOException, ParseException { StringBuffer buffer = new StringBuffer(); // add this digit to the token // keep reading until non-digit is reached Token.Type type = Token.Type.INTEGER_TOKEN; do { buffer.append((char)ch); if (isHexChar(ch)) type = Token.Type.HEX_TOKEN; ch = readChar(); } while (isHex(ch)); verifyWhitespaceOrEndOfLine(ch); token.setValue(buffer.toString(), type); return token; } private Token eatStringToken(Token token) throws IOException, ParseException { int ch; // read and add to token until closing '@' is reached // or end of file, which is an error StringBuffer buffer = new StringBuffer(); while (true) { ch = readChar(); while (ch != '@') { if (ch == -1) { throw new ParseException("Unexpected end of file encountered", line); } buffer.append((char) ch); ch = readChar(); } ch = readChar(); // this removes the whitespace after the closing '@' - or checks for @@ if (ch == '@') { buffer.append('@'); // special case, @ is encoded as @@ } else { verifyWhitespaceOrEndOfLine(ch); token.setValue(buffer.toString(), token.isAtLeftMargin() ? 
Token.Type.START_TOKEN : Token.Type.STRING_TOKEN); return token; } } } private void verifyWhitespaceOrEndOfLine(int ch) throws ParseException { if (!Character.isWhitespace(ch) && ch != -1) { throw new ParseException("Not a whitespace as expected : " + ch + " '" + (char) ch + "'", line); } } private boolean isHex(int ch) { return Character.isDigit(ch) || isHexChar(ch); } private boolean isHexChar(int ch) { return ch == 'A' || ch == 'B' || ch == 'C' || ch == 'D' || ch == 'E' || ch == 'F'; } private int readChar() throws IOException { if (lineReader == null) { if (nextLine()) return -1; } int ch = lineReader.read(); atLeftMargin = false; if (ch == -1) { // end reached if (nextLine()) return -1; ch = '\n'; } column++; return ch; } // return true if end-of-file reached // otherwise false private boolean nextLine() throws IOException { String aLine = reader.readLine(); if (aLine == null) { return true; } lineReader = new StringReader(aLine); ++line; column = 0; atLeftMargin = true; return false; } private static void testTokenizer(String test) { Reader reader = new StringReader(test); Tokenizer tokenizer = new Tokenizer(reader); Token token; try { token = tokenizer.nextToken(); while (token != null) { System.out.println("Found : " + token); token = tokenizer.nextToken(); } } catch (ParseException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } public static void main(String[] args) { // Testing methods // for now only command line, later special tests, maybe throw TestUnit if (args.length == 0) { testTokenizer("@pv@ 4 @db.have@ 12345 "); testTokenizer("123 3245 456 7 "); testTokenizer("@a blank in here does not matter@\n"); testTokenizer("@so\nwhat\nif\nthere\nare\nreturns@\n"); testTokenizer("@pv@ 7 @db.working@ @@@ex@@@ 1 @ha ha ha@\n"); testTokenizer("@ex@ 1\n@vv@ @db.counter@ @journal@ 2FD005066B82F8949B0E8ADEA6582C74 1\n"); } else { try { Tokenizer tokenizer = new Tokenizer(new FileReader(args[0])); Token token = tokenizer.nextToken(); 
while (token != null) { System.out.println("Found :" + token); token = tokenizer.nextToken(); } } catch (Exception e) { e.printStackTrace(); } } } public int getColumn() { return column; } public int getLine() { return line; } }
# | Change | User | Description | Committed | |
---|---|---|---|---|---|
#2 | 7374 | Sven Erik Knop | Rename/move file(s) - correct location for Eclipse project | ||
#1 | 6467 | Sven Erik Knop | Added JournalReader, a Java library of useful tools to read and process checkpoints and journals. Added are a readme.txt to explain some details, and a jar file that contains the compiled class files. The programs will need Java 1.6 to run. | ||