|
|
@@ -0,0 +1,293 @@ |
|
|
|
package org.rubenruiz.lox; |
|
|
|
|
|
|
|
import java.util.ArrayList; |
|
|
|
import java.util.HashMap; |
|
|
|
import java.util.List; |
|
|
|
import java.util.Map; |
|
|
|
|
|
|
|
import static org.rubenruiz.lox.TokenType.*; |
|
|
|
|
|
|
|
public class Scanner { |
|
|
|
|
|
|
|
// This represents the inputted lox code to parse |
|
|
|
private final String source; |
|
|
|
private final List<Token> tokens = new ArrayList<>(); |
|
|
|
|
|
|
|
// Flow management |
|
|
|
private int start = 0; |
|
|
|
private int current = 0; |
|
|
|
private int line = 1; |
|
|
|
|
|
|
|
// Identifier identifier, this contains a list of |
|
|
|
// reserved words in lox |
|
|
|
private static final Map<String, TokenType> keywords; |
|
|
|
|
|
|
|
static { |
|
|
|
keywords = new HashMap<>(); |
|
|
|
keywords.put("and", AND); |
|
|
|
keywords.put("class", CLASS); |
|
|
|
keywords.put("else", ELSE); |
|
|
|
keywords.put("false", FALSE); |
|
|
|
keywords.put("for", FOR); |
|
|
|
keywords.put("fun", FUN); |
|
|
|
keywords.put("if", IF); |
|
|
|
keywords.put("nil", NIL); |
|
|
|
keywords.put("or", OR); |
|
|
|
keywords.put("print", PRINT); |
|
|
|
keywords.put("return", RETURN); |
|
|
|
keywords.put("super", SUPER); |
|
|
|
keywords.put("this", THIS); |
|
|
|
keywords.put("true", TRUE); |
|
|
|
keywords.put("var", VAR); |
|
|
|
keywords.put("while", WHILE); |
|
|
|
} |
|
|
|
|
|
|
|
public Scanner(String incommingSource) { |
|
|
|
source = incommingSource; |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* Our runner, this one runs the scanToken |
|
|
|
* method until the file is empty and dresses it |
|
|
|
* to be returned in a nice List for handling elsewhere |
|
|
|
* @return a List containing the tokens it was able to ascertain from the input string |
|
|
|
*/ |
|
|
|
public List<Token> scanTokens() { |
|
|
|
while (!isAtEnd()) { |
|
|
|
start = current; |
|
|
|
scanToken(); |
|
|
|
} |
|
|
|
|
|
|
|
tokens.add(new Token(EOF, "", null, line)); |
|
|
|
return tokens; |
|
|
|
} |
|
|
|
|
|
|
|
public void scanToken() { |
|
|
|
char c = advance(); |
|
|
|
// This converts the incoming char to a token and adds it to the list |
|
|
|
// We started with the single character tokens as they are the easiest |
|
|
|
// Then, we moved on to the potentially double character ones |
|
|
|
// Then the whitespace characters were taken care of |
|
|
|
// Then to the more complicated stuff |
|
|
|
// String literals |
|
|
|
// Some Reserved words |
|
|
|
// Number literals (done in the default section for easier detection |
|
|
|
// without having to specify every numeral) |
|
|
|
// Finally, a catchall that throws an error if it doesn't recognize |
|
|
|
// the symbol |
|
|
|
switch (c){ |
|
|
|
case '(': addToken(LEFT_PAREN); break; |
|
|
|
case ')': addToken(RIGHT_PAREN); break; |
|
|
|
case '{': addToken(LEFT_BRACE); break; |
|
|
|
case '}': addToken(RIGHT_BRACE); break; |
|
|
|
case ',': addToken(COMMA); break; |
|
|
|
case '.': addToken(DOT); break; |
|
|
|
case '-': addToken(MINUS); break; |
|
|
|
case '+': addToken(PLUS); break; |
|
|
|
case ';': addToken(SEMICOLON); break; |
|
|
|
case '*': addToken(STAR); break; |
|
|
|
case '!': addToken(match('=') ? BANG_EQUAL : BANG); break; |
|
|
|
case '=': addToken(match('=') ? EQUAL_EQUAL : EQUAL); break; |
|
|
|
case '<': addToken(match('=') ? LESS_EQUAL : LESS); break; |
|
|
|
case '>': addToken(match('=') ? GREATER_EQUAL : GREATER); break; |
|
|
|
// A little bit of care has to go into handling the '/' as it |
|
|
|
// can either represent a division operand or be the first in |
|
|
|
// a comment, this code peeks ahead to see if there is another |
|
|
|
// '/' coming up and will skip the line if it sees it |
|
|
|
case '/': |
|
|
|
if (match('/')) { |
|
|
|
// A comment goes until the end of the line. |
|
|
|
while (peek() != '\n' && !isAtEnd()) advance(); |
|
|
|
} else { |
|
|
|
addToken(SLASH); |
|
|
|
} |
|
|
|
break; |
|
|
|
case ' ': |
|
|
|
case '\r': |
|
|
|
case '\t': |
|
|
|
// Ignore whitespace. |
|
|
|
break; |
|
|
|
// Move on to the next line when hit with a newline char |
|
|
|
case '\n': line++; break; |
|
|
|
case '\"': string(); break; |
|
|
|
// Single characters that don't fit the above schemes are tossed out and an error is thrown |
|
|
|
default: |
|
|
|
// Check if it is a numeral first and if not, then throw the error |
|
|
|
// Then check if it is an identifier and if not, then throw the error |
|
|
|
if (isDigit(c)) { |
|
|
|
number(); |
|
|
|
} |
|
|
|
else if (isAlpha(c)){ |
|
|
|
identifier(); |
|
|
|
} |
|
|
|
else { |
|
|
|
Lox.error(line, "Unexpected character."); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* String handling, it runs until it finds the second quote or it reaches the |
|
|
|
* end and then outputs the String as a STRING token with the full, potentially |
|
|
|
* multiline string |
|
|
|
*/ |
|
|
|
private void string() { |
|
|
|
while (peek() != '\"' && !isAtEnd()) { |
|
|
|
if (peek() == '\n') line++; |
|
|
|
advance(); |
|
|
|
} |
|
|
|
|
|
|
|
//if it reaches the end, then it didn't find the closing |
|
|
|
// '"' character and the string is unterminated |
|
|
|
if (isAtEnd()) { |
|
|
|
Lox.error(line, "Unterminated String."); |
|
|
|
return; |
|
|
|
} |
|
|
|
|
|
|
|
//This IS the closing '"' |
|
|
|
advance(); |
|
|
|
|
|
|
|
//Remove the quotes from around it |
|
|
|
String stringLiteral = source.substring(start + 1, current - 1); |
|
|
|
addToken(STRING, stringLiteral); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* number literal handling, this runs until it runs into a non-digit |
|
|
|
* character. if it is a decimal, it keeps on running |
|
|
|
* else it adds the number literal to the list |
|
|
|
*/ |
|
|
|
private void number() { |
|
|
|
while (isDigit(peek())) advance(); |
|
|
|
|
|
|
|
// Look for a fractional part. |
|
|
|
if (peek() == '.' && isDigit(peekNext())) { |
|
|
|
// Consume the "." |
|
|
|
advance(); |
|
|
|
|
|
|
|
while (isDigit(peek())) advance(); |
|
|
|
} |
|
|
|
|
|
|
|
addToken(NUMBER, Double.parseDouble(source.substring(start, current))); |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* converts the incoming letters to a valid identifier |
|
|
|
*/ |
|
|
|
private void identifier() { |
|
|
|
while (isAlphaNumeric(peek())) advance(); |
|
|
|
|
|
|
|
// See if the identifier is a reserved word. |
|
|
|
String text = source.substring(start, current); |
|
|
|
|
|
|
|
TokenType type = keywords.get(text); |
|
|
|
if (type == null) type = IDENTIFIER; |
|
|
|
addToken(type); |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* Looks for a match one character ahead |
|
|
|
* @param expected the character than match() is looking for |
|
|
|
* @return true if expected == source.charAt(current); false if expected is anything else |
|
|
|
*/ |
|
|
|
private boolean match(char expected) { |
|
|
|
if (isAtEnd()) return false; |
|
|
|
if (source.charAt(current) != expected) return false; |
|
|
|
|
|
|
|
current++; |
|
|
|
return true; |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* Works just like @see advance() except it doesn't iterate the counter vars |
|
|
|
* so that you can take a quick look ahead without forgetting where you are |
|
|
|
* @return the next character in source |
|
|
|
*/ |
|
|
|
private char peek() { |
|
|
|
if (isAtEnd()) return '\0'; |
|
|
|
return source.charAt(current); |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* This method is primarily designed with the number literal in mind |
|
|
|
* it checks one character beyond @see peek() to check if there is a |
|
|
|
* number after the decimal |
|
|
|
* @return the character after the next character in source |
|
|
|
*/ |
|
|
|
private char peekNext() { |
|
|
|
if (current + 1 >= source.length()) return '\0'; |
|
|
|
return source.charAt(current + 1); |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* checks to see if the inputted character is a digit |
|
|
|
* @param potentialDigit incoming value to be tested |
|
|
|
* @return true if is a char between '0' and '9' (inclusive); false if otherwise |
|
|
|
*/ |
|
|
|
private boolean isDigit(char potentialDigit) { |
|
|
|
return potentialDigit >= '0' && potentialDigit <= '9'; |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* Checks if the character fits this regex -> [a-zA-Z_] |
|
|
|
* @param potentialAlpha the character to check |
|
|
|
* @return true if it matches the above regex, false otherwise |
|
|
|
*/ |
|
|
|
private boolean isAlpha(char potentialAlpha) { |
|
|
|
return (potentialAlpha >= 'a' && potentialAlpha <= 'z') || |
|
|
|
(potentialAlpha >= 'A' && potentialAlpha <= 'Z') || |
|
|
|
potentialAlpha == '_'; |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* Checks if the character is either a valid letter (or _) |
|
|
|
* or if it is a number |
|
|
|
* @param incomingCharacter the character to check |
|
|
|
* @return true if its a number, letter or _; false otherwise |
|
|
|
*/ |
|
|
|
private boolean isAlphaNumeric(char incomingCharacter) { |
|
|
|
return isAlpha(incomingCharacter) || isDigit(incomingCharacter); |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* Moves the interpreter to the next character in source |
|
|
|
* @return the next character in source |
|
|
|
*/ |
|
|
|
private char advance() { |
|
|
|
current++; |
|
|
|
return source.charAt(current - 1); |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* is a wrapper method for @see (TokenType type, Object literal); |
|
|
|
* attatches a null literal if the token is not a literal |
|
|
|
* @param type incoming non-literal token |
|
|
|
*/ |
|
|
|
private void addToken(TokenType type) { |
|
|
|
addToken(type, null); |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* creates a token with associated literal, |
|
|
|
* NOTE: The literal field can be null and often is if |
|
|
|
* the token is not a literal |
|
|
|
* @param type incoming token |
|
|
|
* @param literal literal value (if token is a literal) |
|
|
|
*/ |
|
|
|
private void addToken(TokenType type, Object literal) { |
|
|
|
String text = source.substring(start, current); |
|
|
|
tokens.add(new Token(type, text, literal, line)); |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* Checks to see if we are at the end of the road |
|
|
|
* @return true if current is at or past the end of the length of the string; |
|
|
|
* false if not |
|
|
|
*/ |
|
|
|
private boolean isAtEnd() { |
|
|
|
return current >= source.length(); |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
} |