Browse Source

got token parsing working... easier than I thought, but still a lot of code

master
Github 3 years ago
parent
commit
bbebccc45c
9 changed files with 463 additions and 0 deletions
  1. +5
    -0
      .idea/codeStyles/codeStyleConfig.xml
  2. +6
    -0
      .idea/misc.xml
  3. +8
    -0
      .idea/modules.xml
  4. +6
    -0
      .idea/vcs.xml
  5. +11
    -0
      CraftingInterpreters-TreeWalkInterpreter.iml
  6. +93
    -0
      src/org/rubenruiz/lox/Lox.java
  7. +293
    -0
      src/org/rubenruiz/lox/Scanner.java
  8. +19
    -0
      src/org/rubenruiz/lox/Token.java
  9. +22
    -0
      src/org/rubenruiz/lox/TokenType.java

+ 5
- 0
.idea/codeStyles/codeStyleConfig.xml View File

@@ -0,0 +1,5 @@
<component name="ProjectCodeStyleConfiguration">
<state>
<option name="PREFERRED_PROJECT_CODE_STYLE" value="Default" />
</state>
</component>

+ 6
- 0
.idea/misc.xml View File

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" languageLevel="JDK_13" default="true" project-jdk-name="13" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/out" />
</component>
</project>

+ 8
- 0
.idea/modules.xml View File

@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/CraftingInterpreters-TreeWalkInterpreter.iml" filepath="$PROJECT_DIR$/CraftingInterpreters-TreeWalkInterpreter.iml" />
</modules>
</component>
</project>

+ 6
- 0
.idea/vcs.xml View File

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
</component>
</project>

+ 11
- 0
CraftingInterpreters-TreeWalkInterpreter.iml View File

@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

+ 93
- 0
src/org/rubenruiz/lox/Lox.java View File

@@ -0,0 +1,93 @@
package org.rubenruiz.lox;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;

/**
 * Command-line entry point for the Lox interpreter.
 *
 * <p>With one argument the argument is treated as a path to a script and the
 * script is run; with no arguments an interactive prompt is started. At this
 * stage "running" source means scanning it and printing every token.
 */
public class Lox {

    /**
     * Set to {@code true} whenever an error is reported. {@link #runFile} uses
     * it to choose the exit status; the prompt clears it after every line so
     * one bad line does not poison the rest of the session.
     */
    public static boolean hadError = false;

    /**
     * Reads the whole file into memory, decodes it with the platform default
     * charset and runs it.
     *
     * @param fileLoc the path to the file
     * @throws IOException if the file cannot be read
     */
    private static void runFile(String fileLoc) throws IOException {
        byte[] bytes = Files.readAllBytes(Paths.get(fileLoc));
        run(new String(bytes, Charset.defaultCharset()));

        // If we had an error, exit with a non-zero exit code
        if (hadError) {
            System.exit(65);
        }
    }

    /**
     * Runs Lox interactively: prompts, reads one line from standard input,
     * runs it, and repeats until the input is exhausted.
     *
     * @throws IOException if reading from standard input fails
     */
    private static void runPrompt() throws IOException {
        InputStreamReader input = new InputStreamReader(System.in);
        BufferedReader reader = new BufferedReader(input);

        for (;;) {
            System.out.print(">>>\t");
            String line = reader.readLine();
            // readLine() returns null once the stream is exhausted (e.g. ^D or
            // a closed pipe). Stop here instead of handing null to the
            // scanner, which would fail with a NullPointerException.
            if (line == null) {
                break;
            }
            run(line);
            // An error on this line must not affect the lines that follow.
            hadError = false;
        }
    }

    /**
     * Scans the given source and prints each resulting token on its own line.
     *
     * @param source incoming lox code to be interpreted
     */
    private static void run(String source) {
        Scanner scanner = new Scanner(source);
        List<Token> tokens = scanner.scanTokens();

        for (Token token : tokens) {
            System.out.println(token);
        }
    }

    /**
     * Reports an error that has no specific location text attached.
     *
     * @param line    the source line number the error refers to
     * @param message a description of the problem
     */
    public static void error(int line, String message) {
        report(line, "", message);
    }

    /**
     * Prints a formatted error to standard error and records that an error
     * happened by setting {@link #hadError}.
     *
     * @param line     the source line number the error refers to
     * @param location extra location text to show (may be empty)
     * @param message  a description of the problem
     */
    private static void report(int line, String location, String message) {
        System.err.println("<ERROR> on line [ " + line + " ] --> " + location + " <-- < " + message + " >");
        hadError = true;
    }

    /**
     * Dispatches on the number of command-line arguments: more than one prints
     * a usage message and exits with status 64, exactly one runs that file,
     * none starts the interactive prompt.
     *
     * @param args command-line arguments; at most one (a script path) is accepted
     * @throws IOException if the script or standard input cannot be read
     */
    public static void main(String[] args) throws IOException {
        if (args.length > 1) {
            // Improper use: more than one argument
            System.out.println("Usage: loxRun [script]");
            System.exit(64);
        } else if (args.length == 1) {
            // A single argument is assumed to be a script file
            runFile(args[0]);
        } else {
            // No arguments: drop to an interactive prompt
            runPrompt();
        }
    }

}

+ 293
- 0
src/org/rubenruiz/lox/Scanner.java View File

@@ -0,0 +1,293 @@
package org.rubenruiz.lox;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static org.rubenruiz.lox.TokenType.*;

/**
 * Lexer for Lox: walks over a source string one character at a time and
 * produces the list of {@link Token}s it contains, followed by an EOF token.
 * Lexical errors are reported through {@code Lox.error} and scanning continues.
 */
public class Scanner {

    // Reserved words of Lox, each mapped to the token type it produces.
    private static final Map<String, TokenType> keywords = reservedWords();

    // The lox program text being scanned.
    private final String source;
    // Tokens recognised so far, in the order they appear in the source.
    private final List<Token> tokens = new ArrayList<>();

    // Index of the first character of the lexeme currently being scanned.
    private int start = 0;
    // Index of the next character that has not been consumed yet.
    private int current = 0;
    // Line number (1-based) of the character at 'current'.
    private int line = 1;

    /**
     * Builds the reserved-word lookup table.
     * @return a map from keyword spelling to its token type
     */
    private static Map<String, TokenType> reservedWords() {
        Map<String, TokenType> table = new HashMap<>();
        table.put("and", AND);
        table.put("class", CLASS);
        table.put("else", ELSE);
        table.put("false", FALSE);
        table.put("for", FOR);
        table.put("fun", FUN);
        table.put("if", IF);
        table.put("nil", NIL);
        table.put("or", OR);
        table.put("print", PRINT);
        table.put("return", RETURN);
        table.put("super", SUPER);
        table.put("this", THIS);
        table.put("true", TRUE);
        table.put("var", VAR);
        table.put("while", WHILE);
        return table;
    }

    /**
     * Creates a scanner over the given program text.
     * @param incommingSource the lox source code to tokenize
     */
    public Scanner(String incommingSource) {
        this.source = incommingSource;
    }

    /**
     * Scans the remaining source, one lexeme per iteration, then appends
     * a closing EOF token.
     * @return the list of every token collected by this scanner
     */
    public List<Token> scanTokens() {
        while (current < source.length()) {
            // Each pass begins a fresh lexeme at the current position.
            start = current;
            scanToken();
        }

        Token endOfFile = new Token(EOF, "", null, line);
        tokens.add(endOfFile);
        return tokens;
    }

    /**
     * Consumes one lexeme starting at the current position. Punctuation and
     * operators become tokens directly, whitespace and comments are skipped,
     * newlines bump the line counter, and strings, numbers and identifiers
     * are handed to their dedicated methods. Anything else is reported as an
     * unexpected character.
     */
    public void scanToken() {
        final char first = advance();
        switch (first) {
            // Tokens that are always exactly one character long.
            case '(':
                addToken(LEFT_PAREN);
                break;
            case ')':
                addToken(RIGHT_PAREN);
                break;
            case '{':
                addToken(LEFT_BRACE);
                break;
            case '}':
                addToken(RIGHT_BRACE);
                break;
            case ',':
                addToken(COMMA);
                break;
            case '.':
                addToken(DOT);
                break;
            case '-':
                addToken(MINUS);
                break;
            case '+':
                addToken(PLUS);
                break;
            case ';':
                addToken(SEMICOLON);
                break;
            case '*':
                addToken(STAR);
                break;

            // Operators that may be followed by '=' to form a two-character token.
            case '!':
                if (match('=')) {
                    addToken(BANG_EQUAL);
                } else {
                    addToken(BANG);
                }
                break;
            case '=':
                if (match('=')) {
                    addToken(EQUAL_EQUAL);
                } else {
                    addToken(EQUAL);
                }
                break;
            case '<':
                if (match('=')) {
                    addToken(LESS_EQUAL);
                } else {
                    addToken(LESS);
                }
                break;
            case '>':
                if (match('=')) {
                    addToken(GREATER_EQUAL);
                } else {
                    addToken(GREATER);
                }
                break;

            // '/' is either division or, when doubled, the start of a comment
            // that is discarded up to (but not including) the end of the line.
            case '/':
                if (!match('/')) {
                    addToken(SLASH);
                    break;
                }
                while (!isAtEnd() && peek() != '\n') {
                    advance();
                }
                break;

            // Insignificant whitespace.
            case ' ':
            case '\r':
            case '\t':
                break;

            case '\n':
                line += 1;
                break;

            case '"':
                string();
                break;

            default:
                // Digits and letters are matched by range here rather than
                // listing every possible character as its own case.
                if (isDigit(first)) {
                    number();
                } else if (isAlpha(first)) {
                    identifier();
                } else {
                    Lox.error(line, "Unexpected character.");
                }
                break;
        }
    }

    /**
     * Scans a string literal whose opening quote has already been consumed.
     * The literal may span several lines. If the source ends before a closing
     * quote is found an error is reported and no token is added.
     */
    private void string() {
        while (!isAtEnd() && peek() != '"') {
            // Keep the line counter right for multi-line strings.
            if (advance() == '\n') {
                line += 1;
            }
        }

        // Running out of input means the closing quote never came.
        if (isAtEnd()) {
            Lox.error(line, "Unterminated String.");
            return;
        }

        // Step over the closing quote.
        advance();

        // The literal value is the lexeme without its surrounding quotes.
        int firstInside = start + 1;
        int closingQuote = current - 1;
        addToken(STRING, source.substring(firstInside, closingQuote));
    }

    /**
     * Scans a number literal whose first digit has already been consumed:
     * a run of digits, optionally followed by '.' and at least one more
     * digit. The value is stored on the token as a Double.
     */
    private void number() {
        consumeDigits();

        // A '.' only belongs to the number when a digit follows it.
        if (peek() == '.' && isDigit(peekNext())) {
            advance();
            consumeDigits();
        }

        String digits = source.substring(start, current);
        addToken(NUMBER, Double.parseDouble(digits));
    }

    /**
     * Advances past every consecutive digit at the current position.
     */
    private void consumeDigits() {
        while (isDigit(peek())) {
            advance();
        }
    }

    /**
     * Scans an identifier or reserved word whose first character has already
     * been consumed, and adds the matching token.
     */
    private void identifier() {
        while (isAlphaNumeric(peek())) {
            advance();
        }

        // Reserved words get their own type; everything else is an IDENTIFIER.
        String word = source.substring(start, current);
        addToken(keywords.getOrDefault(word, IDENTIFIER));
    }

    /**
     * Consumes the next character only if it is the expected one.
     * @param expected the character to look for at the current position
     * @return true if the character was there and has been consumed; false otherwise
     */
    private boolean match(char expected) {
        if (isAtEnd() || source.charAt(current) != expected) {
            return false;
        }
        current += 1;
        return true;
    }

    /**
     * Looks at the next character without consuming it.
     * @return the character at the current position, or '\0' at the end of the source
     */
    private char peek() {
        return isAtEnd() ? '\0' : source.charAt(current);
    }

    /**
     * Looks one character past {@link #peek()} without consuming anything.
     * Used to check for a digit after the '.' in a number.
     * @return the character after the next one, or '\0' if there is none
     */
    private char peekNext() {
        int following = current + 1;
        return following < source.length() ? source.charAt(following) : '\0';
    }

    /**
     * Tests for an ASCII decimal digit.
     * @param potentialDigit incoming value to be tested
     * @return true for '0' through '9' (inclusive); false otherwise
     */
    private boolean isDigit(char potentialDigit) {
        return '0' <= potentialDigit && potentialDigit <= '9';
    }

    /**
     * Tests whether the character matches [a-zA-Z_].
     * @param potentialAlpha the character to check
     * @return true for an ASCII letter or underscore; false otherwise
     */
    private boolean isAlpha(char potentialAlpha) {
        if (potentialAlpha == '_') {
            return true;
        }
        boolean lowerCase = 'a' <= potentialAlpha && potentialAlpha <= 'z';
        boolean upperCase = 'A' <= potentialAlpha && potentialAlpha <= 'Z';
        return lowerCase || upperCase;
    }

    /**
     * Tests whether the character may appear inside an identifier.
     * @param incomingCharacter the character to check
     * @return true for a letter, underscore or digit; false otherwise
     */
    private boolean isAlphaNumeric(char incomingCharacter) {
        return isDigit(incomingCharacter) || isAlpha(incomingCharacter);
    }

    /**
     * Consumes one character.
     * @return the character that was just consumed
     */
    private char advance() {
        return source.charAt(current++);
    }

    /**
     * Adds a token that carries no literal value.
     * @param type the type of the token to add
     */
    private void addToken(TokenType type) {
        addToken(type, null);
    }

    /**
     * Adds a token for the lexeme spanning start (inclusive) to current
     * (exclusive), tagged with the current line.
     * @param type the type of the token to add
     * @param literal the literal value, or null when the token is not a literal
     */
    private void addToken(TokenType type, Object literal) {
        String lexeme = source.substring(start, current);
        Token token = new Token(type, lexeme, literal, line);
        tokens.add(token);
    }

    /**
     * Tells whether every character of the source has been consumed.
     * @return true if current is at or past the end of the source; false if not
     */
    private boolean isAtEnd() {
        return source.length() <= current;
    }

}

+ 19
- 0
src/org/rubenruiz/lox/Token.java View File

@@ -0,0 +1,19 @@
package org.rubenruiz.lox;

/**
 * One token produced by the scanner: its type, the exact source text it was
 * read from, an optional literal value, and the line it was recorded on.
 * All fields are final, so a token never changes after construction.
 */
class Token {
    /** What kind of token this is. */
    final TokenType type;
    /** The text of the token as it was passed in by the creator. */
    final String lexeme;
    /** The literal value carried by the token; may be {@code null}. */
    final Object literal;
    /** The line number associated with the token. */
    final int line;

    /**
     * Creates a token from its four components.
     *
     * @param type    the kind of token
     * @param lexeme  the source text of the token
     * @param literal the literal value, or {@code null} if there is none
     * @param line    the line number to associate with the token
     */
    Token(TokenType type, String lexeme, Object literal, int line) {
        this.type = type;
        this.lexeme = lexeme;
        this.literal = literal;
        this.line = line;
    }

    /**
     * Renders the token as {@code "<type> <lexeme> <literal>"}, separated by
     * single spaces. The line number is not included.
     *
     * @return the textual form of this token
     */
    @Override
    public String toString() {
        return type + " " + lexeme + " " + literal;
    }
}

+ 22
- 0
src/org/rubenruiz/lox/TokenType.java View File

@@ -0,0 +1,22 @@
package org.rubenruiz.lox;

/**
 * Every kind of token a Lox token can be tagged with.
 * Constants are grouped by category; the declaration order is kept stable.
 */
enum TokenType {
    // Punctuation and operators that are a single character.
    LEFT_PAREN,
    RIGHT_PAREN,
    LEFT_BRACE,
    RIGHT_BRACE,
    COMMA,
    DOT,
    MINUS,
    PLUS,
    SEMICOLON,
    SLASH,
    STAR,

    // Operators that are one character alone, or two with a trailing '='.
    BANG,
    BANG_EQUAL,
    EQUAL,
    EQUAL_EQUAL,
    GREATER,
    GREATER_EQUAL,
    LESS,
    LESS_EQUAL,

    // Literals.
    IDENTIFIER,
    STRING,
    NUMBER,

    // Reserved words.
    AND,
    CLASS,
    ELSE,
    FALSE,
    FUN,
    FOR,
    IF,
    NIL,
    OR,
    PRINT,
    RETURN,
    SUPER,
    THIS,
    TRUE,
    VAR,
    WHILE,

    // End of input.
    EOF
}

Loading…
Cancel
Save