Files
mtmc-web/src/jvmMain/java/mtmc/lang/sea/Token.java

427 lines
12 KiB
Java

package mtmc.lang.sea;
import mtmc.lang.Location;
import org.jetbrains.annotations.NotNull;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
/**
 * A single lexical token produced by the tokenizer in this file.
 *
 * <p>{@code start} is the index of the token's first character in the source string and
 * {@code end} is the index just past its last character (this is how {@code tokenizeOne}
 * constructs tokens). For character and string literals {@code content} holds the decoded
 * value (escape sequences already resolved), not the raw source text; for every other token
 * it is the matched source text.
 */
public record Token(
Type type,
@NotNull
String content,
int start,
int end
) implements mtmc.lang.Token {
// Sentinel token of type SOF (presumably "start of file"): empty content, located at index 0.
public static final Token SOF = new Token(Type.SOF, "", 0, 0);
// Sentinel token of type EOF: empty content, positioned at Integer.MAX_VALUE, which
// highlight() special-cases to print an "(at EOL)" marker.
public static final Token EOF = new Token(Type.EOF, "", Integer.MAX_VALUE, Integer.MAX_VALUE);
/**
 * The kinds of token the tokenizer can produce.
 *
 * <p>Each constant carries its fixed source spelling in {@link #lex}, or {@code null} when the
 * token has no fixed spelling (literals, identifiers and the SOF/EOF sentinels).
 */
public enum Type {
    // Literals and identifiers (no fixed spelling)
    LIT_INT(null),
    LIT_STR(null),
    LIT_CHAR(null),
    LIT_IDENT(null),
    // Keywords
    KW_TYPEDEF("typedef"),
    KW_STRUCT("struct"),
    KW_IF("if"),
    KW_ELSE("else"),
    KW_FOR("for"),
    KW_WHILE("while"),
    KW_DO("do"),
    KW_GOTO("goto"),
    KW_CONTINUE("continue"),
    KW_BREAK("break"),
    KW_RETURN("return"),
    KW_SIZEOF("sizeof"),
    KW_INT("int"),
    KW_CHAR("char"),
    KW_VOID("void"),
    // Sentinels
    SOF(null),
    EOF(null),
    // Groups
    LEFT_PAREN("("),
    RIGHT_PAREN(")"),
    LEFT_BRACKET("["),
    RIGHT_BRACKET("]"),
    LEFT_BRACE("{"),
    RIGHT_BRACE("}"),
    // Simple Punct
    DOT3("..."),
    DOT("."),
    SEMICOLON(";"),
    COMMA(","),
    COLON(":"),
    TILDE("~"),
    QUESTION("?"),
    PLUS2("++"),
    PLUS_EQ("+="),
    PLUS("+"),
    DASH2("--"),
    DASH_EQ("-="),
    ARROW("->"),
    DASH("-"),
    STAR_EQ("*="),
    STAR("*"),
    SLASH_EQ("/="),
    SLASH("/"),
    PERCENT_EQ("%="),
    PERCENT("%"),
    AMPERSAND2("&&"),
    AMPERSAND_EQ("&="),
    AMPERSAND("&"),
    BAR2("||"),
    BAR_EQ("|="),
    BAR("|"),
    // NOTE: CARET is declared before CARET_EQ, unlike every other operator family here.
    // The declaration order is kept so ordinals stay stable; PUNCT is sorted instead.
    CARET("^"),
    CARET_EQ("^="),
    LEFT_ARROW2_EQ("<<="),
    LEFT_ARROW2("<<"),
    LEFT_ARROW_EQ("<="),
    LEFT_ARROW("<"),
    RIGHT_ARROW2_EQ(">>="),
    RIGHT_ARROW2(">>"),
    RIGHT_ARROW_EQ(">="),
    RIGHT_ARROW(">"),
    EQUAL2("=="),
    EQUAL("="),
    BANG_EQ("!="),
    BANG("!");

    /** Fixed source spelling of this token type, or {@code null} if it has none. */
    public final String lex;

    /**
     * Every type that has a fixed spelling (this includes the keywords), ordered so that a
     * longer lexeme always precedes a shorter one. A scanner that takes the first entry
     * matching at a position therefore gets the longest match (e.g. {@code ^=} rather than
     * {@code ^}). Entries of equal length keep their declaration order.
     */
    public static final Type[] PUNCT;

    static {
        List<Type> withLexeme = new ArrayList<>();
        for (Type t : Type.values()) {
            if (t.lex != null) {
                withLexeme.add(t);
            }
        }
        // List.sort is stable, so equal-length lexemes stay in declaration order.
        withLexeme.sort((a, b) -> Integer.compare(b.lex.length(), a.lex.length()));
        PUNCT = withLexeme.toArray(new Type[0]);
    }

    /** Every type whose constant name starts with {@code KW_}, in declaration order. */
    public static final Type[] KEYWORDS;

    static {
        List<Type> list = new ArrayList<>();
        for (Type t : Type.values()) {
            if (t.name().startsWith("KW_")) {
                list.add(t);
            }
        }
        KEYWORDS = list.toArray(new Type[0]);
    }

    Type(String lex) {
        this.lex = lex;
    }
}
/**
 * Converts a character index into a 1-based line and column.
 *
 * <p>Only {@code '\n'} starts a new line; every other character advances the column by one.
 * An index past the end of {@code src} is treated as the end of {@code src}, and an index
 * of zero or less yields {@code {1, 1}}.
 *
 * @param src   the source text
 * @param index character index into {@code src}
 * @return a two-element array {@code {line, column}}, both 1-based
 */
public static int[] getLineAndOffset(String src, int index) {
    int limit = Math.max(0, Math.min(index, src.length()));
    int lineNumber = 1;
    int lastBreak = -1; // index of the most recent '\n' seen before limit
    for (int pos = 0; pos < limit; pos++) {
        if (src.charAt(pos) == '\n') {
            lineNumber++;
            lastBreak = pos;
        }
    }
    // The column is the number of characters since the last line break, plus one.
    return new int[]{lineNumber, limit - lastBreak};
}
/**
 * Returns the text of the line that contains {@code index}, without its line terminator.
 *
 * <p>A {@code '\n'} belongs to the line it terminates, so an index pointing at a newline
 * yields the line before it. This agrees with the line number {@code getLineAndOffset}
 * reports for the same index. The index is clamped to {@code [0, src.length()]}, so
 * out-of-range values (such as {@code Integer.MAX_VALUE}) select the first or last line.
 *
 * @param src   the source text
 * @param index character index into {@code src}
 * @return the line containing {@code index}; empty if that line is empty
 */
public static String getLineFor(String src, int index) {
    int pos = Math.max(0, Math.min(index, src.length()));
    // The line starts right after the last newline strictly before pos
    // (lastIndexOf returns -1 when there is none, giving start == 0).
    int start = src.lastIndexOf('\n', pos - 1) + 1;
    // The line ends at the first newline at or after pos, or at end of input.
    int end = src.indexOf('\n', pos);
    if (end < 0) {
        end = src.length();
    }
    return src.substring(start, end);
}
/**
 * Renders a two-line excerpt that points at the source range {@code [start, end)}.
 *
 * <p>The first output line is the text of the line containing {@code end}, cut off at the
 * end of the range. When the range begins more than 10 columns into that line, the text
 * before the range is replaced by {@code "... "}. The second output line holds a row of
 * {@code ^} characters under the range followed by {@code " (here)"}; when {@code start} is
 * {@code Integer.MAX_VALUE} it holds {@code "^ (at EOL)"} instead. If the range spans several
 * lines, only the part on the last line is shown, starting from column 0.
 *
 * @param src   the source text
 * @param start index of the first highlighted character
 * @param end   index just past the last highlighted character
 * @return the excerpt and marker line, separated by {@code '\n'}
 */
public static String highlight(String src, int start, int end) {
    int[] s = getLineAndOffset(src, start);
    int[] e = getLineAndOffset(src, end);
    String line = getLineFor(src, end);

    // Clamp both bounds to the extracted line so substring() can never go out of range.
    int lineEnd = Math.min(e[1] - 1, line.length());
    int lineStart = (s[0] != e[0]) ? 0 : Math.min(s[1] - 1, lineEnd);

    StringBuilder result = new StringBuilder();
    int indent;
    if (lineStart > 10) {
        String ellipsis = "... ";
        result.append(ellipsis);
        result.append(line.substring(lineStart, lineEnd));
        // The shown text begins right after the ellipsis, so the markers do too.
        indent = ellipsis.length();
    } else {
        result.append(line.substring(0, lineEnd));
        indent = lineStart;
    }
    result.append('\n');
    result.repeat(' ', indent);
    if (start == Integer.MAX_VALUE) {
        result.append("^ (at EOL)");
    } else {
        result.repeat('^', lineEnd - lineStart);
        result.append(" (here)");
    }
    return result.toString();
}
/**
 * Splits {@code src} into tokens by calling {@code tokenizeOne} repeatedly, each call
 * resuming at the end of the previous token, until it returns {@code null}.
 *
 * @param src the source text
 * @return the tokens in source order
 * @throws TokenizeException if {@code tokenizeOne} rejects part of the input
 */
public static List<Token> tokenize(String src) throws TokenizeException {
    List<Token> result = new ArrayList<>();
    for (Token next = tokenizeOne(src, 0); next != null; next = tokenizeOne(src, next.end())) {
        result.add(next);
    }
    return result;
}
/**
 * Tests whether {@code token} occurs in {@code str} at position {@code start}.
 *
 * @return {@code false} if {@code str} is {@code null} or too short to hold {@code token}
 *     at {@code start}; otherwise whether the characters match exactly
 */
private static boolean match(String str, int start, String token) {
    if (str == null) {
        return false;
    }
    int needed = token.length();
    if (str.length() - start < needed) {
        return false;
    }
    int matched = 0;
    while (matched < needed && str.charAt(start + matched) == token.charAt(matched)) {
        matched++;
    }
    return matched == needed;
}
/**
 * Tests whether the character of {@code str} at position {@code start} is {@code c}.
 *
 * @return {@code false} if {@code str} is {@code null} or has no character at {@code start}
 */
private static boolean match(String str, int start, char c) {
    // Character.charCount of a char value is always 1, hence the constant.
    if (str == null || str.length() - start < 1) {
        return false;
    }
    return c == str.charAt(start);
}
/**
 * Scans a single token from {@code src}, starting the search at {@code offset}.
 *
 * <p>Leading whitespace, {@code //} line comments and block comments are skipped first.
 * The token is then classified by its first character:
 * <ul>
 *   <li>a digit starts an integer literal (a run of digits);</li>
 *   <li>a letter or {@code _} starts an identifier, which becomes a keyword type when its
 *       text equals the lexeme of an entry in {@code Type.KEYWORDS};</li>
 *   <li>{@code '} starts a character literal and {@code "} a string literal; for both, the
 *       token content is the decoded value with escapes resolved;</li>
 *   <li>anything else must be one of the fixed lexemes in {@code Type.PUNCT}; the longest
 *       lexeme matching at that position is chosen, so {@code ^=} is never split into
 *       {@code ^} and {@code =} regardless of the order of {@code PUNCT}.</li>
 * </ul>
 *
 * @param src    the source text
 * @param offset index at which to start scanning
 * @return the next token, or {@code null} if only whitespace/comments remain
 * @throws TokenizeException on an unterminated or malformed character/string literal, an
 *     invalid escape sequence, or a character that starts no known token
 */
public static Token tokenizeOne(String src, int offset) throws TokenizeException {
    offset = skipTrivia(src, offset);
    if (offset >= src.length()) {
        return null;
    }

    final int start = offset;
    final char c = src.charAt(start);
    Type type;
    String content;

    if (Character.isDigit(c)) {
        do {
            offset++;
        } while (offset < src.length() && Character.isDigit(src.charAt(offset)));
        content = src.substring(start, offset);
        type = Type.LIT_INT;
    } else if (Character.isLetter(c) || c == '_') {
        do {
            offset++;
        } while (offset < src.length()
            && (Character.isLetter(src.charAt(offset))
                || Character.isDigit(src.charAt(offset))
                || src.charAt(offset) == '_'));
        content = src.substring(start, offset);
        type = Type.LIT_IDENT;
        for (Type keyword : Type.KEYWORDS) {
            if (content.equals(keyword.lex)) {
                type = keyword;
                break;
            }
        }
    } else if (c == '\'') {
        offset++;
        // A lone quote at end of input used to escape as StringIndexOutOfBoundsException.
        if (offset >= src.length()) {
            throw new TokenizeException("unterminated character literal", start, offset);
        }
        char d = src.charAt(offset++);
        if (d == '\\') {
            if (offset >= src.length()) {
                throw new TokenizeException("invalid character escape " + d, start, offset);
            }
            d = src.charAt(offset++);
            int decoded = decodeEscape(d);
            if (decoded < 0) {
                throw new TokenizeException("invalid character escape " + d, start, offset);
            }
            content = String.valueOf((char) decoded);
        } else {
            content = String.valueOf(d);
        }
        if (offset >= src.length() || src.charAt(offset) != '\'') {
            throw new TokenizeException("unterminated character literal", start, offset);
        }
        offset++;
        type = Type.LIT_CHAR;
    } else if (c == '"') {
        offset++;
        StringBuilder sb = new StringBuilder();
        while (offset < src.length() && src.charAt(offset) != '"') {
            char d = src.charAt(offset++);
            if (d == '\\') {
                // A backslash as the very last character has nothing to escape.
                if (offset >= src.length()) {
                    throw new TokenizeException("unterminated string literal", start, offset);
                }
                d = src.charAt(offset++);
                int decoded = decodeEscape(d);
                if (decoded < 0) {
                    throw new TokenizeException("invalid string escape " + d, start, offset);
                }
                sb.append((char) decoded);
            } else if (d == '\n') {
                // A raw newline always ends the literal in error, even when the next
                // line happens to begin with a double quote.
                throw new TokenizeException("unterminated string literal", start, offset);
            } else {
                sb.append(d);
            }
        }
        if (offset >= src.length()) {
            throw new TokenizeException("unterminated string literal", start, offset);
        }
        content = sb.toString();
        offset++; // closing quote
        type = Type.LIT_STR;
    } else {
        // Longest match wins, independent of the order of Type.PUNCT.
        Type best = null;
        for (Type t : Type.PUNCT) {
            if ((best == null || t.lex.length() > best.lex.length()) && match(src, start, t.lex)) {
                best = t;
            }
        }
        if (best == null) {
            // Report a one-character span so the offending character can be highlighted.
            throw new TokenizeException(
                "unexpected character '" + src.charAt(start) + "'", start, start + 1);
        }
        type = best;
        content = best.lex;
        offset += best.lex.length();
    }
    return new Token(type, content, start, offset);
}

/** Returns the first index at or after {@code offset} that is not whitespace or inside a comment. */
private static int skipTrivia(String src, int offset) {
    while (offset < src.length()) {
        if (Character.isWhitespace(src.charAt(offset))) {
            offset++;
        } else if (match(src, offset, "//")) {
            // Line comment: runs through the next newline, or to end of input.
            int newline = src.indexOf('\n', offset + 2);
            offset = newline < 0 ? src.length() : newline + 1;
        } else if (match(src, offset, "/*")) {
            // Block comment: an unterminated one silently runs to end of input.
            int close = src.indexOf("*/", offset + 2);
            offset = close < 0 ? src.length() : close + 2;
        } else {
            break;
        }
    }
    return offset;
}

/** Decodes the character following a backslash; returns -1 if it is not a supported escape. */
private static int decodeEscape(char escape) {
    return switch (escape) {
        case 'n' -> '\n';
        case 'r' -> '\r';
        case 't' -> '\t';
        case '\\', '\'', '"', '?' -> escape;
        default -> -1;
    };
}
/**
 * Thrown when the tokenizer cannot turn part of the input into a token. Carries the source
 * range the error refers to. Unchecked, since it extends {@link IllegalArgumentException}.
 */
public static class TokenizeException extends IllegalArgumentException {
    /** Start index of the offending range, as passed to the constructor. */
    public final int start;
    /** End index of the offending range, as passed to the constructor. */
    public final int end;

    /**
     * @param msg   description of the problem
     * @param start start index of the offending range
     * @param end   end index of the offending range
     */
    public TokenizeException(String msg, int start, int end) {
        super(msg);
        this.start = start;
        this.end = end;
    }

    /** Formats as {@code TokenizeException at <start>:<end>, <message>}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("TokenizeException at ");
        text.append(start).append(":").append(end);
        text.append(", ").append(getLocalizedMessage());
        return text.toString();
    }
}
/**
 * Compares this token with {@code o}.
 *
 * <p>As a convenience, a {@link String} argument is compared against this token's content
 * only. That shortcut is one-directional ({@code "x".equals(token)} is never true) and is
 * not reflected in {@link #hashCode()}, so do not mix tokens and strings as keys of the
 * same hash-based collection. Any other argument is equal only if it is a {@code Token}
 * with the same type, content, start and end.
 */
@Override
public boolean equals(Object o) {
    if (o instanceof String text) {
        return Objects.equals(content, text);
    }
    if (o instanceof Token other) {
        return type == other.type
            && start == other.start
            && end == other.end
            && Objects.equals(content, other.content);
    }
    return false;
}
/**
 * Hash over type, content, start and end, combined with the same 31-based scheme that
 * {@code Objects.hash(type, content, start, end)} uses.
 */
@Override
public int hashCode() {
    int hash = 1;
    hash = 31 * hash + Objects.hashCode(type);
    hash = 31 * hash + Objects.hashCode(content);
    hash = 31 * hash + Integer.hashCode(start);
    hash = 31 * hash + Integer.hashCode(end);
    return hash;
}
/** Wraps this token's {@code start} index in a {@link Location}. */
@Override
public Location getStart() {
    final int startIndex = this.start;
    return new Location(startIndex);
}
/** Wraps this token's {@code end} index in a {@link Location}. */
@Override
public Location getEnd() {
    final int endIndex = this.end;
    return new Location(endIndex);
}
/** Returns this token's content; same value as the record accessor {@code content()}. */
@Override
public String getContent() {
    return this.content;
}
}