// Package lexer splits source text into token.Token values.
package lexer

import (
	"fmt"
	"iter"
	"unicode"
	"unicode/utf8"

	"robaertschi.xyz/robaertschi/tt/token"
)

// ErrorCallback receives every error the lexer reports: the location the
// error refers to, a fmt-style format string, and its arguments.
type ErrorCallback func(token.Loc, string, ...any)

// Lexer walks over an input string rune by rune and produces tokens on
// demand through NextToken or Iter.
type Lexer struct {
	input         string        // complete source text being lexed
	position      int           // byte offset of ch within input
	readPosition  int           // byte offset of the next rune to decode
	ch            rune          // current rune; -1 once the input is exhausted
	linePosition  int           // byte offset at which the current line starts
	lineCount     int           // number of line breaks consumed so far
	errors        int           // number of errors reported through error
	errorCallback ErrorCallback // optional sink for error messages; may be nil
	file          string        // file name copied into every token.Loc
}

// New creates a Lexer for input and loads its first rune. file is only used
// to fill in token locations.
//
// A byte order mark at the very beginning of input is skipped. An error is
// returned when the first rune (or the rune following a leading byte order
// mark) cannot be read.
func New(input string, file string) (*Lexer, error) {
	l := &Lexer{input: input, file: file}
	if err := l.readChar(); err != nil {
		return nil, err
	}

	// readChar accepts a byte order mark at offset 0 without an error, so it
	// has to be consumed here; otherwise NextToken would reject it as an
	// unknown character.
	if l.ch == '\uFEFF' {
		if err := l.readChar(); err != nil {
			return nil, err
		}
		// Columns on the first line are counted from behind the mark.
		l.linePosition = l.position
	}

	return l, nil
}

// Iter returns an iterator over the tokens of the input.
//
// The sequence does not end on its own: once the input is exhausted it keeps
// yielding Eof tokens, so the consumer has to stop iterating itself.
func (l *Lexer) Iter() iter.Seq[token.Token] {
	return func(yield func(token.Token) bool) {
		for {
			if !yield(l.NextToken()) {
				return
			}
		}
	}
}

// WithErrorCallback installs the function that is called for every error the
// lexer reports. It replaces any previously installed callback.
func (l *Lexer) WithErrorCallback(errorCallback ErrorCallback) {
	l.errorCallback = errorCallback
}

// loc returns the location of the current rune. Col and Pos are byte
// offsets (relative to the line start and the input start respectively).
func (l *Lexer) loc() token.Loc {
	return token.Loc{
		Line: l.lineCount,
		Col:  l.position - l.linePosition,
		Pos:  l.position,
		File: l.file,
	}
}

// NextToken skips leading whitespace and returns the next token.
//
// At the end of the input it returns an Eof token, and keeps doing so on
// every further call. Characters that cannot start a token produce an
// Illegal token; unknown characters are additionally reported as an error.
func (l *Lexer) NextToken() token.Token {
	l.skipWhitespace()

	var tok token.Token
	tok.Loc = l.loc()

	switch l.ch {
	case ';':
		tok = l.newToken(token.Semicolon)
	case '=':
		if l.peekByte() == '=' {
			return l.readTwoCharToken(tok, token.DoubleEqual)
		}
		tok = l.newToken(token.Equal)
	case '<':
		if l.peekByte() == '=' {
			return l.readTwoCharToken(tok, token.LessThanEqual)
		}
		tok = l.newToken(token.LessThan)
	case '>':
		if l.peekByte() == '=' {
			return l.readTwoCharToken(tok, token.GreaterThanEqual)
		}
		tok = l.newToken(token.GreaterThan)
	case '(':
		tok = l.newToken(token.OpenParen)
	case ')':
		tok = l.newToken(token.CloseParen)
	case '+':
		tok = l.newToken(token.Plus)
	case '-':
		tok = l.newToken(token.Minus)
	case '*':
		tok = l.newToken(token.Asterisk)
	case '/':
		tok = l.newToken(token.Slash)
	case '{':
		tok = l.newToken(token.OpenBrack)
	case '}':
		tok = l.newToken(token.CloseBrack)
	case '!':
		if l.peekByte() == '=' {
			return l.readTwoCharToken(tok, token.NotEqual)
		}
		// A lone '!' is not an operator.
		tok = l.newToken(token.Illegal)
	case -1:
		// End of input.
		tok.Literal = ""
		tok.Type = token.Eof
	default:
		switch {
		case isNumber(l.ch):
			// readInteger already leaves ch behind the literal.
			tok.Literal = l.readInteger()
			tok.Type = token.Int
			return tok
		case unicode.IsLetter(l.ch):
			// readIdentifier already leaves ch behind the identifier.
			tok.Literal = l.readIdentifier()
			tok.Type = token.LookupKeyword(tok.Literal)
			return tok
		default:
			// %q prints the rune quoted and escaped, which keeps
			// unprintable characters readable in the message.
			l.error(tok.Loc, "Unknown character %q", l.ch)
			tok = l.newToken(token.Illegal)
		}
	}

	// Step past the single rune the token consists of.
	l.advance()
	return tok
}

// readTwoCharToken consumes the current rune and the one following it and
// returns tok with its type set to t and its literal set to the consumed
// text. tok is expected to already carry the location of the first rune.
func (l *Lexer) readTwoCharToken(tok token.Token, t token.TokenType) token.Token {
	start := l.position
	l.advance()
	l.advance()
	tok.Type = t
	tok.Literal = l.input[start:l.position]
	return tok
}

// newToken builds a token of type t from the current rune and the current
// location. It does not consume the rune.
func (l *Lexer) newToken(t token.TokenType) token.Token {
	return token.Token{
		Type:    t,
		Literal: string(l.ch),
		Loc:     l.loc(),
	}
}

// advance moves to the next rune and reports a decoding error, if any, at
// the location of the offending rune instead of silently dropping it.
func (l *Lexer) advance() {
	if err := l.readChar(); err != nil {
		l.error(l.loc(), "%v", err.Error())
	}
}

// readChar decodes the next rune of the input into ch and updates the
// position and line bookkeeping. At the end of the input ch becomes -1.
//
// It returns an error for an invalid UTF-8 sequence and for a byte order
// mark anywhere but at offset 0. The lexer still advances in both cases; ch
// then holds utf8.RuneError or the byte order mark respectively.
func (l *Lexer) readChar() error {
	atEnd := l.readPosition >= len(l.input)
	if atEnd {
		l.position = len(l.input)
	} else {
		l.position = l.readPosition
	}

	// ch still holds the previous rune here: a line break just left behind
	// means the new position is where the next line starts.
	if l.ch == '\n' {
		l.linePosition = l.position
		l.lineCount++
	}

	if atEnd {
		l.ch = -1
		return nil
	}

	r, w := utf8.DecodeRuneInString(l.input[l.readPosition:])
	l.readPosition += w
	l.ch = r

	switch {
	case r == utf8.RuneError && w == 1:
		// A width of 1 distinguishes a decoding failure from a U+FFFD that
		// is really present in the input.
		return fmt.Errorf("Found illegal UTF-8 encoding")
	case r == '\uFEFF' && l.position > 0:
		return fmt.Errorf("Found illegal BOM")
	}
	return nil
}

// peekByte returns the byte following the current rune without consuming
// it, or 0 at the end of the input.
func (l *Lexer) peekByte() byte {
	if l.readPosition < len(l.input) {
		return l.input[l.readPosition]
	}
	return 0
}

// readIdentifier consumes letters, ASCII digits and underscores starting at
// the current rune and returns the consumed text.
func (l *Lexer) readIdentifier() string {
	start := l.position
	for unicode.IsLetter(l.ch) || isNumber(l.ch) || l.ch == '_' {
		l.advance()
	}
	return l.input[start:l.position]
}

// readInteger consumes ASCII digits starting at the current rune and
// returns the consumed text.
func (l *Lexer) readInteger() string {
	start := l.position
	for isNumber(l.ch) {
		l.advance()
	}
	return l.input[start:l.position]
}

// isNumber reports whether ch is an ASCII digit.
func isNumber(ch rune) bool {
	return '0' <= ch && ch <= '9'
}

// skipWhitespace consumes runes for as long as unicode.IsSpace accepts them.
func (l *Lexer) skipWhitespace() {
	for unicode.IsSpace(l.ch) {
		l.advance()
	}
}

// error forwards a message to the error callback, if one is installed, and
// counts the error either way.
func (l *Lexer) error(loc token.Loc, format string, args ...any) {
	if l.errorCallback != nil {
		l.errorCallback(loc, format, args...)
	}
	l.errors += 1
}