thorgot/lexer/lexer.go
package lexer

import (
	"fmt"

	"git.robaertschi.xyz/robaertschi/thorgot/token"
)
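
// Lexer scans thorgot source text one byte at a time, tracking the current
// byte, its position in the input and the next read position, together with
// the line/column location attached to emitted tokens.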
type Lexer struct {
	input   string
	ch      byte
	pos     int
	readPos int

	// Loc
	col  int
	line int
}
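
// New returns a Lexer positioned on the first byte of input, with line
// counting starting at 1.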
func New(input string) Lexer {
	lexer := Lexer{input: input}
	lexer.line = 1
	lexer.readChar()
	return lexer
}
func (l Lexer) String() string {
	return fmt.Sprintf("Lexer{input: \"%v\", ch: '%c', pos: %v, readPos: %v, col: %v, line: %v}", l.input, l.ch, l.pos, l.readPos, l.col, l.line)
}
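
// readChar advances the lexer by one byte, setting ch to 0 once the end of
// the input is reached and keeping the line and col counters in sync.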
func (l *Lexer) readChar() {
	// If the byte being consumed was a newline, the next byte starts a new
	// line, so reset the column before advancing.
	if l.ch == '\n' {
		l.col = 0
		l.line += 1
	}

	if l.readPos >= len(l.input) {
		l.ch = 0
	} else {
		l.ch = l.input[l.readPos]
	}

	l.pos = l.readPos
	l.readPos += 1
	l.col += 1
}
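
// makeToken builds a token of the given type at the lexer's current location.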
func (l *Lexer) makeToken(t token.TokenType, literal string) token.Token {
	return token.Token{Token: t, Literal: literal, Loc: token.Loc{Line: l.line, Col: l.col}}
}
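
// isDigit reports whether ch is an ASCII decimal digit.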
func isDigit(ch byte) bool {
	return '0' <= ch && ch <= '9'
}

func isLetter(ch byte) bool {
	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z'
}

func isValidIdentChar(ch byte) bool {
	return ch == '_' || isLetter(ch)
}
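
// skipWhitespace consumes spaces, tabs, carriage returns and backspaces.
// Newlines are deliberately left alone, since they are emitted as
// token.NewLine by NextToken.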
func (l *Lexer) skipWhitespace() {
	for l.ch == '\r' || l.ch == '\b' || l.ch == '\t' || l.ch == ' ' {
		l.readChar()
	}
}
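
// readIdentifier consumes an identifier or keyword starting at the current
// byte; token.LookupKeyword decides which token type the literal maps to.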
func (l *Lexer) readIdentifier() token.Token {
	loc := token.Loc{Line: l.line, Col: l.col}
	pos := l.pos
	l.readChar()
	// Accept digits as well as letters and underscores after the first
	// character of the identifier.
	for isDigit(l.ch) || isValidIdentChar(l.ch) {
		l.readChar()
	}
	t := token.LookupKeyword(l.input[pos:l.pos])
	return token.Token{Token: t, Loc: loc, Literal: l.input[pos:l.pos]}
}
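
// readNumber consumes a run of decimal digits and returns it as a
// token.Integer.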
func (l *Lexer) readNumber() token.Token {
	pos := l.pos
	loc := token.Loc{Line: l.line, Col: l.col}
	for isDigit(l.ch) {
		l.readChar()
	}
	return token.Token{Token: token.Integer, Loc: loc, Literal: l.input[pos:l.pos]}
}
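
// NextToken skips leading whitespace and returns the next token in the
// input, advancing the lexer past it. The zero byte produced by readChar at
// the end of the input maps to token.Eof.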
func (l *Lexer) NextToken() token.Token {
	l.skipWhitespace()

	var tok token.Token
	tok.Loc = token.Loc{Line: l.line, Col: l.col}
	tok.Literal = string(l.ch)

	switch l.ch {
	case '\n':
		tok.Token = token.NewLine
	case ';':
		tok.Token = token.Semicolon
	case ':':
		tok.Token = token.Colon
	case '=':
		tok.Token = token.Equal
	case '{':
		tok.Token = token.LBrace
	case '}':
		tok.Token = token.RBrace
	case '(':
		tok.Token = token.LParen
	case ')':
		tok.Token = token.RParen
	case 0:
		return l.makeToken(token.Eof, "")
	default:
		if isValidIdentChar(l.ch) {
			return l.readIdentifier()
		} else if isDigit(l.ch) {
			return l.readNumber()
		}
		tok.Token = token.Illegal
	}

	l.readChar()
	return tok
}