package lexer

import (
	"fmt"

	"git.robaertschi.xyz/robaertschi/thorgot/token"
)

// Lexer turns an input string into a stream of tokens, tracking the current
// position as well as the line and column used for token locations.
type Lexer struct {
	input   string
	ch      byte // current character (0 once the end of input is reached)
	pos     int  // index of ch in input
	readPos int  // index of the next character to be read

	// Location of the current character.
	col  int
	line int
}

// New returns a Lexer positioned on the first character of input.
func New(input string) Lexer {
	lexer := Lexer{input: input}
	lexer.line = 1

	lexer.readChar()

	return lexer
}

// String implements fmt.Stringer and is mainly useful for debugging.
func (l Lexer) String() string {
	return fmt.Sprintf("Lexer{input: \"%v\", ch: '%c', pos: %v, readPos: %v, col: %v, line: %v}", l.input, l.ch, l.pos, l.readPos, l.col, l.line)
}

// readChar advances the lexer by one character. A ch of 0 marks the end of
// the input. Newlines reset the column counter and bump the line counter.
func (l *Lexer) readChar() {
	if l.readPos >= len(l.input) {
		l.ch = 0
	} else {
		l.ch = l.input[l.readPos]
	}

	if l.ch == '\n' {
		l.col = 0
		l.line += 1
	}

	l.pos = l.readPos
	l.readPos += 1
	l.col += 1
}

// makeToken builds a token located at the lexer's current line and column.
func (l *Lexer) makeToken(t token.TokenType, literal string) token.Token {
	return token.Token{Token: t, Literal: literal, Loc: token.Loc{Line: l.line, Col: l.col}}
}

func isDigit(ch byte) bool {
	return '0' <= ch && ch <= '9'
}

func isLetter(ch byte) bool {
	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z'
}

func isValidIdentChar(ch byte) bool {
	return ch == '_' || isLetter(ch)
}

// skipWhitespace consumes whitespace other than newlines, which are emitted
// as their own tokens.
func (l *Lexer) skipWhitespace() {
	for l.ch == '\r' || l.ch == '\b' || l.ch == '\t' || l.ch == ' ' {
		l.readChar()
	}
}

// readIdentifier consumes an identifier or keyword starting at the current
// character and returns the corresponding token.
func (l *Lexer) readIdentifier() token.Token {
	loc := token.Loc{Line: l.line, Col: l.col}
	pos := l.pos

	// The first character was already validated by the caller.
	l.readChar()

	// Subsequent characters may be letters, digits or underscores.
	for isDigit(l.ch) || isValidIdentChar(l.ch) {
		l.readChar()
	}

	// Keywords share the identifier syntax; LookupKeyword picks the right type.
	t := token.LookupKeyword(l.input[pos:l.pos])

	return token.Token{Token: t, Loc: loc, Literal: l.input[pos:l.pos]}
}

// readNumber consumes a run of decimal digits and returns an Integer token.
func (l *Lexer) readNumber() token.Token {
	pos := l.pos
	loc := token.Loc{Line: l.line, Col: l.col}

	for isDigit(l.ch) {
		l.readChar()
	}

	return token.Token{Token: token.Integer, Loc: loc, Literal: l.input[pos:l.pos]}
}

// NextToken skips leading whitespace and returns the next token in the input.
// An Eof token is returned once the input is exhausted.
func (l *Lexer) NextToken() token.Token {
	l.skipWhitespace()

	var tok token.Token
	tok.Loc = token.Loc{Line: l.line, Col: l.col}
	tok.Literal = string(l.ch)

	switch l.ch {
	case '\n':
		tok.Token = token.NewLine
	case ';':
		tok.Token = token.Semicolon
	case ':':
		tok.Token = token.Colon
	case '=':
		tok.Token = token.Equal
	case '{':
		tok.Token = token.LBrace
	case '}':
		tok.Token = token.RBrace
	case '(':
		tok.Token = token.LParen
	case ')':
		tok.Token = token.RParen

	case 0:
		return l.makeToken(token.Eof, "")

	default:
		// Identifiers, keywords and numbers consume their own characters and
		// return early; anything else is reported as Illegal.
		if isValidIdentChar(l.ch) {
			return l.readIdentifier()
		} else if isDigit(l.ch) {
			return l.readNumber()
		}

		tok.Token = token.Illegal
	}

	l.readChar()

	return tok
}
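
// tokenizeAll is a minimal, illustrative usage sketch rather than part of the
// lexer proper: it drives NextToken until an Eof token appears, using only the
// token types already referenced above. The real consumer is expected to be
// the parser (or a test), so the helper name here is purely hypothetical.
func tokenizeAll(input string) []token.Token {
	l := New(input)

	var tokens []token.Token
	for {
		tok := l.NextToken()
		tokens = append(tokens, tok)
		if tok.Token == token.Eof {
			return tokens
		}
	}
}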