lexer impl and tests

This commit is contained in:
Robin Bärtschi 2024-11-13 19:49:12 +01:00
parent 47784287bb
commit c57c38bc81
4 changed files with 134 additions and 7 deletions

2
go.mod
View File

@ -1,3 +1,3 @@
module robaertschi.xyz/robaertschi/thorgot module git.robaertschi.xyz/robaertschi/thorgot
go 1.23.2 go 1.23.2

View File

@ -1,6 +1,8 @@
package lexer package lexer
import "robaertschi.xyz/robaertschi/thorgot/token" import (
"git.robaertschi.xyz/robaertschi/thorgot/token"
)
type Lexer struct { type Lexer struct {
input string input string
@ -15,6 +17,7 @@ type Lexer struct {
func New(input string) Lexer { func New(input string) Lexer {
lexer := Lexer{input: input} lexer := Lexer{input: input}
lexer.line = 1
lexer.readChar() lexer.readChar()
@ -42,13 +45,86 @@ func (l *Lexer) makeToken(t token.TokenType, literal string) token.Token {
return token.Token{Token: t, Literal: literal, Loc: token.Loc{Line: l.line, Col: l.col}} return token.Token{Token: t, Literal: literal, Loc: token.Loc{Line: l.line, Col: l.col}}
} }
// isDigit reports whether ch is an ASCII decimal digit ('0' through '9').
func isDigit(ch byte) bool {
	return '0' <= ch && ch <= '9'
}
// isLetter reports whether ch is an ASCII letter, upper or lower case.
// Underscores and non-ASCII bytes are not letters.
func isLetter(ch byte) bool {
	return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z')
}
// isValidIdentChar reports whether ch is an underscore or an ASCII letter.
// NextToken uses it to decide whether the current character begins an
// identifier.
func isValidIdentChar(ch byte) bool {
	if ch == '_' {
		return true
	}
	return isLetter(ch)
}
// skipWhitespace advances the lexer past any run of carriage returns,
// backspaces, tabs and spaces. A newline is deliberately not skipped:
// NextToken reports it as an EndLine token.
//
// NOTE(review): '\b' is the backspace character, which is unusual to treat
// as whitespace — confirm this is intended.
func (l *Lexer) skipWhitespace() {
	for {
		switch l.ch {
		case '\r', '\b', '\t', ' ':
			l.readChar()
		default:
			return
		}
	}
}
func (l *Lexer) readIdentifier() token.Token {
loc := token.Loc{Line: l.line, Col: l.col}
pos := l.pos
l.readChar()
for isDigit(l.ch) || isLetter(l.ch) {
l.readChar()
} }
return token t := token.LookupKeyword(l.input[pos:l.pos])
return token.Token{Token: t, Loc: loc, Literal: l.input[pos:l.pos]}
}
// readNumber consumes a run of decimal digits starting at the current
// character and returns it as an Integer token located at the first digit.
func (l *Lexer) readNumber() token.Token {
	start := l.pos
	tok := token.Token{
		Token: token.Integer,
		Loc:   token.Loc{Line: l.line, Col: l.col},
	}

	for isDigit(l.ch) {
		l.readChar()
	}

	// The literal is the slice of input covered by the digits just consumed.
	tok.Literal = l.input[start:l.pos]
	return tok
}
func (l *Lexer) NextToken() token.Token {
l.skipWhitespace()
var tok token.Token
tok.Literal = string(l.ch)
switch l.ch {
case '\n':
tok.Token = token.EndLine
case ';':
tok.Token = token.Semicolon
case ':':
tok.Token = token.Colon
case '=':
tok.Token = token.Equal
case '{':
tok.Token = token.LBrace
case '}':
tok.Token = token.RBrace
case '(':
tok.Token = token.LParen
case ')':
tok.Token = token.RParen
case 0:
return l.makeToken(token.Eof, "")
default:
if isValidIdentChar(l.ch) {
return l.readIdentifier()
} else if isDigit(l.ch) {
return l.readNumber()
}
tok.Token = token.Illegal
}
l.readChar()
return tok
} }

49
lexer/lexer_test.go Normal file
View File

@ -0,0 +1,49 @@
package lexer_test
import (
"testing"
"git.robaertschi.xyz/robaertschi/thorgot/lexer"
"git.robaertschi.xyz/robaertschi/thorgot/token"
)
// TestCorrectTokens runs each input through the lexer and checks that the
// tokens come back in the expected order with the expected type, literal,
// line and column. Each input runs as its own subtest so a failure names the
// input that produced it.
func TestCorrectTokens(t *testing.T) {
	tests := []struct {
		expectedTokens []token.Token
		input          string
	}{
		{
			input:          "",
			expectedTokens: []token.Token{{Token: token.Eof, Literal: "", Loc: token.Loc{Line: 1, Col: 1}}},
		},
		{
			input: "hello 1234 ; () {}",
			expectedTokens: []token.Token{
				{Token: token.Identifier, Literal: "hello", Loc: token.Loc{Line: 1, Col: 1}},
				{Token: token.Integer, Literal: "1234", Loc: token.Loc{Line: 1, Col: 7}},
				{Token: token.Semicolon, Literal: ";", Loc: token.Loc{Line: 1, Col: 12}},
				{Token: token.LParen, Literal: "(", Loc: token.Loc{Line: 1, Col: 14}},
				{Token: token.RParen, Literal: ")", Loc: token.Loc{Line: 1, Col: 15}},
				// The braces at the end of the input are checked as well.
				{Token: token.LBrace, Literal: "{", Loc: token.Loc{Line: 1, Col: 17}},
				{Token: token.RBrace, Literal: "}", Loc: token.Loc{Line: 1, Col: 18}},
			},
		},
	}

	for _, test := range tests {
		t.Run(test.input, func(t *testing.T) {
			// Named lex rather than lexer so the imported package is not shadowed.
			lex := lexer.New(test.input)

			for _, expected := range test.expectedTokens {
				actual := lex.NextToken()

				if expected.Literal != actual.Literal {
					t.Errorf("Literal is not equal: actual = (%v) is not expected = (%v)", actual.Literal, expected.Literal)
				}
				if expected.Token != actual.Token {
					t.Errorf("Token is not equal: actual = (%v) is not expected = (%v)", actual.Token, expected.Token)
				}
				if expected.Loc.Line != actual.Loc.Line {
					t.Errorf("Loc Line is not equal: actual = (%v) is not expected = (%v)", actual.Loc.Line, expected.Loc.Line)
				}
				if expected.Loc.Col != actual.Loc.Col {
					t.Errorf("Loc Col is not equal: actual = (%v) is not expected = (%v)", actual.Loc.Col, expected.Loc.Col)
				}
			}
		})
	}
}

View File

@ -29,6 +29,8 @@ const (
Identifier = "Identifier" Identifier = "Identifier"
Integer = "Integer" // 19232
// Keywords // Keywords
Fn = "Fn" // fn Fn = "Fn" // fn
) )