lexer impl and tests

2024-11-13 19:49:12 +01:00 · 2024-11-13 19:49:12 +01:00 · c57c38bc81
commit c57c38bc81
parent 47784287bb
4 changed files with 134 additions and 7 deletions
--- a/go.mod
+++ b/go.mod
@ -1,3 +1,3 @@
-module robaertschi.xyz/robaertschi/thorgot
+module git.robaertschi.xyz/robaertschi/thorgot
 go 1.23.2
--- a/lexer/lexer.go
+++ b/lexer/lexer.go
@ -1,6 +1,8 @@
 package lexer
-import "robaertschi.xyz/robaertschi/thorgot/token"
+import (
 	"git.robaertschi.xyz/robaertschi/thorgot/token"
 )
 type Lexer struct {
 	input   string
@ -15,6 +17,7 @@ type Lexer struct {
 func New(input string) Lexer {
 	lexer := Lexer{input: input}
 	lexer.line = 1
 	lexer.readChar()
@ -42,13 +45,86 @@ func (l *Lexer) makeToken(t token.TokenType, literal string) token.Token {
 	return token.Token{Token: t, Literal: literal, Loc: token.Loc{Line: l.line, Col: l.col}}
 }
-func (l *Lexer) NextToken() token.Token {
+func isDigit(ch byte) bool {
-	var token token.Token
+	return '0' <= ch && ch <= '9'
 }
-	switch l.ch {
+func isLetter(ch byte) bool {
-	case 0:
+	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z'
 }
 // isValidIdentChar reports whether ch is an underscore or an ASCII letter.
 // NextToken uses it to decide whether the current byte starts an identifier.
 func isValidIdentChar(ch byte) bool {
 	if ch == '_' {
 		return true
 	}
 	return isLetter(ch)
 }
 // skipWhitespace advances the lexer while the current byte is '\r', '\b',
 // '\t' or ' '. '\n' is not skipped; NextToken maps it to token.EndLine.
 // NOTE(review): '\b' is the backspace escape; confirm that it is meant to
 // count as whitespace.
 func (l *Lexer) skipWhitespace() {
 	for {
 		switch l.ch {
 		case '\r', '\b', '\t', ' ':
 			l.readChar()
 		default:
 			return
 		}
 	}
 }
 func (l *Lexer) readIdentifier() token.Token {
 	loc := token.Loc{Line: l.line, Col: l.col}
 	pos := l.pos
 	l.readChar()
 	for isDigit(l.ch) || isLetter(l.ch) {
 		l.readChar()
 	}
-	return token
+	t := token.LookupKeyword(l.input[pos:l.pos])
 	return token.Token{Token: t, Loc: loc, Literal: l.input[pos:l.pos]}
 }
 // readNumber consumes a run of ASCII digits starting at the current
 // character and returns it as a token.Integer. Loc is the line and column
 // recorded before the first digit is consumed.
 func (l *Lexer) readNumber() token.Token {
 	tok := token.Token{
 		Token: token.Integer,
 		Loc:   token.Loc{Line: l.line, Col: l.col},
 	}
 	begin := l.pos
 	for {
 		if !isDigit(l.ch) {
 			break
 		}
 		l.readChar()
 	}
 	tok.Literal = l.input[begin:l.pos]
 	return tok
 }
 // NextToken skips whitespace and returns the next token from the input.
 //
 // Single-character tokens and token.Illegal use the current character as
 // their literal. Identifiers and integers are delegated to readIdentifier
 // and readNumber. A zero byte yields token.Eof with an empty literal.
 // Every returned token carries the line and column at which it starts.
 func (l *Lexer) NextToken() token.Token {
 	l.skipWhitespace()
 	// Record the location before the character is consumed so that
 	// single-character tokens report a position like every other token.
 	tok := token.Token{
 		Literal: string(l.ch),
 		Loc:     token.Loc{Line: l.line, Col: l.col},
 	}
 	switch l.ch {
 	case '\n':
 		tok.Token = token.EndLine
 	case ';':
 		tok.Token = token.Semicolon
 	case ':':
 		tok.Token = token.Colon
 	case '=':
 		tok.Token = token.Equal
 	case '{':
 		tok.Token = token.LBrace
 	case '}':
 		tok.Token = token.RBrace
 	case '(':
 		tok.Token = token.LParen
 	case ')':
 		tok.Token = token.RParen
 	case 0:
 		return l.makeToken(token.Eof, "")
 	default:
 		// Multi-character tokens consume their own input and return directly.
 		switch {
 		case isValidIdentChar(l.ch):
 			return l.readIdentifier()
 		case isDigit(l.ch):
 			return l.readNumber()
 		}
 		tok.Token = token.Illegal
 	}
 	l.readChar()
 	return tok
 }
--- a/lexer/lexer_test.go
+++ b/lexer/lexer_test.go
@ -0,0 +1,49 @@
 package lexer_test
 import (
 	"testing"
 	"git.robaertschi.xyz/robaertschi/thorgot/lexer"
 	"git.robaertschi.xyz/robaertschi/thorgot/token"
 )
 // TestCorrectTokens feeds each input to a fresh lexer and checks the
 // literal, token type, line and column of every expected token in order.
 func TestCorrectTokens(t *testing.T) {
 	tests := []struct {
 		expectedTokens []token.Token
 		input          string
 	}{{
 		expectedTokens: []token.Token{{Token: token.Eof, Literal: "", Loc: token.Loc{Line: 1, Col: 1}}},
 		input:          "",
 	}, {input: "hello 1234 ; () {}",
 		expectedTokens: []token.Token{
 			{Token: token.Identifier, Literal: "hello", Loc: token.Loc{Line: 1, Col: 1}},
 			{Token: token.Integer, Literal: "1234", Loc: token.Loc{Line: 1, Col: 7}},
 			{Token: token.Semicolon, Literal: ";", Loc: token.Loc{Line: 1, Col: 12}},
 			{Token: token.LParen, Literal: "(", Loc: token.Loc{Line: 1, Col: 14}},
 			{Token: token.RParen, Literal: ")", Loc: token.Loc{Line: 1, Col: 15}},
 			// The input ends with "{}", so the brace tokens are checked as well.
 			{Token: token.LBrace, Literal: "{", Loc: token.Loc{Line: 1, Col: 17}},
 			{Token: token.RBrace, Literal: "}", Loc: token.Loc{Line: 1, Col: 18}},
 		}}}
 	for _, test := range tests {
 		// Named l rather than lexer so the lexer package is not shadowed.
 		l := lexer.New(test.input)
 		for _, expected := range test.expectedTokens {
 			actual := l.NextToken()
 			if expected.Literal != actual.Literal {
 				t.Errorf("Literal is not equal: actual = (%v) is not expected = (%v)", actual.Literal, expected.Literal)
 			}
 			if expected.Token != actual.Token {
 				t.Errorf("Token is not equal: actual = (%v) is not expected = (%v)", actual.Token, expected.Token)
 			}
 			if expected.Loc.Line != actual.Loc.Line {
 				t.Errorf("Loc Line is not equal: actual = (%v) is not expected = (%v)", actual.Loc.Line, expected.Loc.Line)
 			}
 			if expected.Loc.Col != actual.Loc.Col {
 				t.Errorf("Loc Col is not equal: actual = (%v) is not expected = (%v)", actual.Loc.Col, expected.Loc.Col)
 			}
 		}
 	}
 }
--- a/token/token.go
+++ b/token/token.go
@ -29,6 +29,8 @@ const (
 	Identifier = "Identifier"
 	Integer = "Integer" // 19232
 	// Keywords
 	Fn = "Fn" // fn
 )
`@ -1,3 +1,3 @@`
	`module robaertschi.xyz/robaertschi/thorgot`	`module git.robaertschi.xyz/robaertschi/thorgot`

	`go 1.23.2`	`go 1.23.2`