diff --git a/go.mod b/go.mod
index ad8f9f3..6416106 100644
--- a/go.mod
+++ b/go.mod
@@ -1,3 +1,3 @@
-module robaertschi.xyz/robaertschi/thorgot
+module git.robaertschi.xyz/robaertschi/thorgot
 
 go 1.23.2
diff --git a/lexer/lexer.go b/lexer/lexer.go
index be84f4d..e69052e 100644
--- a/lexer/lexer.go
+++ b/lexer/lexer.go
@@ -1,6 +1,8 @@
 package lexer
 
-import "robaertschi.xyz/robaertschi/thorgot/token"
+import (
+	"git.robaertschi.xyz/robaertschi/thorgot/token"
+)
 
 type Lexer struct {
 	input string
@@ -15,6 +17,7 @@ type Lexer struct {
 
 func New(input string) Lexer {
 	lexer := Lexer{input: input}
+	lexer.line = 1
 
 	lexer.readChar()
 
@@ -42,13 +45,109 @@ func (l *Lexer) makeToken(t token.TokenType, literal string) token.Token {
 	return token.Token{Token: t, Literal: literal, Loc: token.Loc{Line: l.line, Col: l.col}}
 }
 
-func (l *Lexer) NextToken() token.Token {
-	var token token.Token
+// isDigit reports whether ch is an ASCII decimal digit.
+func isDigit(ch byte) bool {
+	return '0' <= ch && ch <= '9'
+}
 
-	switch l.ch {
-	case 0:
+// isLetter reports whether ch is an ASCII letter.
+func isLetter(ch byte) bool {
+	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z'
+}
 
+// isValidIdentChar reports whether ch is an ASCII letter or an underscore,
+// i.e. a character that may start an identifier.
+func isValidIdentChar(ch byte) bool {
+	return ch == '_' || isLetter(ch)
+}
+
+// skipWhitespace advances past carriage returns, backspaces, tabs and spaces.
+// Newlines are not skipped: NextToken reports them as token.EndLine.
+func (l *Lexer) skipWhitespace() {
+	for l.ch == '\r' || l.ch == '\b' || l.ch == '\t' || l.ch == ' ' {
+		l.readChar()
+	}
+}
+
+// readIdentifier consumes an identifier starting at the current character and
+// returns it as a token whose type is decided by token.LookupKeyword. The
+// first character is consumed unconditionally, so the caller must check it.
+func (l *Lexer) readIdentifier() token.Token {
+	loc := token.Loc{Line: l.line, Col: l.col}
+	pos := l.pos
+
+	l.readChar()
+
+	// Underscores are accepted after the first character too, so that
+	// "foo_bar" is one identifier rather than "foo" followed by "_bar".
+	for isDigit(l.ch) || isValidIdentChar(l.ch) {
+		l.readChar()
 	}
 
-	return token
+	literal := l.input[pos:l.pos]
+	t := token.LookupKeyword(literal)
+
+	return token.Token{Token: t, Loc: loc, Literal: literal}
+}
+
+// readNumber consumes a run of decimal digits starting at the current
+// character and returns it as a token.Integer token.
+func (l *Lexer) readNumber() token.Token {
+	pos := l.pos
+	loc := token.Loc{Line: l.line, Col: l.col}
+
+	for isDigit(l.ch) {
+		l.readChar()
+	}
+
+	return token.Token{Token: token.Integer, Loc: loc, Literal: l.input[pos:l.pos]}
+}
+
+// NextToken skips leading whitespace and returns the next token of the
+// input. When the current character is 0 it returns a token.Eof token; a
+// character that starts no known token is returned as token.Illegal.
+func (l *Lexer) NextToken() token.Token {
+	l.skipWhitespace()
+
+	// Record the location before the character is consumed so that
+	// single-character tokens carry their position like every other token.
+	tok := token.Token{
+		Literal: string(l.ch),
+		Loc:     token.Loc{Line: l.line, Col: l.col},
+	}
+
+	switch l.ch {
+	case '\n':
+		tok.Token = token.EndLine
+	case ';':
+		tok.Token = token.Semicolon
+	case ':':
+		tok.Token = token.Colon
+	case '=':
+		tok.Token = token.Equal
+	case '{':
+		tok.Token = token.LBrace
+	case '}':
+		tok.Token = token.RBrace
+	case '(':
+		tok.Token = token.LParen
+	case ')':
+		tok.Token = token.RParen
+
+	case 0:
+		return l.makeToken(token.Eof, "")
+
+	default:
+		if isValidIdentChar(l.ch) {
+			return l.readIdentifier()
+		}
+		if isDigit(l.ch) {
+			return l.readNumber()
+		}
+
+		tok.Token = token.Illegal
+	}
+
+	l.readChar()
+	return tok
 }
diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go
new file mode 100644
index 0000000..e891289
--- /dev/null
+++ b/lexer/lexer_test.go
@@ -0,0 +1,56 @@
+package lexer_test
+
+import (
+	"testing"
+
+	"git.robaertschi.xyz/robaertschi/thorgot/lexer"
+	"git.robaertschi.xyz/robaertschi/thorgot/token"
+)
+
+// TestCorrectTokens checks the type, literal and location of every token the
+// lexer produces for each input.
+func TestCorrectTokens(t *testing.T) {
+	tests := []struct {
+		expectedTokens []token.Token
+		input          string
+	}{{
+		expectedTokens: []token.Token{{Token: token.Eof, Literal: "", Loc: token.Loc{Line: 1, Col: 1}}},
+		input:          "",
+	}, {input: "hello 1234 ; () {}",
+		expectedTokens: []token.Token{
+			{Token: token.Identifier, Literal: "hello", Loc: token.Loc{Line: 1, Col: 1}},
+			{Token: token.Integer, Literal: "1234", Loc: token.Loc{Line: 1, Col: 7}},
+			{Token: token.Semicolon, Literal: ";", Loc: token.Loc{Line: 1, Col: 12}},
+			{Token: token.LParen, Literal: "(", Loc: token.Loc{Line: 1, Col: 14}},
+			{Token: token.RParen, Literal: ")", Loc: token.Loc{Line: 1, Col: 15}},
+			{Token: token.LBrace, Literal: "{", Loc: token.Loc{Line: 1, Col: 17}},
+			{Token: token.RBrace, Literal: "}", Loc: token.Loc{Line: 1, Col: 18}},
+		}}, {input: "foo_bar",
+		expectedTokens: []token.Token{
+			{Token: token.Identifier, Literal: "foo_bar", Loc: token.Loc{Line: 1, Col: 1}},
+		}}}
+
+	for _, test := range tests {
+		lex := lexer.New(test.input)
+		for _, expected := range test.expectedTokens {
+			actual := lex.NextToken()
+
+			if expected.Literal != actual.Literal {
+				t.Errorf("Literal is not equal: actual = (%v) is not expected = (%v)", actual.Literal, expected.Literal)
+			}
+
+			if expected.Token != actual.Token {
+				t.Errorf("Token is not equal: actual = (%v) is not expected = (%v)", actual.Token, expected.Token)
+			}
+
+			if expected.Loc.Line != actual.Loc.Line {
+				t.Errorf("Loc Line is not equal: actual = (%v) is not expected = (%v)", actual.Loc.Line, expected.Loc.Line)
+			}
+
+			if expected.Loc.Col != actual.Loc.Col {
+				t.Errorf("Loc Col is not equal: actual = (%v) is not expected = (%v)", actual.Loc.Col, expected.Loc.Col)
+			}
+
+		}
+	}
+}
diff --git a/token/token.go b/token/token.go
index 073102b..27a5417 100644
--- a/token/token.go
+++ b/token/token.go
@@ -29,6 +29,8 @@ const (
 
 	Identifier = "Identifier"
 
+	Integer = "Integer" // 19232
+
 	// Keywords
 	Fn = "Fn" // fn
 )