commit 6f9d64b2bf55c914cd3732b405a6af2dce85b2e9
Author: Robin
Date:   Sun Jan 19 21:45:07 2025 +0100

    Initial Commit

diff --git a/ast/ast.go b/ast/ast.go
new file mode 100644
index 0000000..d823278
--- /dev/null
+++ b/ast/ast.go
@@ -0,0 +1,99 @@
+// Package ast declares the syntax tree node types of the tt language.
+package ast
+
+import (
+	"fmt"
+	"strings"
+
+	"robaertschi.xyz/robaertschi/tt/token"
+)
+
+// Node is implemented by every element of the syntax tree.
+type Node interface {
+	// TokenLiteral returns the literal of the token associated with the node.
+	TokenLiteral() string
+	// String renders the node as text.
+	String() string
+}
+
+// Declaration is a Node that can appear in Program.Declarations.
+type Declaration interface {
+	Node
+	declarationNode()
+}
+
+// Expression is a Node that can be used as a value, such as a function body.
+type Expression interface {
+	Node
+	expressionNode()
+}
+
+// Program is the root of the syntax tree: an ordered list of declarations.
+type Program struct {
+	Declarations []Declaration
+}
+
+// TokenLiteral returns the token literal of the first declaration, or the
+// empty string when the program has no declarations.
+func (p *Program) TokenLiteral() string {
+	if len(p.Declarations) == 0 {
+		return ""
+	}
+	return p.Declarations[0].TokenLiteral()
+}
+
+// String renders every declaration followed by a newline.
+func (p *Program) String() string {
+	var builder strings.Builder
+
+	for _, decl := range p.Declarations {
+		builder.WriteString(decl.String())
+		builder.WriteRune('\n')
+	}
+
+	return builder.String()
+}
+
+// FunctionDeclaration is a function definition of the form `fn name() = body;`.
+type FunctionDeclaration struct {
+	Token token.Token // The token.FN
+	Body  Expression  // Must not be nil; String calls Body.String.
+	Name  string
+}
+
+func (fd *FunctionDeclaration) declarationNode() {}
+
+// TokenLiteral returns the literal of the declaration's fn token.
+func (fd *FunctionDeclaration) TokenLiteral() string { return fd.Token.Literal }
+
+// String renders the declaration as `fn name() = body;`.
+func (fd *FunctionDeclaration) String() string {
+	return fmt.Sprintf("fn %v() = %v;", fd.Name, fd.Body.String())
+}
+
+// ErrorExpression represents an expression that failed to parse.
+type ErrorExpression struct {
+	InvalidToken token.Token
+}
+
+func (e *ErrorExpression) expressionNode() {}
+
+// TokenLiteral returns the literal of InvalidToken.
+func (e *ErrorExpression) TokenLiteral() string { return e.InvalidToken.Literal }
+
+// String returns the empty string.
+func (e *ErrorExpression) String() string { return "" }
+
+// IntegerExpression is an integer literal.
+type IntegerExpression struct {
+	Token token.Token // The token.INT
+	Value int64
+}
+
+func (ie *IntegerExpression) expressionNode() {}
+
+// TokenLiteral returns the literal of the integer token.
+func (ie *IntegerExpression) TokenLiteral() string { return ie.Token.Literal }
+
+// String returns the literal of the integer token.
+func (ie *IntegerExpression) String() string { return ie.Token.Literal }
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..04dcae2
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,3 @@
+module robaertschi.xyz/robaertschi/tt
+
+go 1.23.4
diff --git a/lexer/lexer.go b/lexer/lexer.go
new file mode 100644
index 0000000..a0c6687
--- /dev/null
+++ b/lexer/lexer.go
@@ -0,0 +1,224 @@
+// Package lexer turns tt source text into a stream of tokens.
+package lexer
+
+import (
+	"fmt"
+	"iter"
+	"unicode"
+	"unicode/utf8"
+
+	"robaertschi.xyz/robaertschi/tt/token"
+)
+
+const (
+	// eof is the value of Lexer.ch once the input is exhausted.
+	eof rune = -1
+	// bom is the Unicode byte order mark, only accepted as the first rune.
+	bom rune = '\uFEFF'
+)
+
+// ErrorCallback receives lexing errors: the location, a fmt-style format
+// string and the arguments for that format.
+type ErrorCallback func(token.Loc, string, ...any)
+
+// Lexer splits an input string into tokens. Create one with New.
+type Lexer struct {
+	input        string
+	position     int  // byte offset of ch in input
+	readPosition int  // byte offset of the rune after ch
+	ch           rune // rune under examination, eof at the end of input
+
+	linePosition int // byte offset at which the current line starts
+	lineCount    int // zero-based index of the current line
+
+	errors        int // number of errors reported so far
+	errorCallback ErrorCallback
+
+	file string // file name copied into every token.Loc
+}
+
+// New creates a Lexer for input; file is only used to fill in token.Loc.File.
+// A byte order mark at the very start of input is skipped. New returns an
+// error if the first rune of input cannot be read.
+func New(input string, file string) (*Lexer, error) {
+	l := &Lexer{input: input, file: file}
+	if err := l.readChar(); err != nil {
+		return nil, err
+	}
+	if l.ch == bom {
+		// readChar accepts a BOM at offset 0 only; skip it so that it is
+		// not lexed as an ILLEGAL token.
+		if err := l.readChar(); err != nil {
+			return nil, err
+		}
+	}
+	return l, nil
+}
+
+// Iter returns an iterator over the remaining tokens. The token.EOF token is
+// the last one yielded.
+func (l *Lexer) Iter() iter.Seq[token.Token] {
+	return func(yield func(token.Token) bool) {
+		for {
+			tok := l.NextToken()
+			// Stop after EOF; NextToken keeps returning EOF, so a range
+			// loop over the sequence would otherwise never finish.
+			if !yield(tok) || tok.Type == token.EOF {
+				return
+			}
+		}
+	}
+}
+
+// WithErrorCallback sets the function that is called for every lexing error.
+// A nil callback disables the notifications.
+func (l *Lexer) WithErrorCallback(errorCallback ErrorCallback) {
+	l.errorCallback = errorCallback
+}
+
+// loc returns the location of the rune under examination. Line and Col are
+// zero-based; Col and Pos are byte offsets.
+func (l *Lexer) loc() token.Loc {
+	return token.Loc{
+		Line: l.lineCount,
+		Col:  l.position - l.linePosition,
+		Pos:  l.position,
+		File: l.file,
+	}
+}
+
+// NextToken skips leading whitespace and returns the next token. Once the
+// input is exhausted every call returns a token.EOF token. A character that
+// does not start any token is reported as an error and returned as
+// token.ILLEGAL.
+func (l *Lexer) NextToken() token.Token {
+	l.skipWhitespace()
+	var tok token.Token
+	tok.Loc = l.loc()
+
+	switch l.ch {
+	case ';':
+		tok = l.newToken(token.SEMICOLON)
+	case '=':
+		tok = l.newToken(token.EQUAL)
+	case '(':
+		tok = l.newToken(token.OPEN_PAREN)
+	case ')':
+		tok = l.newToken(token.CLOSE_PAREN)
+	case eof:
+		tok.Literal = ""
+		tok.Type = token.EOF
+	default:
+		switch {
+		case isNumber(l.ch):
+			tok.Literal = l.readInteger()
+			tok.Type = token.INT
+			return tok // readInteger already moved past the token
+		case unicode.IsLetter(l.ch):
+			tok.Literal = l.readIdentifier()
+			tok.Type = token.LookupKeyword(tok.Literal)
+			return tok // readIdentifier already moved past the token
+		default:
+			// %q prints the offending rune quoted and escaped.
+			l.error(tok.Loc, "Unknown character %q", l.ch)
+			tok = l.newToken(token.ILLEGAL)
+		}
+	}
+	l.advance()
+	return tok
+}
+
+// newToken builds a token of type t whose literal is the current rune.
+func (l *Lexer) newToken(t token.TokenType) token.Token {
+	return token.Token{
+		Type:    t,
+		Literal: string(l.ch),
+		Loc:     l.loc(),
+	}
+}
+
+// readChar moves the lexer to the next rune and keeps the line bookkeeping
+// up to date. At the end of the input ch becomes eof. The rune is consumed
+// even when an error is returned, so lexing can continue afterwards.
+func (l *Lexer) readChar() error {
+	atEnd := l.readPosition >= len(l.input)
+	if atEnd {
+		l.position = len(l.input)
+	} else {
+		l.position = l.readPosition
+	}
+	// The previous rune ended a line, so the new position starts the next.
+	if l.ch == '\n' {
+		l.linePosition = l.position
+		l.lineCount++
+	}
+	if atEnd {
+		l.ch = eof
+		return nil
+	}
+
+	r, w := utf8.DecodeRuneInString(l.input[l.readPosition:])
+	l.readPosition += w
+	l.ch = r
+
+	switch {
+	case r == utf8.RuneError && w == 1:
+		return fmt.Errorf("Found illegal UTF-8 encoding")
+	case r == bom && l.position > 0:
+		return fmt.Errorf("Found illegal BOM")
+	}
+	return nil
+}
+
+// advance moves to the next rune and reports a decoding error, if any, at
+// the location of the offending rune.
+func (l *Lexer) advance() {
+	if err := l.readChar(); err != nil {
+		l.error(l.loc(), "%v", err)
+	}
+}
+
+// readIdentifier consumes letters, digits and underscores and returns the
+// text they span.
+func (l *Lexer) readIdentifier() string {
+	startPos := l.position
+
+	for unicode.IsLetter(l.ch) || isNumber(l.ch) || l.ch == '_' {
+		l.advance()
+	}
+
+	return l.input[startPos:l.position]
+}
+
+// readInteger consumes a run of ASCII digits and returns the text they span.
+func (l *Lexer) readInteger() string {
+	startPos := l.position
+
+	for isNumber(l.ch) {
+		l.advance()
+	}
+
+	return l.input[startPos:l.position]
+}
+
+// isNumber reports whether ch is an ASCII decimal digit.
+func isNumber(ch rune) bool {
+	return '0' <= ch && ch <= '9'
+}
+
+// skipWhitespace moves past any Unicode white space at the current position.
+func (l *Lexer) skipWhitespace() {
+	for unicode.IsSpace(l.ch) {
+		l.advance()
+	}
+}
+
+// error counts a lexing error and forwards it to the error callback, if set.
+func (l *Lexer) error(loc token.Loc, format string, args ...any) {
+	l.errors++
+	if l.errorCallback != nil {
+		// args has to be expanded; passing the slice itself would hand the
+		// callback a single []any argument.
+		l.errorCallback(loc, format, args...)
+	}
+}
diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go
new file mode 100644
index 0000000..f4011a8
--- /dev/null
+++ b/lexer/lexer_test.go
@@ -0,0 +1,80 @@
+package lexer
+
+import (
+	"fmt"
+	"testing"
+
+	"robaertschi.xyz/robaertschi/tt/token"
+)
+
+// lexerTest pairs an input with the tokens the lexer must produce for it.
+type lexerTest struct {
+	input         string
+	expectedToken []token.Token
+}
+
+// runLexerTest lexes test.input and compares the type and literal of every
+// token with test.expectedToken. A call of the error callback fails the test.
+func runLexerTest(t *testing.T, test lexerTest) {
+	t.Helper()
+
+	l, err := New(test.input, "test.tt")
+	if err != nil {
+		// l is nil when New fails, so the test cannot continue.
+		t.Fatalf("creating lexer failed: %v", err)
+	}
+	l.WithErrorCallback(func(loc token.Loc, format string, args ...any) {
+		msg := fmt.Sprintf(format, args...)
+		t.Errorf("Lexer error callback called: %s:%d:%d %s", loc.File, loc.Line, loc.Col, msg)
+	})
+
+	for i, expectedToken := range test.expectedToken {
+		actualToken := l.NextToken()
+		t.Logf("expected: %v, got: %v", expectedToken, actualToken)
+
+		if expectedToken.Literal != actualToken.Literal {
+			t.Errorf("%d: expected literal %q, got %q", i, expectedToken.Literal, actualToken.Literal)
+		}
+
+		if expectedToken.Type != actualToken.Type {
+			t.Errorf("%d: expected type %q, got %q", i, expectedToken.Type, actualToken.Type)
+		}
+	}
+}
+
+func TestBasicFunctionality(t *testing.T) {
+	runLexerTest(t, lexerTest{
+		input: "fn main() = 0;",
+		expectedToken: []token.Token{
+			{Type: token.FN, Literal: "fn"},
+			{Type: token.IDENT, Literal: "main"},
+			{Type: token.OPEN_PAREN, Literal: "("},
+			{Type: token.CLOSE_PAREN, Literal: ")"},
+			{Type: token.EQUAL, Literal: "="},
+			{Type: token.INT, Literal: "0"},
+			{Type: token.SEMICOLON, Literal: ";"},
+			{Type: token.EOF, Literal: ""},
+		},
+	})
+}
+
+// TestIterStopsAfterEOF checks that ranging over Iter terminates and that the
+// EOF token is the last one delivered.
+func TestIterStopsAfterEOF(t *testing.T) {
+	l, err := New("fn", "test.tt")
+	if err != nil {
+		t.Fatalf("creating lexer failed: %v", err)
+	}
+
+	var types []token.TokenType
+	for tok := range l.Iter() {
+		types = append(types, tok.Type)
+		if len(types) > 2 {
+			t.Fatalf("Iter yielded more than 2 tokens: %v", types)
+		}
+	}
+
+	if len(types) != 2 || types[0] != token.FN || types[1] != token.EOF {
+		t.Errorf("expected [FN EOF], got %v", types)
+	}
+}
diff --git a/parser/parser.go b/parser/parser.go
new file mode 100644
index 0000000..901fec7
--- /dev/null
+++ b/parser/parser.go
@@ -0,0 +1,24 @@
+// Package parser contains the parser for the tt language.
+package parser
+
+import (
+	"robaertschi.xyz/robaertschi/tt/lexer"
+	"robaertschi.xyz/robaertschi/tt/token"
+)
+
+// ErrorCallback mirrors lexer.ErrorCallback but takes a token.Token instead
+// of a token.Loc.
+type ErrorCallback func(token.Token, string, ...any)
+
+// Parser holds the parsing state for the tokens of one lexer.
+type Parser struct {
+	// lexer is a pointer because lexer.New returns a *lexer.Lexer and every
+	// Lexer method has a pointer receiver; a copy would not share its state.
+	lexer *lexer.Lexer
+
+	curToken  token.Token
+	peekToken token.Token
+
+	errors        int
+	errorCallback ErrorCallback
+}
diff --git a/test.tt b/test.tt
new file mode 100644
index 0000000..cde2cda
--- /dev/null
+++ b/test.tt
@@ -0,0 +1 @@
+fn main() = 0;
diff --git a/token/token.go b/token/token.go
new file mode 100644
index 0000000..7343987
--- /dev/null
+++ b/token/token.go
@@ -0,0 +1,53 @@
+// Package token defines the tokens produced by the lexer.
+package token
+
+// Loc is a position in a source file.
+type Loc struct {
+	Line int
+	Col  int
+	Pos  int
+	File string
+}
+
+// TokenType identifies the kind of a Token.
+type TokenType string
+
+// Token is a single lexical unit with its text and location.
+type Token struct {
+	Type    TokenType
+	Literal string
+	Loc     Loc
+}
+
+// keywords maps reserved words to their token type.
+var keywords = map[string]TokenType{
+	"fn": FN,
+}
+
+// The token types. Every constant carries the TokenType type explicitly; an
+// untyped string constant would become a plain string wherever no type is
+// implied, for example in `t := token.FN`.
+const (
+	ILLEGAL TokenType = "ILLEGAL"
+	EOF     TokenType = "EOF"
+
+	IDENT TokenType = "IDENT"
+	INT   TokenType = "INT"
+
+	SEMICOLON   TokenType = ";"
+	EQUAL       TokenType = "="
+	OPEN_PAREN  TokenType = "("
+	CLOSE_PAREN TokenType = ")"
+
+	// Keywords
+	FN TokenType = "FN"
+)
+
+// LookupKeyword returns the keyword token type for literal, or IDENT if
+// literal is not a keyword.
+func LookupKeyword(literal string) TokenType {
+	if value, ok := keywords[literal]; ok {
+		return value
+	}
+	return IDENT
+}
diff --git a/utils/utils.go b/utils/utils.go
new file mode 100644
index 0000000..d4b585b
--- /dev/null
+++ b/utils/utils.go
@@ -0,0 +1 @@
+package utils