diff --git a/Architecture.md b/Architecture.md
new file mode 100644
index 0000000..1903bb8
--- /dev/null
+++ b/Architecture.md
@@ -0,0 +1,13 @@
+tt Programming Language Backend Architecture
+
+# Goals
+- Easy support for different architectures and OSs
+- Easily optimisable at most levels
+- Good performance
+
+# Architecture
+
+AST --> Type Checking --> TAST --> IR Emission --> TTIR --> Codegen --> TTASM --> Emit --> FASM --> Binary
+
+TTIR: The TT Intermediate Representation is the representation that the AST gets turned into. It will mostly be used for optimisation and for abstracting away from assembly.
+TAST: Typed AST
diff --git a/ast/ast.go b/ast/ast.go
index d823278..a33dffe 100644
--- a/ast/ast.go
+++ b/ast/ast.go
@@ -63,7 +63,7 @@ type ErrorExpression struct {
 
 func (e *ErrorExpression) expressionNode() {}
 func (e *ErrorExpression) TokenLiteral() string { return e.InvalidToken.Literal }
-func (e *ErrorExpression) String() string { return "" }
+func (e *ErrorExpression) String() string { return "" }
 
 type IntegerExpression struct {
     Token token.Token // The token.INT
diff --git a/lexer/lexer.go b/lexer/lexer.go
index a0c6687..8444ecc 100644
--- a/lexer/lexer.go
+++ b/lexer/lexer.go
@@ -64,20 +64,20 @@ func (l *Lexer) NextToken() token.Token {
 
     switch l.ch {
     case ';':
-        tok = l.newToken(token.SEMICOLON)
+        tok = l.newToken(token.Semicolon)
     case '=':
-        tok = l.newToken(token.EQUAL)
+        tok = l.newToken(token.Equal)
     case '(':
-        tok = l.newToken(token.OPEN_PAREN)
+        tok = l.newToken(token.OpenParen)
     case ')':
-        tok = l.newToken(token.CLOSE_PAREN)
+        tok = l.newToken(token.CloseParen)
     case -1:
         tok.Literal = ""
-        tok.Type = token.EOF
+        tok.Type = token.Eof
     default:
         if isNumber(l.ch) {
             tok.Literal = l.readInteger()
-            tok.Type = token.INT
+            tok.Type = token.Int
             return tok
         } else if unicode.IsLetter(l.ch) {
             tok.Literal = l.readIdentifier()
@@ -87,7 +87,7 @@ func (l *Lexer) NextToken() token.Token {
             if l.errorCallback != nil {
                 l.errorCallback(tok.Loc, "Unknown character %r", l.ch)
             }
-            tok = l.newToken(token.ILLEGAL)
+            tok = l.newToken(token.Illegal)
         }
     }
     if err := l.readChar(); err != nil {
@@ -164,7 +164,7 @@ func (l *Lexer) skipWhitespace() {
 
 func (l *Lexer) error(loc token.Loc, format string, args ...any) {
     if l.errorCallback != nil {
-        l.errorCallback(loc, format, args)
+        l.errorCallback(loc, format, args...)
     }
 
     l.errors += 1
diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go
index f4011a8..3f5453c 100644
--- a/lexer/lexer_test.go
+++ b/lexer/lexer_test.go
@@ -17,7 +17,7 @@ func runLexerTest(t *testing.T, test lexerTest) {
     l, err := New(test.input, "test.tt")
     l.WithErrorCallback(func(l token.Loc, s string, a ...any) {
-        format := fmt.Sprintf(s, a)
+        format := fmt.Sprintf(s, a...)
         t.Errorf("Lexer error callback called: %s:%d:%d %s", l.File, l.Line, l.Col, format)
     })
 
     if err != nil {
@@ -42,14 +42,14 @@ func TestBasicFunctionality(t *testing.T) {
     runLexerTest(t, lexerTest{
         input: "fn main() = 0;",
         expectedToken: []token.Token{
-            {Type: token.FN, Literal: "fn"},
-            {Type: token.IDENT, Literal: "main"},
-            {Type: token.OPEN_PAREN, Literal: "("},
-            {Type: token.CLOSE_PAREN, Literal: ")"},
-            {Type: token.EQUAL, Literal: "="},
-            {Type: token.INT, Literal: "0"},
-            {Type: token.SEMICOLON, Literal: ";"},
-            {Type: token.EOF, Literal: ""},
+            {Type: token.Fn, Literal: "fn"},
+            {Type: token.Ident, Literal: "main"},
+            {Type: token.OpenParen, Literal: "("},
+            {Type: token.CloseParen, Literal: ")"},
+            {Type: token.Equal, Literal: "="},
+            {Type: token.Int, Literal: "0"},
+            {Type: token.Semicolon, Literal: ";"},
+            {Type: token.Eof, Literal: ""},
         },
     })
 }
diff --git a/parser/parser.go b/parser/parser.go
index 901fec7..fe3cb2b 100644
--- a/parser/parser.go
+++ b/parser/parser.go
@@ -1,18 +1,211 @@
 package parser
 
 import (
+    "fmt"
+    "strconv"
+
+    "robaertschi.xyz/robaertschi/tt/ast"
     "robaertschi.xyz/robaertschi/tt/lexer"
     "robaertschi.xyz/robaertschi/tt/token"
 )
 
+type precedence int
+
+const (
+    LOWEST precedence = iota
+    SUM
+    PRODUCT
+)
+
 type ErrorCallback func(token.Token, string, ...any)
+type prefixParseFn func() ast.Expression
+type infixParseFn func(ast.Expression) ast.Expression
 
 type Parser struct {
-    lexer lexer.Lexer
-
     curToken      token.Token
     peekToken     token.Token
     errors        int
     errorCallback ErrorCallback
+
+    l              *lexer.Lexer
+    prefixParseFns map[token.TokenType]prefixParseFn
+    infixParseFns  map[token.TokenType]infixParseFn
+}
+
+func New(l *lexer.Lexer) *Parser {
+    p := &Parser{l: l}
+
+    p.prefixParseFns = make(map[token.TokenType]prefixParseFn)
+    p.registerPrefixFn(token.Int, p.parseIntegerExpression)
+
+    p.infixParseFns = make(map[token.TokenType]infixParseFn)
+
+    p.nextToken()
+    p.nextToken()
+
+    return p
+}
+
+func (p *Parser) WithErrorCallback(errorCallback ErrorCallback) {
+    p.errorCallback = errorCallback
+}
+
+func (p *Parser) registerInfixFn(tt token.TokenType, infix infixParseFn) {
+    p.infixParseFns[tt] = infix
+}
+
+func (p *Parser) registerPrefixFn(tt token.TokenType, fn prefixParseFn) {
+    p.prefixParseFns[tt] = fn
+}
+
+func (p *Parser) nextToken() {
+    p.curToken = p.peekToken
+    p.peekToken = p.l.NextToken()
+    fmt.Printf("curToken: %q, peekToken: %q\n", p.curToken.Type, p.peekToken.Type)
+}
+
+func (p *Parser) curTokenIs(tt token.TokenType) bool {
+    return p.curToken.Type == tt
+}
+
+func (p *Parser) peekTokenIs(tt token.TokenType) bool {
+    return p.peekToken.Type == tt
+}
+
+func getPrecedence(tt token.TokenType) precedence {
+    switch tt {
+    default:
+        return LOWEST
+    }
+}
+
+func (p *Parser) peekPrecedence() precedence {
+    return getPrecedence(p.peekToken.Type)
+}
+
+func (p *Parser) error(t token.Token, format string, args ...any) {
+    if p.errorCallback != nil {
+        p.errorCallback(t, format, args...)
+    } else {
+        fmt.Printf("%s:%d:%d ", t.Loc.File, t.Loc.Line, t.Loc.Col)
+        fmt.Printf(format, args...)
+        fmt.Println()
+    }
+
+    p.errors += 1
+}
+
+func (p *Parser) exprError(invalidToken token.Token, format string, args ...any) ast.Expression {
+    p.error(invalidToken, format, args...)
+    return &ast.ErrorExpression{
+        InvalidToken: invalidToken,
+    }
+}
+
+func (p *Parser) expect(tt token.TokenType) bool {
+    if p.curToken.Type != tt {
+        p.error(p.curToken, "expected %q, got %q", tt, p.curToken.Type)
+        return false
+    }
+    return true
+}
+
+func (p *Parser) expectPeek(tt token.TokenType) bool {
+    if p.peekToken.Type != tt {
+        p.error(p.peekToken, "expected %q, got %q", tt, p.peekToken.Type)
+        p.nextToken()
+        return false
+    }
+    p.nextToken()
+    return true
+}
+
+func (p *Parser) ParseProgram() *ast.Program {
+    decls := []ast.Declaration{}
+
+    for p.curToken.Type != token.Eof {
+        decl := p.parseDeclaration()
+        if decl != nil {
+            decls = append(decls, decl)
+        }
+        p.nextToken()
+    }
+
+    return &ast.Program{
+        Declarations: decls,
+    }
+}
+
+func (p *Parser) parseDeclaration() ast.Declaration {
+    if !p.expect(token.Fn) {
+        return nil
+    }
+    tok := p.curToken
+    if !p.expectPeek(token.Ident) {
+        return nil
+    }
+
+    name := p.curToken.Literal
+    if !p.expectPeek(token.OpenParen) {
+        return nil
+    }
+    if !p.expectPeek(token.CloseParen) {
+        return nil
+    }
+    if !p.expectPeek(token.Equal) {
+        return nil
+    }
+
+    p.nextToken()
+    expr := p.parseExpression(LOWEST)
+    if !p.expectPeek(token.Semicolon) {
+        return nil
+    }
+
+    return &ast.FunctionDeclaration{
+        Token: tok,
+        Name:  name,
+        Body:  expr,
+    }
+}
+
+func (p *Parser) parseExpression(precedence precedence) ast.Expression {
+    prefix := p.prefixParseFns[p.curToken.Type]
+    if prefix == nil {
+        return p.exprError(p.curToken, "could not parse invalid token in expression %s", p.curToken.Type)
+    }
+
+    leftExpr := prefix()
+
+    for !p.peekTokenIs(token.Semicolon) && precedence < p.peekPrecedence() {
+        infix := p.infixParseFns[p.peekToken.Type]
+        if infix == nil {
+            return leftExpr
+        }
+
+        p.nextToken()
+
+        leftExpr = infix(leftExpr)
+    }
+
+    return leftExpr
+}
+
+func (p *Parser) parseIntegerExpression() ast.Expression {
+    if !p.expect(token.Int) {
+        return &ast.ErrorExpression{InvalidToken: p.curToken}
+    }
+
+    int := &ast.IntegerExpression{
+        Token: p.curToken,
+    }
+
+    value, err := strconv.ParseInt(int.Token.Literal, 0, 64)
+    if err != nil {
+        return p.exprError(int.Token, "invalid integer literal: %v", err)
+    }
+
+    int.Value = value
+    return int
 }
diff --git a/parser/parser_test.go b/parser/parser_test.go
new file mode 100644
index 0000000..4f670a6
--- /dev/null
+++ b/parser/parser_test.go
@@ -0,0 +1,106 @@
+package parser
+
+import (
+    "fmt"
+    "testing"
+
+    "robaertschi.xyz/robaertschi/tt/ast"
+    "robaertschi.xyz/robaertschi/tt/lexer"
+    "robaertschi.xyz/robaertschi/tt/token"
+)
+
+type parserTest struct {
+    input           string
+    expectedProgram ast.Program
+}
+
+func runParserTest(test parserTest, t *testing.T) {
+    t.Helper()
+    l, err := lexer.New(test.input, "test.tt")
+    l.WithErrorCallback(func(l token.Loc, s string, a ...any) {
+        format := fmt.Sprintf(s, a...)
+        t.Errorf("Lexer error callback called: %s:%d:%d %s", l.File, l.Line, l.Col, format)
+    })
+
+    if err != nil {
+        t.Errorf("creating lexer failed: %v", err)
+    }
+
+    p := New(l)
+    p.WithErrorCallback(func(tok token.Token, s string, a ...any) {
+        format := fmt.Sprintf(s, a...)
+        t.Errorf("Parser error callback called: %s:%d:%d %s", tok.Loc.File, tok.Loc.Line, tok.Loc.Col, format)
+    })
+
+    actual := p.ParseProgram()
+
+    if p.errors > 0 {
+        t.Fatalf("parser errors: %d", p.errors)
+    }
+
+    if len(actual.Declarations) != len(test.expectedProgram.Declarations) {
+        t.Fatalf("expected %d declarations, got %d", len(test.expectedProgram.Declarations), len(actual.Declarations))
+    }
+
+    for i, decl := range test.expectedProgram.Declarations {
+        expectDeclarationSame(t, decl, actual.Declarations[i])
+    }
+}
+
+func expectDeclarationSame(t *testing.T, expected ast.Declaration, actual ast.Declaration) {
+    t.Helper()
+
+    switch expected := expected.(type) {
+    case *ast.FunctionDeclaration:
+        actual, ok := actual.(*ast.FunctionDeclaration)
+        if !ok {
+            t.Errorf("expected function declaration, got %T", actual)
+            return
+        }
+        if actual.Name != expected.Name {
+            t.Errorf("expected function name %s, got %s", expected.Name, actual.Name)
+        }
+
+        expectExpression(t, expected.Body, actual.Body)
+    }
+}
+
+func expectExpression(t *testing.T, expected ast.Expression, actual ast.Expression) {
+    t.Helper()
+
+    switch expected := expected.(type) {
+    case *ast.ErrorExpression:
+        actual, ok := actual.(*ast.ErrorExpression)
+        if !ok {
+            t.Errorf("expected error expression, got %T", actual)
+            return
+        }
+        if actual.InvalidToken != expected.InvalidToken {
+            t.Errorf("expected invalid token %v, got %v", expected.InvalidToken, actual.InvalidToken)
+        }
+    case *ast.IntegerExpression:
+        integerExpr, ok := actual.(*ast.IntegerExpression)
+        if !ok {
+            t.Errorf("expected *ast.IntegerExpression, got %T", actual)
+            return
+        }
+        if integerExpr.Value != expected.Value {
+            t.Errorf("expected integer value %d, got %d", expected.Value, integerExpr.Value)
+        }
+    }
+}
+
+func TestFunctionDeclaration(t *testing.T) {
+    test := parserTest{
+        input: "fn main() = 0;",
+        expectedProgram: ast.Program{
+            Declarations: []ast.Declaration{
+                &ast.FunctionDeclaration{
+                    Name: "main",
+                    Body: &ast.IntegerExpression{Value: 0, Token: token.Token{Type: token.Int, Literal: "0"}},
+                },
+            },
+        },
+    }
+    runParserTest(test, t)
+}
diff --git a/tast/tast.go b/tast/tast.go
new file mode 100644
index 0000000..12e1620
--- /dev/null
+++ b/tast/tast.go
@@ -0,0 +1,76 @@
+// Typed AST
+// Almost identical to the AST, but contains types and contains only correct types.
+// Also, it does not contain an ErrorExpression, because that would indicate a previous error.
+
+package tast
+
+import (
+    "fmt"
+    "strings"
+
+    "robaertschi.xyz/robaertschi/tt/token"
+    "robaertschi.xyz/robaertschi/tt/types"
+)
+
+type Node interface {
+    TokenLiteral() string
+    String() string
+}
+
+type Declaration interface {
+    Node
+    declarationNode()
+}
+
+type Expression interface {
+    Node
+    expressionNode()
+    Type() types.Type
+}
+
+type Program struct {
+    Declarations []Declaration
+}
+
+func (p *Program) TokenLiteral() string {
+    if len(p.Declarations) > 0 {
+        return p.Declarations[0].TokenLiteral()
+    }
+    return ""
+}
+
+func (p *Program) String() string {
+    var builder strings.Builder
+
+    for _, decl := range p.Declarations {
+        builder.WriteString(decl.String())
+        builder.WriteRune('\n')
+    }
+
+    return builder.String()
+}
+
+type FunctionDeclaration struct {
+    Token      token.Token // The token.FN
+    Body       Expression
+    Name       string
+    ReturnType types.Type
+}
+
+func (fd *FunctionDeclaration) declarationNode() {}
+func (fd *FunctionDeclaration) TokenLiteral() string { return fd.Token.Literal }
+func (fd *FunctionDeclaration) String() string {
+    return fmt.Sprintf("fn %v(): %v = %v;", fd.Name, fd.ReturnType.Name(), fd.Body.String())
+}
+
+type IntegerExpression struct {
+    Token token.Token // The token.INT
+    Value int64
+}
+
+func (ie *IntegerExpression) expressionNode() {}
+func (ie *IntegerExpression) Type() types.Type {
+    return types.I64
+}
+func (ie *IntegerExpression) TokenLiteral() string { return ie.Token.Literal }
+func (ie *IntegerExpression) String() string { return ie.Token.Literal }
diff --git a/token/token.go b/token/token.go
index 7343987..97862cd 100644
--- a/token/token.go
+++ b/token/token.go
@@ -16,28 +16,28 @@ type Token struct {
 }
 
 var keywords = map[string]TokenType{
-    "fn": FN,
+    "fn": Fn,
 }
 
 const (
-    ILLEGAL TokenType = "ILLEGAL"
-    EOF     TokenType = "EOF"
+    Illegal TokenType = "ILLEGAL"
+    Eof     TokenType = "EOF"
 
-    IDENT TokenType = "IDENT"
-    INT   TokenType = "INT"
+    Ident TokenType = "IDENT"
+    Int   TokenType = "INT"
 
-    SEMICOLON   = ";"
-    EQUAL       = "="
-    OPEN_PAREN  = "("
-    CLOSE_PAREN = ")"
+    Semicolon  = ";"
+    Equal      = "="
+    OpenParen  = "("
+    CloseParen = ")"
 
     // Keywords
-    FN = "FN"
+    Fn = "FN"
 )
 
 func LookupKeyword(literal string) TokenType {
     if value, ok := keywords[literal]; ok {
         return value
     }
-    return IDENT
+    return Ident
 }
diff --git a/ttir/ttir.go b/ttir/ttir.go
new file mode 100644
index 0000000..5cac9d4
--- /dev/null
+++ b/ttir/ttir.go
@@ -0,0 +1,36 @@
+package ttir
+
+import "strconv"
+
+type Program struct {
+    Functions []Function
+}
+
+type Function struct {
+    Name         string
+    Instructions []Instruction
+}
+
+type Instruction interface {
+    String() string
+    instruction()
+}
+
+type Ret struct {
+    op Operand
+}
+
+func (r *Ret) String() string { return "ret " + r.op.String() }
+func (r *Ret) instruction()   {}
+
+type Operand interface {
+    String() string
+    operand()
+}
+
+type Constant struct {
+    Value int64
+}
+
+func (c *Constant) String() string { return strconv.FormatInt(c.Value, 10) }
+func (c *Constant) operand()       {}
diff --git a/typechecker/checker.go b/typechecker/checker.go
new file mode 100644
index 0000000..940bf1f
--- /dev/null
+++ b/typechecker/checker.go
@@ -0,0 +1,60 @@
+package typechecker
+
+import (
+    "errors"
+    "fmt"
+
+    "robaertschi.xyz/robaertschi/tt/ast"
+    "robaertschi.xyz/robaertschi/tt/tast"
+    "robaertschi.xyz/robaertschi/tt/token"
+)
+
+type Checker struct{}
+
+func New() *Checker {
+    return &Checker{}
+}
+
+func (c *Checker) error(t token.Token, format string, args ...any) error {
+    return fmt.Errorf("%s:%d:%d %s", t.Loc.File, t.Loc.Line, t.Loc.Col, fmt.Sprintf(format, args...))
+}
+
+func (c *Checker) CheckProgram(program ast.Program) (tast.Program, error) {
+    decls := []tast.Declaration{}
+    errs := []error{}
+
+    for _, decl := range program.Declarations {
+        decl, err := c.checkDeclaration(decl)
+        if err == nil {
+            decls = append(decls, decl)
+        } else {
+            errs = append(errs, err)
+        }
+    }
+
+    return tast.Program{Declarations: decls}, errors.Join(errs...)
+}
+
+func (c *Checker) checkDeclaration(decl ast.Declaration) (tast.Declaration, error) {
+    switch decl := decl.(type) {
+    case *ast.FunctionDeclaration:
+        body, err := c.checkExpression(decl.Body)
+
+        if err != nil {
+            return nil, err
+        }
+
+        return &tast.FunctionDeclaration{Token: decl.Token, Body: body, ReturnType: body.Type(), Name: decl.Name}, nil
+    }
+    return nil, errors.New("unhandled declaration in type checker")
+}
+
+func (c *Checker) checkExpression(expr ast.Expression) (tast.Expression, error) {
+    switch expr := expr.(type) {
+    case *ast.IntegerExpression:
+        return &tast.IntegerExpression{Token: expr.Token, Value: expr.Value}, nil
+    case *ast.ErrorExpression:
+        return nil, c.error(expr.InvalidToken, "invalid expression")
+    }
+    return nil, fmt.Errorf("unhandled expression in type checker")
+}
diff --git a/types/types.go b/types/types.go
new file mode 100644
index 0000000..995a1a2
--- /dev/null
+++ b/types/types.go
@@ -0,0 +1,36 @@
+package types
+
+type Type interface {
+    // Checks if the two types are the same
+    IsSameType(Type) bool
+    Name() string
+}
+
+type TypeId struct {
+    id   int64
+    name string
+}
+
+const (
+    I64Id int64 = iota
+)
+
+var (
+    I64 = New(I64Id, "i64")
+)
+
+func (ti *TypeId) IsSameType(t Type) bool {
+    if ti2, ok := t.(*TypeId); ok {
+        return ti.id == ti2.id
+    }
+
+    return false
+}
+
+func (ti *TypeId) Name() string {
+    return ti.name
+}
+
+func New(id int64, name string) Type {
+    return &TypeId{id: id, name: name}
+}
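
Note: the packages added in this commit compose into a small front end: the lexer feeds the parser, and the type checker turns the resulting AST into a TAST. A minimal sketch of how a driver might wire them together, assuming only the APIs visible in this diff (the main package itself is not part of the commit):

package main

import (
    "fmt"
    "os"

    "robaertschi.xyz/robaertschi/tt/lexer"
    "robaertschi.xyz/robaertschi/tt/parser"
    "robaertschi.xyz/robaertschi/tt/typechecker"
)

func main() {
    // Lex and parse "fn main() = 0;" into an *ast.Program.
    l, err := lexer.New("fn main() = 0;", "example.tt")
    if err != nil {
        fmt.Fprintf(os.Stderr, "could not create lexer: %v\n", err)
        os.Exit(1)
    }

    p := parser.New(l)
    program := p.ParseProgram()

    // Type check the AST into a TAST; the function's return type is
    // inferred from its body expression (i64 for an integer literal).
    checker := typechecker.New()
    tprogram, err := checker.CheckProgram(*program)
    if err != nil {
        fmt.Fprintf(os.Stderr, "type error: %v\n", err)
        os.Exit(1)
    }

    // Prints: fn main(): i64 = 0;
    fmt.Print(tprogram.String())
}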
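Note: parser.go already carries the Pratt-parser scaffolding (prefix/infix tables, precedence climbing in parseExpression), but no infix function is registered yet. A sketch of how a binary "+" operator could hook in, assuming a hypothetical token.Plus token type and ast.BinaryExpression node that this diff does not define; the code would live in package parser:

// In New(), next to the existing registration (hypothetical):
//     p.registerInfixFn(token.Plus, p.parseBinaryExpression)
// and getPrecedence would gain a case:
//     case token.Plus:
//         return SUM

// parseBinaryExpression is called by parseExpression with the already-parsed
// left operand; curToken is the operator token when it runs.
func (p *Parser) parseBinaryExpression(left ast.Expression) ast.Expression {
    expr := &ast.BinaryExpression{ // hypothetical AST node
        Token:    p.curToken,
        Operator: p.curToken.Literal,
        Lhs:      left,
    }

    precedence := getPrecedence(p.curToken.Type)
    p.nextToken()
    expr.Rhs = p.parseExpression(precedence)

    return expr
}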
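Note: Architecture.md places IR emission between the typed AST and codegen, and ttir.go defines the Ret/Constant building blocks, but the emitter itself is not in this commit. A sketch of what lowering "fn main() = 0;" into TTIR might look like, assuming a hypothetical Emit function placed inside package ttir (the op field of Ret is unexported, so an emitter in another package would need a constructor):

package ttir

import "robaertschi.xyz/robaertschi/tt/tast"

// Emit lowers a typed program into TTIR. Hypothetical: this commit only
// defines the TTIR data structures, not the emitter.
func Emit(program tast.Program) Program {
    functions := make([]Function, 0, len(program.Declarations))
    for _, decl := range program.Declarations {
        if fn, ok := decl.(*tast.FunctionDeclaration); ok {
            functions = append(functions, emitFunction(fn))
        }
    }
    return Program{Functions: functions}
}

// emitFunction turns a body that is a bare integer literal, such as
// `fn main() = 0;`, into a single "ret 0" instruction.
func emitFunction(fn *tast.FunctionDeclaration) Function {
    var instructions []Instruction
    switch body := fn.Body.(type) {
    case *tast.IntegerExpression:
        instructions = append(instructions, &Ret{op: &Constant{Value: body.Value}})
    }
    return Function{Name: fn.Name, Instructions: instructions}
}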