From ff2ff3d417d17382b32cee47f6376254e0d7ffec Mon Sep 17 00:00:00 2001
From: Robin
Date: Wed, 13 Nov 2024 21:30:28 +0100
Subject: [PATCH] ast

---
Review notes (text in this section is commentary and is not part of the
commit):

* The line structure of this patch was rebuilt from a whitespace-collapsed
  copy. Indentation and column alignment follow gofmt; blank context lines
  were placed so that every hunk matches the counts on its @@ line. The
  blob hashes on the index lines could not be re-verified.
* NOTE(review): in the NextToken hunk the one extra added line is taken to
  be the tok.Loc assignment, with an unchanged blank line before the
  switch. The collapsed text would equally fit an added blank line followed
  by an unchanged tok.Loc line; confirm against lexer/lexer.go.
* NOTE(review): the indentation inside get_to_work.th (four spaces here) is
  a guess; confirm against the working tree.
* The new top-level file "2" declares package ast and repeats most of
  ast/ast.go, plus Optional[T] and an empty VariableDefiniton struct. It
  has no .go extension, so the go tool will not compile it. Presumably a
  stray editor save; confirm and drop it from the commit.
* "Definiton" in VariableDefiniton / ImplicitVariableDefiniton is missing
  an "i" ("Definition"); renaming is cheapest before the parser uses it.
* Lexer.String has a value receiver while readChar, readNumber and
  NextToken use pointer receivers.
* In token/token.go only Illegal is declared with TokenType. Eof, NewLine,
  Semicolon and Colon carry no explicit type in the lines shown here, so as
  written they are untyped string constants.
* EndLine is renamed to NewLine; any use of token.EndLine outside the files
  touched here would need the same rename.

 2                   | 109 ++++++++++++++++++++++++++++++++++++++
 ast/ast.go          | 124 ++++++++++++++++++++++++++++++++++++++++++++
 get_to_work.th      |   3 ++
 lexer/lexer.go      |   9 +++-
 lexer/lexer_test.go |   6 ++-
 parser/parser.go    |   1 +
 token/token.go      |   2 +-
 7 files changed, 251 insertions(+), 3 deletions(-)
 create mode 100644 2
 create mode 100644 ast/ast.go
 create mode 100644 get_to_work.th
 create mode 100644 parser/parser.go

diff --git a/2 b/2
new file mode 100644
index 0000000..634cfaf
--- /dev/null
+++ b/2
@@ -0,0 +1,109 @@
+package ast
+
+import (
+	"strings"
+
+	"git.robaertschi.xyz/robaertschi/thorgot/token"
+)
+
+type Optional[T any] struct {
+	HasValue bool
+	Value    T
+}
+
+func (o Optional[T]) OrElse(other T) T {
+	if o.HasValue {
+		return o.Value
+	}
+
+	return other
+}
+
+type Indentation int
+
+func (i Indentation) indent() string {
+	return strings.Repeat(" ", int(i*4))
+}
+
+type Node interface {
+	TokenLiteral() string
+	String(Indentation) string
+}
+
+type ExpressionNode interface {
+	Node
+	expressionNode()
+}
+
+type StatementNode interface {
+	Node
+	statementNode()
+}
+
+type Type string
+
+func (t Type) String() string {
+	return string(t)
+}
+
+type Block struct {
+	Token      token.Token // the RBrace token
+	Statements []StatementNode
+}
+
+func (b *Block) TokenLiteral() string { return b.Token.Literal }
+func (b *Block) String(i Indentation) string {
+	var out strings.Builder
+
+	ind := i.indent()
+
+	out.WriteString(ind + "{\n")
+	for _, statement := range b.Statements {
+		out.WriteString(statement.String(i + 1))
+	}
+	out.WriteString(ind + "}\n")
+
+	return out.String()
+}
+func (b *Block) statementNode() {}
+
+type FunctionArgument struct {
+	Name string
+	Type Type
+}
+
+type Function struct {
+	Token         token.Token // the Fn token
+	Name          string
+	Arguments     []FunctionArgument
+	ReturnType    Type
+	HasReturnType bool
+	Block         Block
+}
+
+func (f *Function) TokenLiteral() string { return f.Token.Literal }
+func (f *Function) String(i Indentation) string {
+	var out strings.Builder
+
+	ind := i.indent()
+	out.WriteString(ind + "fn " + f.Name + "(")
+	for i, arg := range f.Arguments {
+		out.WriteString(arg.Name + " " + arg.Type.String())
+		if i != len(f.Arguments)-1 {
+			out.WriteString(", ")
+		}
+	}
+	out.WriteString(") ")
+
+	if f.HasReturnType {
+		out.WriteString(f.ReturnType.String() + " ")
+	}
+
+	out.WriteString(f.Block.String(i))
+
+	return out.String()
+}
+func (f *Function) statementNode() {}
+
+type VariableDefiniton struct {
+}
diff --git a/ast/ast.go b/ast/ast.go
new file mode 100644
index 0000000..fa7062f
--- /dev/null
+++ b/ast/ast.go
@@ -0,0 +1,124 @@
+package ast
+
+import (
+	"strings"
+
+	"git.robaertschi.xyz/robaertschi/thorgot/token"
+)
+
+// Statements should start with the specified Indentation, Expression should only do that on new lines
+type Indentation int
+
+func (i Indentation) indent() string {
+	return strings.Repeat(" ", int(i*4))
+}
+
+type Node interface {
+	TokenLiteral() string
+	String(Indentation) string
+}
+
+type ExpressionNode interface {
+	Node
+	expressionNode()
+}
+
+type StatementNode interface {
+	Node
+	statementNode()
+}
+
+type Type string
+
+func (t Type) String() string {
+	return string(t)
+}
+
+type Block struct {
+	Token      token.Token // the RBrace token
+	Statements []StatementNode
+}
+
+func (b *Block) TokenLiteral() string { return b.Token.Literal }
+func (b *Block) String(i Indentation) string {
+	var out strings.Builder
+
+	ind := i.indent()
+
+	out.WriteString(ind + "{\n")
+	for _, statement := range b.Statements {
+		out.WriteString(statement.String(i + 1))
+	}
+	out.WriteString(ind + "}\n")
+
+	return out.String()
+}
+func (b *Block) statementNode() {}
+
+type FunctionArgument struct {
+	Name string
+	Type Type
+}
+
+type Function struct {
+	Token         token.Token // the Fn token
+	Name          string
+	Arguments     []FunctionArgument
+	ReturnType    Type
+	HasReturnType bool
+	Block         Block
+}
+
+func (f *Function) TokenLiteral() string { return f.Token.Literal }
+func (f *Function) String(i Indentation) string {
+	var out strings.Builder
+
+	ind := i.indent()
+	out.WriteString(ind + "fn " + f.Name + "(")
+	for i, arg := range f.Arguments {
+		out.WriteString(arg.Name + " " + arg.Type.String())
+		if i != len(f.Arguments)-1 {
+			out.WriteString(", ")
+		}
+	}
+	out.WriteString(") ")
+
+	if f.HasReturnType {
+		out.WriteString(f.ReturnType.String() + " ")
+	}
+
+	out.WriteString(f.Block.String(i))
+
+	return out.String()
+}
+func (f *Function) statementNode() {}
+
+type ImplicitVariableDefiniton struct {
+	Token token.Token // The Identifier token
+	Name  string
+	Value ExpressionNode
+}
+
+func (ivd *ImplicitVariableDefiniton) TokenLiteral() string {
+	return ivd.Token.Literal
+}
+func (ivd *ImplicitVariableDefiniton) String(i Indentation) string {
+	var out strings.Builder
+
+	out.WriteString(i.indent() + ivd.Name + " := ")
+	out.WriteString(ivd.Value.String(i))
+	out.WriteString("\n")
+
+	return out.String()
+}
+
+func (ivd *ImplicitVariableDefiniton) statementNode() {}
+
+type IntegerLiteral struct {
+	Token token.Token
+	Value int64
+}
+
+func (il *IntegerLiteral) TokenLiteral() string          { return il.Token.Literal }
+func (il *IntegerLiteral) String(i Indentation) string { return il.Token.Literal }
+func (il *IntegerLiteral) expressionNode()               {}
diff --git a/get_to_work.th b/get_to_work.th
new file mode 100644
index 0000000..f178ac5
--- /dev/null
+++ b/get_to_work.th
@@ -0,0 +1,3 @@
+fn main() {
+    a := 2
+}
diff --git a/lexer/lexer.go b/lexer/lexer.go
index e69052e..6520635 100644
--- a/lexer/lexer.go
+++ b/lexer/lexer.go
@@ -1,6 +1,8 @@
 package lexer
 
 import (
+	"fmt"
+
 	"git.robaertschi.xyz/robaertschi/thorgot/token"
 )
 
@@ -24,6 +26,10 @@ func New(input string) Lexer {
 	return lexer
 }
 
+func (l Lexer) String() string {
+	return fmt.Sprintf("Lexer{input: \"%v\", ch: '%c', pos: %v, readPos: %v, col: %v, line: %v}", l.input, l.ch, l.pos, l.readPos, l.col, l.line)
+}
+
 func (l *Lexer) readChar() {
 	if l.readPos >= len(l.input) {
 		l.ch = 0
@@ -92,11 +98,12 @@ func (l *Lexer) readNumber() token.Token {
 func (l *Lexer) NextToken() token.Token {
 	l.skipWhitespace()
 	var tok token.Token
+	tok.Loc = token.Loc{Line: l.line, Col: l.col}
 	tok.Literal = string(l.ch)
 
 	switch l.ch {
 	case '\n':
-		tok.Token = token.EndLine
+		tok.Token = token.NewLine
 	case ';':
 		tok.Token = token.Semicolon
 	case ':':
diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go
index e891289..2fdcb9b 100644
--- a/lexer/lexer_test.go
+++ b/lexer/lexer_test.go
@@ -14,13 +14,17 @@ func TestCorrectTokens(t *testing.T) {
 	}{{
 		expectedTokens: []token.Token{{Token: token.Eof, Literal: "", Loc: token.Loc{Line: 1, Col: 1}}},
 		input:          "",
-	}, {input: "hello 1234 ; () {}",
+	}, {input: "hello 1234 ; () {}\n",
 		expectedTokens: []token.Token{
 			{Token: token.Identifier, Literal: "hello", Loc: token.Loc{Line: 1, Col: 1}},
 			{Token: token.Integer, Literal: "1234", Loc: token.Loc{Line: 1, Col: 7}},
 			{Token: token.Semicolon, Literal: ";", Loc: token.Loc{Line: 1, Col: 12}},
 			{Token: token.LParen, Literal: "(", Loc: token.Loc{Line: 1, Col: 14}},
 			{Token: token.RParen, Literal: ")", Loc: token.Loc{Line: 1, Col: 15}},
+			{Token: token.LBrace, Literal: "{", Loc: token.Loc{Line: 1, Col: 17}},
+			{Token: token.RBrace, Literal: "}", Loc: token.Loc{Line: 1, Col: 18}},
+			{Token: token.NewLine, Literal: "\n", Loc: token.Loc{Line: 2, Col: 1}},
+			{Token: token.Eof, Literal: "", Loc: token.Loc{Line: 2, Col: 2}},
 		}}}
 
 	for _, test := range tests {
diff --git a/parser/parser.go b/parser/parser.go
new file mode 100644
index 0000000..0bfe2c2
--- /dev/null
+++ b/parser/parser.go
@@ -0,0 +1 @@
+package parser
diff --git a/token/token.go b/token/token.go
index 27a5417..7cfe8be 100644
--- a/token/token.go
+++ b/token/token.go
@@ -17,7 +17,7 @@
 const (
 	Illegal TokenType = "Illegal"
 	Eof               = "Eof"
-	EndLine           = "EndLine"
+	NewLine           = "NewLine"
 
 	Semicolon = "Semicolon" // ;
 	Colon     = "Colon"     // :