From 5db30a2a8479fa23ea6cabb0d0599e616ff61464 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robin=20B=C3=A4rtschi?= Date: Thu, 14 Nov 2024 15:42:39 +0100 Subject: [PATCH] added statement parsing, missing tests and expression --- ast/ast.go | 25 ++++++- lexer/lexer.go | 26 +++---- lexer/lexer_test.go | 24 +++---- parser/parser.go | 162 ++++++++++++++++++++++++++++++++++++++++++++ token/token.go | 3 +- 5 files changed, 214 insertions(+), 26 deletions(-) diff --git a/ast/ast.go b/ast/ast.go index fa7062f..638a916 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -34,6 +34,29 @@ func (t Type) String() string { return string(t) } +type Program struct { + Statements []StatementNode +} + +func (p *Program) TokenLiteral() string { + if len(p.Statements) > 0 { + return p.Statements[0].TokenLiteral() + } else { + return "" + } +} + +func (p *Program) String() string { + out := strings.Builder{} + + for _, statement := range p.Statements { + out.WriteString(statement.String(0)) + out.WriteString("\n") + } + + return out.String() +} + type Block struct { Token token.Token // the RBrace token Statements []StatementNode @@ -66,7 +89,7 @@ type Function struct { Arguments []FunctionArgument ReturnType Type HasReturnType bool - Block Block + Block *Block } func (f *Function) TokenLiteral() string { return f.Token.Literal } diff --git a/lexer/lexer.go b/lexer/lexer.go index 6520635..18400f4 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -48,7 +48,7 @@ func (l *Lexer) readChar() { } func (l *Lexer) makeToken(t token.TokenType, literal string) token.Token { - return token.Token{Token: t, Literal: literal, Loc: token.Loc{Line: l.line, Col: l.col}} + return token.Token{Type: t, Literal: literal, Loc: token.Loc{Line: l.line, Col: l.col}} } func isDigit(ch byte) bool { @@ -81,7 +81,7 @@ func (l *Lexer) readIdentifier() token.Token { t := token.LookupKeyword(l.input[pos:l.pos]) - return token.Token{Token: t, Loc: loc, Literal: l.input[pos:l.pos]} + return token.Token{Type: t, Loc: loc, Literal: 
l.input[pos:l.pos]} } func (l *Lexer) readNumber() token.Token { @@ -92,7 +92,7 @@ func (l *Lexer) readNumber() token.Token { l.readChar() } - return token.Token{Token: token.Integer, Loc: loc, Literal: l.input[pos:l.pos]} + return token.Token{Type: token.Integer, Loc: loc, Literal: l.input[pos:l.pos]} } func (l *Lexer) NextToken() token.Token { @@ -103,21 +103,23 @@ func (l *Lexer) NextToken() token.Token { switch l.ch { case '\n': - tok.Token = token.NewLine + tok.Type = token.NewLine case ';': - tok.Token = token.Semicolon + tok.Type = token.Semicolon case ':': - tok.Token = token.Colon + tok.Type = token.Colon + case ',': + tok.Type = token.Comma case '=': - tok.Token = token.Equal + tok.Type = token.Equal case '{': - tok.Token = token.LBrace + tok.Type = token.LBrace case '}': - tok.Token = token.RBrace + tok.Type = token.RBrace case '(': - tok.Token = token.LParen + tok.Type = token.LParen case ')': - tok.Token = token.RParen + tok.Type = token.RParen case 0: return l.makeToken(token.Eof, "") @@ -129,7 +131,7 @@ func (l *Lexer) NextToken() token.Token { return l.readNumber() } - tok.Token = token.Illegal + tok.Type = token.Illegal } l.readChar() diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go index 2fdcb9b..399a211 100644 --- a/lexer/lexer_test.go +++ b/lexer/lexer_test.go @@ -12,19 +12,19 @@ func TestCorrectTokens(t *testing.T) { expectedTokens []token.Token input string }{{ - expectedTokens: []token.Token{{Token: token.Eof, Literal: "", Loc: token.Loc{Line: 1, Col: 1}}}, + expectedTokens: []token.Token{{Type: token.Eof, Literal: "", Loc: token.Loc{Line: 1, Col: 1}}}, input: "", }, {input: "hello 1234 ; () {}\n", expectedTokens: []token.Token{ - {Token: token.Identifier, Literal: "hello", Loc: token.Loc{Line: 1, Col: 1}}, - {Token: token.Integer, Literal: "1234", Loc: token.Loc{Line: 1, Col: 7}}, - {Token: token.Semicolon, Literal: ";", Loc: token.Loc{Line: 1, Col: 12}}, - {Token: token.LParen, Literal: "(", Loc: token.Loc{Line: 1, Col: 14}}, - 
{Token: token.RParen, Literal: ")", Loc: token.Loc{Line: 1, Col: 15}}, - {Token: token.LBrace, Literal: "{", Loc: token.Loc{Line: 1, Col: 17}}, - {Token: token.RBrace, Literal: "}", Loc: token.Loc{Line: 1, Col: 18}}, - {Token: token.NewLine, Literal: "\n", Loc: token.Loc{Line: 2, Col: 1}}, - {Token: token.Eof, Literal: "", Loc: token.Loc{Line: 2, Col: 2}}, + {Type: token.Identifier, Literal: "hello", Loc: token.Loc{Line: 1, Col: 1}}, + {Type: token.Integer, Literal: "1234", Loc: token.Loc{Line: 1, Col: 7}}, + {Type: token.Semicolon, Literal: ";", Loc: token.Loc{Line: 1, Col: 12}}, + {Type: token.LParen, Literal: "(", Loc: token.Loc{Line: 1, Col: 14}}, + {Type: token.RParen, Literal: ")", Loc: token.Loc{Line: 1, Col: 15}}, + {Type: token.LBrace, Literal: "{", Loc: token.Loc{Line: 1, Col: 17}}, + {Type: token.RBrace, Literal: "}", Loc: token.Loc{Line: 1, Col: 18}}, + {Type: token.NewLine, Literal: "\n", Loc: token.Loc{Line: 2, Col: 1}}, + {Type: token.Eof, Literal: "", Loc: token.Loc{Line: 2, Col: 2}}, }}} for _, test := range tests { @@ -36,8 +36,8 @@ func TestCorrectTokens(t *testing.T) { t.Errorf("Literal is not equal: actual = (%v) is not expected = (%v)", actual.Literal, expected.Literal) } - if expected.Token != actual.Token { - t.Errorf("Token is not equal: actual = (%v) is not expected = (%v)", actual.Token, expected.Token) + if expected.Type != actual.Type { + t.Errorf("Token is not equal: actual = (%v) is not expected = (%v)", actual.Type, expected.Type) } if expected.Loc.Line != actual.Loc.Line { diff --git a/parser/parser.go b/parser/parser.go index 0bfe2c2..e09f980 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -1 +1,163 @@ package parser + +import ( + "fmt" + + "git.robaertschi.xyz/robaertschi/thorgot/ast" + "git.robaertschi.xyz/robaertschi/thorgot/lexer" + "git.robaertschi.xyz/robaertschi/thorgot/token" +) + +type Parser struct { + lexer lexer.Lexer + curToken token.Token + peekToken token.Token + + Errors []error +} + +func New(lexer 
lexer.Lexer) Parser {
+	// Store the supplied lexer; previously it was dropped (p := Parser{}),
+	// leaving p.lexer as a zero value, so NextToken read from an empty input.
+	p := Parser{lexer: lexer}
+
+	p.nextToken()
+	p.nextToken()
+
+	return p
+}
+
+func (p *Parser) error(err error) ast.StatementNode {
+	p.Errors = append(p.Errors, err)
+	return nil
+}
+
+func (p *Parser) expectPeek(t token.TokenType) bool {
+	if p.peekToken.Type == t {
+		p.nextToken()
+		return true
+	} else {
+		p.peekError(t)
+		return false
+	}
+}
+
+func (p *Parser) peekTokenIs(t token.TokenType) bool {
+	return p.peekToken.Type == t
+}
+
+func (p *Parser) peekError(t token.TokenType) {
+	err := fmt.Errorf("expected next token to be %v, got %v", t, p.peekToken.Type)
+	p.Errors = append(p.Errors, err)
+}
+
+func (p *Parser) nextToken() {
+	p.curToken = p.peekToken
+	p.peekToken = p.lexer.NextToken()
+}
+
+func (p *Parser) ParseProgram() ast.Program {
+	program := ast.Program{}
+	program.Statements = make([]ast.StatementNode, 0)
+
+	for p.curToken.Type != token.Eof {
+		stmt := p.parseStatement()
+		if stmt != nil {
+			program.Statements = append(program.Statements, stmt)
+		}
+	}
+
+	return program
+}
+
+func (p *Parser) parseStatement() ast.StatementNode {
+	switch p.curToken.Type {
+	case token.Fn:
+		// Return an untyped nil on failure: returning p.parseFunction()
+		// directly would wrap a nil *ast.Function in a non-nil interface
+		// (the typed-nil trap), defeating nil checks in callers. Also
+		// advance so ParseProgram cannot spin on the same token.
+		if f := p.parseFunction(); f != nil {
+			return f
+		}
+		p.nextToken()
+		return nil
+	}
+
+	err := fmt.Errorf("invalid token %v found with literal %v", p.curToken.Type, p.curToken.Literal)
+	// Skip the offending token so the caller's loop makes progress.
+	p.nextToken()
+	return p.error(err)
+}
+
+func (p *Parser) parseFunctionArguments() []ast.FunctionArgument {
+	args := make([]ast.FunctionArgument, 0)
+
+	for p.peekTokenIs(token.Identifier) {
+		p.nextToken()
+		name := p.curToken.Literal
+
+		if !p.expectPeek(token.Identifier) {
+			return nil
+		}
+
+		args = append(args, ast.FunctionArgument{Name: name, Type: ast.Type(p.curToken.Literal)})
+
+		if !p.peekTokenIs(token.Comma) {
+			break
+		}
+		p.nextToken()
+	}
+
+	if !p.expectPeek(token.RParen) {
+		return nil
+	}
+
+	return args
+}
+
+func (p *Parser) parseFunction() *ast.Function {
+	f := &ast.Function{Token: p.curToken}
+
+	if !p.expectPeek(token.Identifier) {
+		return nil
+	}
+
+	f.Name = p.curToken.Literal
+
+	if !p.expectPeek(token.LParen) {
+		return nil
+	}
+
+	args
:= p.parseFunctionArguments()
+
+	if args == nil {
+		return nil
+	}
+
+	f.Arguments = args
+
+	if p.peekTokenIs(token.Identifier) {
+		p.nextToken()
+		f.ReturnType = ast.Type(p.curToken.Literal)
+		f.HasReturnType = true
+	}
+
+	if !p.expectPeek(token.LBrace) {
+		return nil
+	}
+
+	// parse block
+
+	f.Block = p.parseBlock()
+	if f.Block == nil {
+		return nil
+	}
+
+	return f
+}
+
+func (p *Parser) parseBlock() *ast.Block {
+	b := &ast.Block{Token: p.curToken}
+	// skip {
+	p.nextToken()
+
+	for p.curToken.Type != token.RBrace {
+		stmt := p.parseStatement()
+		if stmt == nil {
+			return nil
+		}
+		b.Statements = append(b.Statements, stmt)
+	}
+
+	p.nextToken()
+
+	return b
+}
diff --git a/token/token.go b/token/token.go
index 7cfe8be..bda5b1b 100644
--- a/token/token.go
+++ b/token/token.go
@@ -8,7 +8,7 @@ type Loc struct {
 }
 
 type Token struct {
-	Token   TokenType
+	Type    TokenType
 	Literal string
 	Loc     Loc
 }
@@ -21,6 +21,7 @@ const (
 
 	Semicolon = "Semicolon" // ;
 	Colon     = "Colon"     // :
+	Comma     = "Comma"     // ,
 	Equal     = "Equal"     // =
 	LBrace    = "LBrace"    // {
 	RBrace    = "RBrace"    // }