thorgot/lexer/lexer.go
package lexer

import (
	"fmt"

	"git.robaertschi.xyz/robaertschi/thorgot/token"
)
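
// Lexer scans thorgot source text one byte at a time, tracking the current
// byte, its position in the input and the next read position, together with
// the line/column location attached to emitted tokens.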
type Lexer struct {
	input   string
	ch      byte
	pos     int
	readPos int

	// Loc
	col  int
	line int
}
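
// New returns a Lexer positioned on the first byte of input, with line
// counting starting at 1.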
func New(input string) Lexer {
	lexer := Lexer{input: input}
	lexer.line = 1
	lexer.readChar()
	return lexer
}
func (l Lexer) String() string {
	return fmt.Sprintf("Lexer{input: \"%v\", ch: '%c', pos: %v, readPos: %v, col: %v, line: %v}", l.input, l.ch, l.pos, l.readPos, l.col, l.line)
}
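
// readChar advances the lexer by one byte, setting ch to 0 once the end of
// the input is reached and keeping the line and col counters in sync.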
func (l *Lexer) readChar() {
	// If the byte being consumed was a newline, the next byte starts a new
	// line, so reset the column before advancing.
	if l.ch == '\n' {
		l.col = 0
		l.line += 1
	}

	if l.readPos >= len(l.input) {
		l.ch = 0
	} else {
		l.ch = l.input[l.readPos]
	}

	l.pos = l.readPos
	l.readPos += 1
	l.col += 1
}
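
// makeToken builds a token of the given type at the lexer's current location.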
func (l *Lexer) makeToken(t token.TokenType, literal string) token.Token {
	return token.Token{Token: t, Literal: literal, Loc: token.Loc{Line: l.line, Col: l.col}}
}
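
// isDigit reports whether ch is an ASCII decimal digit.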
func isDigit(ch byte) bool {
	return '0' <= ch && ch <= '9'
}

func isLetter(ch byte) bool {
	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z'
}

func isValidIdentChar(ch byte) bool {
	return ch == '_' || isLetter(ch)
}
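
// skipWhitespace consumes spaces, tabs, carriage returns and backspaces.
// Newlines are deliberately left alone, since they are emitted as
// token.NewLine by NextToken.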
func (l *Lexer) skipWhitespace() {
	for l.ch == '\r' || l.ch == '\b' || l.ch == '\t' || l.ch == ' ' {
		l.readChar()
	}
}
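
// readIdentifier consumes an identifier or keyword starting at the current
// byte; token.LookupKeyword decides which token type the literal maps to.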
func (l *Lexer) readIdentifier() token.Token {
	loc := token.Loc{Line: l.line, Col: l.col}
	pos := l.pos
	l.readChar()
	// Accept digits as well as letters and underscores after the first
	// character of the identifier.
	for isDigit(l.ch) || isValidIdentChar(l.ch) {
		l.readChar()
	}
	t := token.LookupKeyword(l.input[pos:l.pos])
	return token.Token{Token: t, Loc: loc, Literal: l.input[pos:l.pos]}
}
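
// readNumber consumes a run of decimal digits and returns it as a
// token.Integer.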
func (l *Lexer) readNumber() token.Token {
	pos := l.pos
	loc := token.Loc{Line: l.line, Col: l.col}
	for isDigit(l.ch) {
		l.readChar()
	}
	return token.Token{Token: token.Integer, Loc: loc, Literal: l.input[pos:l.pos]}
}
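
// NextToken skips leading whitespace and returns the next token in the
// input, advancing the lexer past it. The zero byte produced by readChar at
// the end of the input maps to token.Eof.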
func (l *Lexer) NextToken() token.Token {
	l.skipWhitespace()

	var tok token.Token
	tok.Loc = token.Loc{Line: l.line, Col: l.col}
	tok.Literal = string(l.ch)

	switch l.ch {
	case '\n':
		tok.Token = token.NewLine
	case ';':
		tok.Token = token.Semicolon
	case ':':
		tok.Token = token.Colon
	case '=':
		tok.Token = token.Equal
	case '{':
		tok.Token = token.LBrace
	case '}':
		tok.Token = token.RBrace
	case '(':
		tok.Token = token.LParen
	case ')':
		tok.Token = token.RParen
	case 0:
		return l.makeToken(token.Eof, "")
	default:
		if isValidIdentChar(l.ch) {
			return l.readIdentifier()
		} else if isDigit(l.ch) {
			return l.readNumber()
		}
		tok.Token = token.Illegal
	}

	l.readChar()
	return tok
}