From 81de12583102f67fb336e6d0e413e43100705dc7 Mon Sep 17 00:00:00 2001
From: Michael Ochmann
Date: Tue, 23 Aug 2022 15:18:19 +0200
Subject: [PATCH] added filename to Token location

---
Review notes (text between "---" and the diffstat is ignored by git am):

Every Token now carries a location triple [column, row, file name]. Rows and
columns are 1-based; the file name is null when the Lexer was not given one.

Changes relative to the submitted version (all on added lines, so the hunk
line counts and the diffstat are unchanged):

* The EOL token for a blank line stored [row, 0, file]; it now stores
  [1, row, file] so the element order matches every other token.
* $clearBuffer captured $col by value at the point the closure is created,
  so every TEXT token reported column 1. $col is now captured by reference.
* TEXT and NUMBER tokens are emitted while the character *after* them is
  being processed, so their column is computed as $col - strlen($buffer),
  i.e. the column of their first character. This assumes the character loop
  (outside these hunks) advances one byte per iteration, in line with the
  strlen() checks already used here - please confirm.
* The EOF token combined the previous line's $col with the already
  incremented $row; it is now reported at column 1 of $row and no longer
  reads the loop-scoped $col after the loop.

Not changed (context line, pre-existing): the $clearBuffer() call after the
outer loop is executed even when no non-empty line ever defined the closure.

Indentation and blank context lines were restored from a whitespace-collapsed
copy of this patch, and the blob ids in the "index" lines are those of the
original submission; re-check both against the tree before applying.

 src/Lexer.php | 63 +++++++++++++++++++++++++++++----------------------
 src/Token.php |  8 ++++---
 2 files changed, 41 insertions(+), 30 deletions(-)

diff --git a/src/Lexer.php b/src/Lexer.php
index 9a386fd..a8a39cd 100644
--- a/src/Lexer.php
+++ b/src/Lexer.php
@@ -3,29 +3,34 @@
 namespace parkdown;
 
 class Lexer {
-	private array $source;
+	private array   $source;
+	private ?string $fileName; // reported in every token location; null when unknown
 
-	public function __construct(string $sourceCode) {
-		$unifiedSource = str_replace(["\r\n", "\r"], "\n", $sourceCode);
-		$this->source  = explode("\n", trim($unifiedSource, "\n"));
+	public function __construct(string $sourceCode, ?string $fileName = null) {
+		$this->fileName = $fileName;
+		$unifiedSource  = str_replace(["\r\n", "\r"], "\n", $sourceCode);
+		$this->source   = explode("\n", trim($unifiedSource, "\n"));
 	}
 
 	public function tokenize() : array {
 		$tokens = [];
+		$row    = 1; // 1-based number of the line being tokenized
 
 		foreach ($this->source as $line) {
 			if (strlen($line) < 1) {
-				array_push($tokens, new Token(TokenType::EOL, "\n"));
+				array_push($tokens, new Token(TokenType::EOL, "\n", [1, $row, $this->fileName]));
+				$row++;
 				continue;
 			}
 
 			$buffer = "";
 			$number = false;
+			$col    = 1; // 1-based column of the character being processed
 
-			$clearBuffer = function() use (&$buffer, &$tokens) {
+			$clearBuffer = function() use (&$buffer, &$tokens, &$col, $row) { // $col by reference: it keeps advancing after this point
 				if (strlen($buffer) < 1)
 					return;
-				array_push($tokens, new Token(TokenType::TEXT, $buffer));
+				array_push($tokens, new Token(TokenType::TEXT, $buffer, [$col - strlen($buffer), $row, $this->fileName]));
 				$buffer = "";
 			};
 
@@ -34,66 +39,66 @@ class Lexer {
 					$clearBuffer();
 					$number = true;
 				} else if (!is_numeric($char) && $number) {
-					array_push($tokens, new Token(TokenType::NUMBER, $buffer));
+					array_push($tokens, new Token(TokenType::NUMBER, $buffer, [$col - strlen($buffer), $row, $this->fileName]));
 					$buffer = "";
 					$number = false;
 				}
 				switch($char) {
 					case '#':
 						$clearBuffer();
-						array_push($tokens, new Token(TokenType::HASH, $char));
+						array_push($tokens, new Token(TokenType::HASH, $char, [$col, $row, $this->fileName]));
 						break;
 					case '*':
 						$clearBuffer();
-						array_push($tokens, new Token(TokenType::ASTERISK, $char));
+						array_push($tokens, new Token(TokenType::ASTERISK, $char, [$col, $row, $this->fileName]));
 						break;
 					case '.':
 						$clearBuffer();
-						array_push($tokens, new Token(TokenType::DOT, $char));
+						array_push($tokens, new Token(TokenType::DOT, $char, [$col, $row, $this->fileName]));
 						break;
 					case '-':
 						$clearBuffer();
-						array_push($tokens, new Token(TokenType::MINUS, $char));
+						array_push($tokens, new Token(TokenType::MINUS, $char, [$col, $row, $this->fileName]));
 						break;
 					case '`':
 						$clearBuffer();
-						array_push($tokens, new Token(TokenType::BACKTICK, $char));
+						array_push($tokens, new Token(TokenType::BACKTICK, $char, [$col, $row, $this->fileName]));
 						break;
 					case '[':
 						$clearBuffer();
-						array_push($tokens, new Token(TokenType::LBRACKET, $char));
+						array_push($tokens, new Token(TokenType::LBRACKET, $char, [$col, $row, $this->fileName]));
 						break;
 					case ']':
 						$clearBuffer();
-						array_push($tokens, new Token(TokenType::RBRACKET, $char));
+						array_push($tokens, new Token(TokenType::RBRACKET, $char, [$col, $row, $this->fileName]));
 						break;
 					case '(':
 						$clearBuffer();
-						array_push($tokens, new Token(TokenType::LPAREN, $char));
+						array_push($tokens, new Token(TokenType::LPAREN, $char, [$col, $row, $this->fileName]));
 						break;
 					case ')':
 						$clearBuffer();
-						array_push($tokens, new Token(TokenType::RPAREN, $char));
+						array_push($tokens, new Token(TokenType::RPAREN, $char, [$col, $row, $this->fileName]));
 						break;
 					case '!':
 						$clearBuffer();
-						array_push($tokens, new Token(TokenType::BANG, $char));
+						array_push($tokens, new Token(TokenType::BANG, $char, [$col, $row, $this->fileName]));
 						break;
 					case '|':
 						$clearBuffer();
-						array_push($tokens, new Token(TokenType::PIPE, $char));
+						array_push($tokens, new Token(TokenType::PIPE, $char, [$col, $row, $this->fileName]));
 						break;
 					case '\\':
 						$clearBuffer();
-						array_push($tokens, new Token(TokenType::BACKSLASH, $char));
+						array_push($tokens, new Token(TokenType::BACKSLASH, $char, [$col, $row, $this->fileName]));
 						break;
 					case '>':
 						$clearBuffer();
-						array_push($tokens, new Token(TokenType::GT, $char));
+						array_push($tokens, new Token(TokenType::GT, $char, [$col, $row, $this->fileName]));
 						break;
 					case ' ':
 						$clearBuffer();
-						array_push($tokens, new Token(TokenType::TAB, $char));
+						array_push($tokens, new Token(TokenType::TAB, $char, [$col, $row, $this->fileName]));
 						break;
 					case ':':
 						if (str_ends_with($buffer, "http") || str_ends_with($buffer, "https")) {
@@ -102,26 +107,30 @@ class Lexer {
 						}
 
 						$clearBuffer();
-						array_push($tokens, new Token(TokenType::COLON, $char));
+						array_push($tokens, new Token(TokenType::COLON, $char, [$col, $row, $this->fileName]));
 						break;
 					case '{':
 						$clearBuffer();
-						array_push($tokens, new Token(TokenType::LBRACE, $char));
+						array_push($tokens, new Token(TokenType::LBRACE, $char, [$col, $row, $this->fileName]));
 						break;
 					case '}':
 						$clearBuffer();
-						array_push($tokens, new Token(TokenType::RBRACE, $char));
+						array_push($tokens, new Token(TokenType::RBRACE, $char, [$col, $row, $this->fileName]));
 						break;
 					default:
 						$buffer .= $char;
 						break;
 				}
+
+				$col++;
 			}
 			$clearBuffer();
-			array_push($tokens, new Token(TokenType::EOL, "\n"));
+			array_push($tokens, new Token(TokenType::EOL, "\n", [$col, $row, $this->fileName]));
+
+			$row++;
 		}
 		$clearBuffer();
-		array_push($tokens, new Token(TokenType::EOF, "\0"));
+		array_push($tokens, new Token(TokenType::EOF, "\0", [1, $row, $this->fileName])); // $row is already one past the last line
 
 		return $tokens;
 	}
diff --git a/src/Token.php b/src/Token.php
index a0cf6dc..41b2a69 100644
--- a/src/Token.php
+++ b/src/Token.php
@@ -29,9 +29,11 @@ enum TokenType {
 class Token {
 	public TokenType $type;
 	public string $data;
+	public array $location; // [column, row, file name] as filled in by Lexer; [] when not supplied
 
-	public function __construct(TokenType $type, string $data = "") {
-		$this->type = $type;
-		$this->data = $data;
+	public function __construct(TokenType $type, string $data = "", array $location = []) {
+		$this->type     = $type;
+		$this->data     = $data;
+		$this->location = $location;
 	}
 }
\ No newline at end of file