You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
120 lines
3.0 KiB
120 lines
3.0 KiB
<?php declare(strict_types=1);
|
|
|
|
namespace parkdown;
|
|
|
|
/**
 * Turns raw source text into a flat list of Token objects.
 *
 * The text is processed line by line. Within a line, the characters listed in
 * SINGLE_CHAR_TOKENS (and ':') each become a token of their own, runs of digits
 * become NUMBER tokens and everything else is collected into TEXT tokens.
 * Every line is terminated by an EOL token and the whole stream by one EOF token.
 */
class Lexer {
    /**
     * Characters that always form a token of their own, mapped to the token
     * type they produce. ':' is not listed because it is kept inside the text
     * buffer when it directly follows "http" or "https" (see tokenizeLine()).
     */
    private const SINGLE_CHAR_TOKENS = [
        '#'  => TokenType::HASH,
        '*'  => TokenType::ASTERISK,
        '.'  => TokenType::DOT,
        '-'  => TokenType::MINUS,
        '`'  => TokenType::BACKTICK,
        '['  => TokenType::LBRACKET,
        ']'  => TokenType::RBRACKET,
        '('  => TokenType::LPAREN,
        ')'  => TokenType::RPAREN,
        '!'  => TokenType::BANG,
        '|'  => TokenType::PIPE,
        '\\' => TokenType::BACKSLASH,
        '>'  => TokenType::GT,
        // NOTE(review): this key is a single space in the text under review
        // although the token is named TAB — confirm whether the original
        // literal is meant to be a tab character.
        ' '  => TokenType::TAB,
    ];

    /** @var string[] Source split into lines, without line terminators. */
    private array $source;

    /**
     * Normalises all line endings to "\n", strips leading and trailing
     * newlines and splits the remaining text into lines.
     *
     * @param string $sourceCode Raw text to tokenize.
     */
    public function __construct(string $sourceCode) {
        $unifiedSource = str_replace(["\r\n", "\r"], "\n", $sourceCode);
        $this->source = explode("\n", trim($unifiedSource, "\n"));
    }

    /**
     * Tokenizes the whole source.
     *
     * An empty line contributes only its EOL token, so empty input yields
     * [EOL, EOF] instead of failing.
     *
     * @return Token[] All tokens in source order, always ending with an EOF token.
     */
    public function tokenize() : array {
        $tokens = [];

        foreach ($this->source as $line) {
            foreach ($this->tokenizeLine($line) as $token) {
                $tokens[] = $token;
            }

            $tokens[] = new Token(TokenType::EOL, "\n");
        }

        $tokens[] = new Token(TokenType::EOF, "\0");

        return $tokens;
    }

    /**
     * Tokenizes a single line (without its line terminator).
     *
     * @param string $line One line of source text.
     * @return Token[] Tokens of the line, excluding the terminating EOL token.
     */
    private function tokenizeLine(string $line) : array {
        $tokens = [];

        // Guard explicitly: str_split('') does not return the same value on
        // every PHP version, and an empty line has no tokens of its own.
        if ($line === '') {
            return $tokens;
        }

        $buffer = '';
        $inNumber = false;

        // Emits the pending buffer, typed according to the run it belongs to.
        // $inNumber is captured by reference so the type reflects the state at
        // the moment of the call — this is what makes a digit run at the end of
        // a line come out as NUMBER rather than TEXT.
        $flush = static function () use (&$tokens, &$buffer, &$inNumber) : void {
            if ($buffer === '') {
                return;
            }

            $tokens[] = new Token($inNumber ? TokenType::NUMBER : TokenType::TEXT, $buffer);
            $buffer = '';
        };

        foreach (str_split($line) as $char) {
            $isDigit = is_numeric($char);

            // Crossing a digit/non-digit boundary ends the current run.
            if ($isDigit !== $inNumber) {
                $flush();
                $inNumber = $isDigit;
            }

            if ($isDigit) {
                $buffer .= $char;
                continue;
            }

            if ($char === ':') {
                // Keep "http:" / "https:" together so URL schemes stay inside one TEXT token.
                if (str_ends_with($buffer, "http") || str_ends_with($buffer, "https")) {
                    $buffer .= $char;
                    continue;
                }

                $flush();
                $tokens[] = new Token(TokenType::COLON, $char);
                continue;
            }

            if (array_key_exists($char, self::SINGLE_CHAR_TOKENS)) {
                $flush();
                $tokens[] = new Token(self::SINGLE_CHAR_TOKENS[$char], $char);
                continue;
            }

            $buffer .= $char;
        }

        // Whatever is still buffered at the end of the line belongs to the last run.
        $flush();

        return $tokens;
    }
}