a recursive descent markdown parser in PHP
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

120 lines
3.0 KiB

<?php declare(strict_types=1);
namespace parkdown;
/**
 * Line-oriented lexer that turns Markdown source text into a flat token list.
 *
 * Every line yields its tokens followed by one EOL token; the whole stream is
 * terminated by a single EOF token. Runs of digits become NUMBER tokens, the
 * punctuation listed in SINGLE_CHAR_TOKENS becomes one token per character,
 * and everything else is accumulated into TEXT tokens.
 */
class Lexer {
    /**
     * Characters that form a token of their own, mapped to the token type
     * that is emitted for them.
     */
    private const SINGLE_CHAR_TOKENS = [
        '#'  => TokenType::HASH,
        '*'  => TokenType::ASTERISK,
        '.'  => TokenType::DOT,
        '-'  => TokenType::MINUS,
        '`'  => TokenType::BACKTICK,
        '['  => TokenType::LBRACKET,
        ']'  => TokenType::RBRACKET,
        '('  => TokenType::LPAREN,
        ')'  => TokenType::RPAREN,
        '!'  => TokenType::BANG,
        '|'  => TokenType::PIPE,
        '\\' => TokenType::BACKSLASH,
        '>'  => TokenType::GT,
        // NOTE(review): a single space is emitted as TokenType::TAB; confirm
        // whether a literal tab character was intended here.
        ' '  => TokenType::TAB,
        // ':' is kept as text when it directly follows "http"/"https",
        // see tokenizeLine().
        ':'  => TokenType::COLON,
    ];

    /** @var string[] The source text split into lines (without line endings). */
    private array $source;

    /**
     * Normalizes all line endings to "\n", strips leading and trailing
     * newlines and splits the result into lines.
     *
     * @param string $sourceCode Raw source text to tokenize.
     */
    public function __construct(string $sourceCode) {
        $unifiedSource = str_replace(["\r\n", "\r"], "\n", $sourceCode);
        $this->source = explode("\n", trim($unifiedSource, "\n"));
    }

    /**
     * Tokenizes the whole source.
     *
     * An empty source (or one consisting only of newlines) produces a single
     * EOL token followed by EOF instead of failing.
     *
     * @return Token[] Tokens of all lines, each line closed by EOL, ending with EOF.
     */
    public function tokenize() : array {
        $tokens = [];
        foreach ($this->source as $line) {
            foreach ($this->tokenizeLine($line) as $token) {
                $tokens[] = $token;
            }
            $tokens[] = new Token(TokenType::EOL, "\n");
        }
        $tokens[] = new Token(TokenType::EOF, "\0");
        return $tokens;
    }

    /**
     * Tokenizes a single line; the closing EOL token is added by the caller.
     *
     * @param string $line One line of source without its line ending.
     * @return Token[] Tokens found on the line; empty for an empty line.
     */
    private function tokenizeLine(string $line) : array {
        if ($line === '') {
            return [];
        }

        $tokens = [];
        $buffer = "";
        $inNumber = false;

        // Emits the pending buffer (if any) as NUMBER or TEXT, depending on
        // what kind of run was being collected, and resets the run state.
        $flush = static function () use (&$tokens, &$buffer, &$inNumber) : void {
            if ($buffer !== "") {
                $type = $inNumber ? TokenType::NUMBER : TokenType::TEXT;
                $tokens[] = new Token($type, $buffer);
                $buffer = "";
            }
            $inNumber = false;
        };

        foreach (str_split($line) as $char) {
            // A switch between digit and non-digit ends the current run.
            $isDigit = is_numeric($char);
            if ($isDigit !== $inNumber) {
                $flush();
                $inNumber = $isDigit;
            }

            // Keep the colon of a URL scheme inside the text buffer.
            if ($char === ':' && (str_ends_with($buffer, "http") || str_ends_with($buffer, "https"))) {
                $buffer .= $char;
                continue;
            }

            if (!array_key_exists($char, self::SINGLE_CHAR_TOKENS)) {
                $buffer .= $char;
                continue;
            }

            $flush();
            $tokens[] = new Token(self::SINGLE_CHAR_TOKENS[$char], $char);
        }

        // Flush whatever is left; a digit run at the end of the line is
        // emitted as NUMBER, consistent with digit runs inside the line.
        $flush();
        return $tokens;
    }
}