You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
548 lines
16 KiB
548 lines
16 KiB
<?php declare(strict_types=1);
|
|
|
|
namespace parkdown;
|
|
|
|
use DOMDocument;
|
|
use DOMElement;
|
|
use DOMNode;
|
|
|
|
/**
 * The two kinds of list that Parser::parseList() can build.
 */
enum ListType {
    /** Rendered as an <ol> element. */
    case ORDERED;

    /** Rendered as a <ul> element. */
    case UNORDERED;
}
|
|
|
|
class Parser {
|
|
/**
 * Marker placed on both sides of a reference key inside an href while the
 * reference is still unresolved (see parseLink() and resolveReferences()).
 */
public const MAGIC_CHAR = "*";

/** Tokens being parsed; entries are returned from peek() as Token objects. */
private array $tokenStream;

/** Index into $tokenStream of the token that current() returns. */
private int $pointer;

/** Document that owns every node the parser creates and receives the result. */
private DOMDocument $document;

/** Reference targets collected by parseReference(), keyed by reference key. */
private array $references;
|
|
|
|
/**
 * Prepares a parser for the given token stream.
 *
 * Starts reading at index 0 with an empty output document and no known
 * link references.
 *
 * @param array $tokenStream tokens to parse, in input order
 */
public function __construct(array $tokenStream) {
    $this->references = [];
    $this->document = new DOMDocument();
    $this->pointer = 0;
    $this->tokenStream = $tokenStream;
}
|
|
|
|
/**
 * Returns the token at the read pointer without advancing it.
 */
private function current() : Token {
    return $this->peek(0);
}
|
|
|
|
/**
 * Returns the token one position after the read pointer without advancing it.
 */
private function next() : Token {
    return $this->peek(amount: 1);
}
|
|
|
|
/**
 * Returns the token one position before the read pointer without moving it.
 */
private function last() : Token {
    return $this->peek(amount: -1);
}
|
|
|
|
/**
 * Looks at a token relative to the read pointer without advancing it.
 *
 * @param int $amount offset from the read pointer; may be negative
 * @return Token the token at that position, or a fresh EOF token when the
 *               position lies outside the token stream
 */
private function peek(int $amount = 0) : Token {
    $index = $this->pointer + $amount;
    $inRange = $index >= 0 && $index < count($this->tokenStream);

    return $inRange
        ? $this->tokenStream[$index]
        : new Token(TokenType::EOF);
}
|
|
|
|
/**
 * Returns the token at the read pointer and advances the pointer by one.
 *
 * The pointer is advanced even when it is already past the end of the
 * stream; the returned token is then the EOF token produced by peek().
 */
private function consume() : Token {
    $token = $this->peek();
    ++$this->pointer;

    return $token;
}
|
|
|
|
/**
 * Removes escaping backslashes from a piece of text via stripslashes().
 *
 * @param string $text raw token text
 * @return string the text with backslash escapes removed
 */
private static function StripBackslashes(string $text) : string {
    $unescaped = stripslashes($text);

    return $unescaped;
}
|
|
|
|
/**
 * Replaces reference placeholders in href attributes with their targets.
 *
 * parseLink() stores an unresolved reference as MAGIC_CHAR + key + MAGIC_CHAR
 * in the href attribute. This walks $node and all element descendants and
 * swaps every placeholder whose key is known for the stored target.
 * Placeholders with an unknown key are left unchanged.
 *
 * @param DOMElement $node root of the subtree to process
 */
private function resolveReferences(DOMElement $node) : void {
    if (count($this->references) < 1)
        return;

    if ($node->hasAttribute("href")) {
        $href = $node->getAttribute("href");
        $marker = self::MAGIC_CHAR;
        $markerLength = strlen($marker);

        // Only treat the value as a placeholder when the marker is present on
        // BOTH ends; otherwise a plain URL that merely starts with the marker
        // would lose its last character and be looked up under a wrong key.
        $isPlaceholder = strlen($href) >= 2 * $markerLength
            && str_starts_with($href, $marker)
            && str_ends_with($href, $marker);

        if ($isPlaceholder) {
            $index = substr($href, $markerLength, -$markerLength);
            if (array_key_exists($index, $this->references))
                $node->setAttribute("href", $this->references[$index]);
        }
    }

    foreach ($node->childNodes as $child) {
        // text nodes and other non-elements cannot carry attributes
        if ($child instanceof DOMElement)
            $this->resolveReferences($child);
    }
}
|
|
|
|
/**
 * Prints the token stream, wrapped in a <pre> element, to the output.
 */
public function debug() : void {
    $dump = print_r($this->tokenStream, true);

    echo "<pre>", $dump, "</pre>";
}
|
|
// PARSING
|
|
|
|
/**
 * Parses the content of a bold span into a <b> element.
 *
 * parseText() calls this after it has consumed the two opening asterisks.
 * Tokens are collected as plain text until an asterisk, the end of the line
 * or the end of the input is reached.
 *
 * @return DOMNode the <b> element holding the collected text
 */
private function parseBold() : DOMNode {
    $buffer = "";
    // EOF must stop the scan as well: past the end of the stream every token
    // is EOF, so an unterminated span would otherwise never leave this loop.
    while (!in_array($this->current()->type, [TokenType::ASTERISK, TokenType::EOL, TokenType::EOF], true))
        $buffer .= $this->consume()->data;

    // Consume the closing asterisks only when they are really there, so an
    // unterminated span does not swallow the line break and the token after it.
    for ($closing = 0; $closing < 2 && $this->current()->type === TokenType::ASTERISK; $closing++)
        $this->consume();

    // createElement() does not escape its value argument ("&" breaks it);
    // a text node takes the content literally.
    $elm = $this->document->createElement("b");
    if ($buffer !== "")
        $elm->appendChild($this->document->createTextNode($buffer));

    return $elm;
}
|
|
|
|
/**
 * Parses the content of an italic span into an <i> element.
 *
 * parseText() calls this after it has consumed the opening asterisk.
 * Tokens are collected as plain text until an asterisk, the end of the line
 * or the end of the input is reached.
 *
 * @return DOMNode the <i> element holding the collected text
 */
private function parseItalic() : DOMNode {
    $buffer = "";
    // EOF must stop the scan as well: past the end of the stream every token
    // is EOF, so an unterminated span would otherwise never leave this loop.
    while (!in_array($this->current()->type, [TokenType::ASTERISK, TokenType::EOL, TokenType::EOF], true))
        $buffer .= $this->consume()->data;

    // Consume the closing asterisk only when it is really there, so an
    // unterminated span does not swallow the line break.
    if ($this->current()->type === TokenType::ASTERISK)
        $this->consume();

    // createElement() does not escape its value argument ("&" breaks it);
    // a text node takes the content literally.
    $elm = $this->document->createElement("i");
    if ($buffer !== "")
        $elm->appendChild($this->document->createTextNode($buffer));

    return $elm;
}
|
|
|
|
/**
 * Parses an inline code span into a <code> element.
 *
 * Expects the read pointer on the opening backtick. Tokens are collected as
 * plain text until a backtick, the end of the line or the end of the input
 * is reached.
 *
 * @return DOMNode the <code> element holding the collected text
 */
private function parseCode() : DOMNode {
    $this->consume(); // opening backtick

    $buffer = "";
    // EOF must stop the scan as well: past the end of the stream every token
    // is EOF, so an unterminated span would otherwise never leave this loop.
    while (!in_array($this->current()->type, [TokenType::BACKTICK, TokenType::EOL, TokenType::EOF], true))
        $buffer .= $this->consume()->data;

    // Consume the closing backtick only when it is really there, so an
    // unterminated span does not swallow the line break.
    if ($this->current()->type === TokenType::BACKTICK)
        $this->consume();

    // createElement() does not escape its value argument ("&" breaks it);
    // a text node takes the content literally.
    $elm = $this->document->createElement("code");
    if ($buffer !== "")
        $elm->appendChild($this->document->createTextNode($buffer));

    return $elm;
}
|
|
|
|
/**
 * Tries to parse a link of the form [text](target) or [text][reference].
 *
 * Expects the read pointer on the opening "[". For the "(" form the target
 * is used as href directly. For the "[" form the href is the stored
 * reference target if the key is already known, otherwise a placeholder
 * MAGIC_CHAR + key + MAGIC_CHAR that resolveReferences() can replace later.
 *
 * @return DOMNode|null the <a> element, or null when the tokens do not form
 *                      a complete link; in that case the read pointer is
 *                      restored to the opening "["
 */
private function parseLink() : ?DOMNode {
    $start = $this->pointer;
    $this->consume(); // opening "["

    // Every scan below also stops at EOF: past the end of the stream each
    // token is EOF, so an unterminated link would otherwise loop forever.
    $text = "";
    while (!in_array($this->current()->type, [TokenType::RBRACKET, TokenType::EOL, TokenType::EOF], true))
        $text .= $this->consume()->data;

    // A link text that runs into the end of the line is not a link.
    if ($this->current()->type !== TokenType::RBRACKET) {
        $this->pointer = $start;
        return null;
    }
    $this->consume(); // "]"

    $opener = $this->current()->type;
    if ($opener !== TokenType::LBRACKET && $opener !== TokenType::LPAREN) {
        $this->pointer = $start;
        return null;
    }
    $this->consume(); // "[" or "("

    $index = "";
    while (!in_array($this->current()->type, [TokenType::RBRACKET, TokenType::RPAREN, TokenType::EOL, TokenType::EOF], true))
        $index .= $this->consume()->data;

    // Same for the target: without a closing delimiter on this line the
    // tokens are handed back to the caller as ordinary text.
    if ($this->current()->type !== TokenType::RBRACKET && $this->current()->type !== TokenType::RPAREN) {
        $this->pointer = $start;
        return null;
    }
    $this->consume(); // "]" or ")"

    if ($opener === TokenType::LPAREN)
        $href = $index;
    elseif (array_key_exists($index, $this->references))
        $href = $this->references[$index];
    else
        $href = self::MAGIC_CHAR.$index.self::MAGIC_CHAR;

    // createElement() does not escape its value argument ("&" breaks it);
    // a text node takes the link text literally.
    $elm = $this->document->createElement("a");
    if ($text !== "")
        $elm->appendChild($this->document->createTextNode($text));
    $elm->setAttribute("href", $href);

    return $elm;
}
|
|
|
|
/**
 * Parses inline content into a list of DOM nodes.
 *
 * Recognises backslash escapes, bold, italic, inline code, links and images;
 * everything else is accumulated as plain text (with backslashes stripped).
 * The terminating line break is NOT consumed.
 *
 * @param bool $paragraph when false, parsing stops at the first line break;
 *                        when true, it stops only at two consecutive line
 *                        breaks, so single line breaks become part of the text
 * @return array list of DOMNode objects in input order
 */
private function parseText(bool $paragraph = false) : array {
    $elms = [];
    $buffer = "";

    // Moves the pending plain text into the result as its own text node.
    $clearBuffer = function() use (&$elms, &$buffer) : void {
        $elms[] = $this->document->createTextNode($buffer);
        $buffer = "";
    };

    while ($this->current()->type !== TokenType::EOF) {
        $type = $this->current()->type;

        if ($type === TokenType::EOL && (!$paragraph || $this->next()->type === TokenType::EOL))
            break;

        // backslash in front of a markup character: keep the character literally
        if ($type === TokenType::BACKSLASH && in_array($this->next()->type, [
            TokenType::BACKTICK,
            TokenType::ASTERISK,
            TokenType::LBRACKET,
            TokenType::BANG,
        ], true)) {
            $this->consume(); // backslash
            $buffer .= $this->consume()->data;
            continue;
        }

        if ($type === TokenType::ASTERISK) {
            $clearBuffer();
            if ($this->next()->type === TokenType::ASTERISK) {
                $this->consume();
                $this->consume();
                $elms[] = $this->parseBold();
            } else {
                $this->consume();
                $elms[] = $this->parseItalic();
            }
            continue;
        }

        if ($type === TokenType::BACKTICK) {
            $clearBuffer();
            $elms[] = $this->parseCode();
            continue;
        }

        if ($type === TokenType::LBRACKET) {
            $link = $this->parseLink();
            if ($link === null) {
                // not a link: the bracket is ordinary text
                $buffer .= self::StripBackslashes($this->consume()->data);
                continue;
            }
            $clearBuffer();
            $elms[] = $link;
            continue;
        }

        if ($type === TokenType::BANG) {
            $image = $this->parseImage();
            if ($image === null) {
                // Not an image: keep the "!" itself as text and let the loop
                // handle whatever follows it (it may still be a link, a line
                // break, ...) instead of swallowing the next token.
                $this->consume();
                $buffer .= "!";
                continue;
            }
            $clearBuffer();
            $elms[] = $image;
            continue;
        }

        $buffer .= self::StripBackslashes($this->consume()->data);
    }

    if (strlen($buffer) > 0)
        $elms[] = $this->document->createTextNode($buffer);

    return $elms;
}

/**
 * Tries to parse an image of the form ![alt](src) starting at the "!".
 *
 * @return DOMElement|null the <img> element, or null when the tokens do not
 *                         form a complete image; in that case the read
 *                         pointer is restored to the "!"
 */
private function parseImage() : ?DOMElement {
    // Backtracking restores the saved token index. The pointer counts
    // tokens, so it must never be adjusted by a character count.
    $start = $this->pointer;
    $this->consume(); // "!"

    if ($this->current()->type !== TokenType::LBRACKET) {
        $this->pointer = $start;
        return null;
    }
    $this->consume(); // "["

    // Both scans also stop at EOF: past the end of the stream every token is
    // EOF, so an unterminated image would otherwise loop forever.
    $alt = "";
    while (!in_array($this->current()->type, [TokenType::RBRACKET, TokenType::EOL, TokenType::EOF], true))
        $alt .= self::StripBackslashes($this->consume()->data);

    if ($this->current()->type !== TokenType::RBRACKET || $this->next()->type !== TokenType::LPAREN) {
        $this->pointer = $start;
        return null;
    }
    $this->consume(); // "]"
    $this->consume(); // "("

    $src = "";
    while (!in_array($this->current()->type, [TokenType::RPAREN, TokenType::EOL, TokenType::EOF], true))
        $src .= $this->consume()->data;

    if ($this->current()->type !== TokenType::RPAREN) {
        $this->pointer = $start;
        return null;
    }
    $this->consume(); // ")"

    $elm = $this->document->createElement("img");
    if (strlen($alt) > 0)
        $elm->setAttribute("alt", $alt);
    $elm->setAttribute("src", $src);

    return $elm;
}
|
|
|
|
/**
 * Parses consecutive list items into a <ul> or <ol> element.
 *
 * Each item is an optional run of tabs, a marker (an asterisk for unordered
 * lists, a number followed by a dot for ordered lists), inline content and a
 * line break. The number of tabs in front of the FOLLOWING line decides how
 * to go on: more tabs than $level open a nested list inside the current
 * item, fewer tabs end this list.
 *
 * Malformed input is only detected through assert().
 *
 * @param ListType $type  kind of list to build; nested lists use the same kind
 * @param int      $level tab depth of the items of this list
 * @return DOMNode the list element
 */
private function parseList(ListType $type = ListType::UNORDERED, int $level = 0) : DOMNode {
    $isUnordered = $type === ListType::UNORDERED;
    $container = $this->document->createElement($isUnordered ? "ul" : "ol");

    while (!($this->current()->type === TokenType::EOF
        || $this->current()->type === TokenType::EOL
        || $this->next()->type === TokenType::EOL)) {

        // indentation in front of the marker carries no further information here
        while ($this->current()->type === TokenType::TAB)
            $this->consume();

        if ($this->current()->type === TokenType::EOF)
            break;

        // the marker: "*" or "<number>."
        if ($isUnordered) {
            $bullet = $this->consume();
            assert($bullet->type === TokenType::ASTERISK, "expected asterisk, got ".$bullet->type->name);
        } else {
            $ordinal = $this->consume();
            assert($ordinal->type === TokenType::NUMBER, "expected number, got ".$ordinal->type->name);
            $dot = $this->consume();
            assert($dot->type === TokenType::DOT, "expected period, got ".$dot->type->name);
        }

        // the item content, up to the end of the line
        $item = $this->document->createElement("li");
        foreach ($this->parseText() as $child)
            $item->appendChild($child);

        assert($this->current()->type === TokenType::EOL, "expected EOL, got ".$this->current()->type->name);
        $this->consume();

        // count the tabs in front of the next line without consuming them
        $nextLevel = 0;
        while ($this->peek($nextLevel)->type === TokenType::TAB)
            $nextLevel++;

        // deeper indentation: the following items form a sub list of this item
        if ($nextLevel > $level)
            $item->appendChild($this->parseList($type, $nextLevel));

        $container->appendChild($item);

        // shallower indentation: this (sub) list is complete
        if ($nextLevel < $level)
            break;
    }

    return $container;
}
|
|
|
|
/**
 * Wraps the given nodes in a <p> element and appends it to the document.
 *
 * Text nodes that contain only whitespace are dropped. Nothing is appended
 * when no node remains.
 *
 * @param array $elms nodes that make up the paragraph
 */
private function buildParagraph(array $elms) : void {
    if (count($elms) < 1)
        return;

    $paragraph = $this->document->createElement("p");
    foreach ($elms as $node) {
        $isBlankText = $node->nodeName === "#text" && trim($node->textContent) === "";
        if (!$isBlankText)
            $paragraph->appendChild($node);
    }

    if ($paragraph->hasChildNodes())
        $this->document->appendChild($paragraph);
}
|
|
|
|
/**
 * Parses a heading line and appends the heading element to the document.
 *
 * The number of leading hash tokens selects the heading level. The rest of
 * the line is parsed as inline content; the line break is not consumed.
 */
private function parseHeading() : void {
    $level = 0;
    while ($this->current()->type === TokenType::HASH) {
        $level++;
        $this->consume();
    }

    // HTML defines h1 to h6 only; more hashes would otherwise produce
    // elements such as <h7>, so deeper levels are rendered as h6.
    $elm = $this->document->createElement("h".min($level, 6));
    foreach ($this->parseText() as $node)
        $elm->appendChild($node);

    $this->document->appendChild($elm);
}
|
|
|
|
/**
 * Parses a fenced code block and appends <pre><code>…</code></pre> to the
 * document.
 *
 * Expects the read pointer on a backtick. When it is not followed by two
 * more backticks, the line is treated as an ordinary paragraph instead.
 * The rest of the opening line is taken as language name and stored in the
 * data-lang attribute of the <pre> element. The block ends at the next run
 * of three backticks or at the end of the input.
 */
private function parseCodeBlock() : void {
    $isFence = $this->next()->type === TokenType::BACKTICK
        && $this->peek(2)->type === TokenType::BACKTICK;
    if (!$isFence) {
        $this->buildParagraph($this->parseText());
        return;
    }

    $this->consume();
    $this->consume();
    $this->consume(); // ```

    // textContent exists on every node type; the first node is not
    // necessarily a text node (which would be the only one with ->data).
    $langNodes = $this->parseText();
    $lang = count($langNodes) > 0 ? trim($langNodes[0]->textContent) : "";

    $container = $this->document->createElement("pre");
    if ($lang !== "")
        $container->setAttribute("data-lang", $lang);

    $buffer = "";
    while ($this->current()->type !== TokenType::EOF
        && !($this->current()->type === TokenType::BACKTICK
            && $this->next()->type === TokenType::BACKTICK
            && $this->peek(2)->type === TokenType::BACKTICK)) {
        $buffer .= self::StripBackslashes($this->consume()->data);
    }

    if ($this->current()->type !== TokenType::EOF) {
        $this->consume();
        $this->consume();
        $this->consume(); // closing ```
    }

    // createElement() resolves entities in its value, so the escaped buffer
    // ends up as the literal code text.
    $elm = $this->document->createElement("code", htmlspecialchars($buffer));
    $container->appendChild($elm);
    $this->document->appendChild($container);

    // Skip the line break after the closing fence, but do not drop any other
    // token that may directly follow it.
    if ($this->current()->type === TokenType::EOL)
        $this->consume();
}
|
|
|
|
/**
 * Parses a reference definition of the form "[key]: target".
 *
 * Expects the read pointer on the opening "[". The key must be a single
 * number or text token. The trimmed rest of the line is stored in
 * $this->references under the key. Lines that do not have this shape are
 * treated as an ordinary paragraph instead.
 */
private function parseReference() : void {
    $keyType = $this->next()->type;
    $isDefinition = ($keyType === TokenType::NUMBER || $keyType === TokenType::TEXT)
        && $this->peek(2)->type === TokenType::RBRACKET
        && $this->peek(3)->type === TokenType::COLON;

    if (!$isDefinition) {
        $this->buildParagraph($this->parseText());
        return;
    }

    $this->consume(); // "["
    $index = $this->consume()->data;
    $this->consume(); // "]"
    $this->consume(); // ":"

    $target = "";
    while (!($this->current()->type === TokenType::EOL || $this->current()->type === TokenType::EOF))
        $target .= $this->consume()->data;
    $this->consume(); // line break (or a step past the end of the input)

    $this->references[$index] = trim($target);
}
|
|
|
|
/**
 * Parses one table line into a <tr> element.
 *
 * Expects the read pointer on the leading pipe of the line. The text between
 * two pipes becomes one cell; empty cells are skipped. Parsing stops in
 * front of the line break (or at the end of the input).
 *
 * @param string     $nodeName element name of the cells ("th" or "td")
 * @param array|null $props    text-align value per column; when given and
 *                             not empty, every cell gets a matching style
 *                             attribute ("left" for columns without entry)
 * @return DOMNode the <tr> element
 */
private function parseTableHead(string $nodeName = "th", ?array $props = null) : DOMNode {
    $row = $this->document->createElement("tr");
    $column = 0;

    while (!in_array($this->current()->type, [TokenType::EOL, TokenType::EOF], true)) {
        $this->consume(); // pipe in front of the cell

        // EOF must end the cell too: past the end of the stream every token
        // is EOF, so a last line without line break would never leave the loop.
        $buffer = "";
        while (!in_array($this->current()->type, [TokenType::PIPE, TokenType::EOL, TokenType::EOF], true))
            $buffer .= $this->consume()->data;

        if ($buffer === "")
            continue;

        // createElement() does not escape its value argument ("&" breaks it);
        // a text node takes the cell text literally.
        $cell = $this->document->createElement($nodeName);
        $cell->appendChild($this->document->createTextNode($buffer));

        // a row may have more cells than the alignment line has columns
        if ($props)
            $cell->setAttribute("style", "text-align: ".($props[$column] ?? "left"));

        $row->appendChild($cell);
        $column++;
    }

    return $row;
}
|
|
|
|
/**
 * Parses one table body line into a <tr> element with <td> cells.
 *
 * @param array $props text-align value per column
 * @return DOMNode the <tr> element
 */
private function parseTableRow(array $props) : DOMNode {
    return $this->parseTableHead(nodeName: "td", props: $props);
}
|
|
|
|
/**
 * Parses the alignment line of a table (the line below the header).
 *
 * First consumes one token, which parseTable() expects to be the line break
 * after the header line. Each non-empty cell then yields one entry:
 * "center" when the cell starts and ends with a colon, "right" when it only
 * ends with one, "left" otherwise. Parsing stops in front of the line break
 * (or at the end of the input).
 *
 * @return array list of text-align values, one per column
 */
private function parseTableAlignment() : array {
    $props = [];

    $this->consume(); // EOL

    while (!in_array($this->current()->type, [TokenType::EOL, TokenType::EOF], true)) {
        $this->consume(); // pipe in front of the cell

        // EOF must end the cell too: past the end of the stream every token
        // is EOF, so a last line without line break would never leave the loop.
        $buffer = "";
        while (!in_array($this->current()->type, [TokenType::PIPE, TokenType::EOL, TokenType::EOF], true))
            $buffer .= $this->consume()->data;

        if ($buffer === "")
            continue;

        // Cells are usually padded ("| :---: |"); the colons have to be looked
        // for on the trimmed content or they are never found.
        $spec = trim($buffer);
        $colonLeft = str_starts_with($spec, ":");
        $colonRight = str_ends_with($spec, ":");

        $props[] = match (true) {
            $colonLeft && $colonRight => "center",
            $colonRight => "right",
            default => "left",
        };
    }

    return $props;
}
|
|
|
|
/**
 * Parses a table and appends the <table> element to the document.
 *
 * Expects the read pointer on the leading pipe of the header line. The
 * header line is followed by the alignment line and then by any number of
 * body lines, each introduced by a line break directly followed by a pipe.
 */
private function parseTable() : void {
    $table = $this->document->createElement("table");
    $head = $this->parseTableHead();
    $props = $this->parseTableAlignment();

    $column = 0;
    foreach ($head->childNodes as $cell) {
        // the header may have more cells than the alignment line has columns
        $cell->setAttribute("style", "text-align: ".($props[$column] ?? "left"));
        $column++;
    }
    $table->appendChild($head);

    while ($this->current()->type === TokenType::EOL && $this->next()->type === TokenType::PIPE) {
        $this->consume(); // EOL
        $table->appendChild($this->parseTableRow($props));
    }

    $this->document->appendChild($table);
}
|
|
|
|
/**
 * Parses consecutive quoted lines into one <blockquote> element and appends
 * it to the document.
 *
 * Expects the read pointer on a ">" token. When the token after it does not
 * start with a space, the line is treated as an ordinary paragraph instead.
 * Every quoted line is added as a text node followed by a <br> element. The
 * quote ends at the first line that does not start with ">".
 */
private function parseBlockQuote() : void {
    // The cast keeps str_starts_with() from failing under strict_types.
    // NOTE(review): assumes a token without text (such as the EOF token from
    // peek()) may carry a non-string value - confirm against Token.
    if (!str_starts_with((string) $this->next()->data, " ")) {
        $this->buildParagraph($this->parseText());
        return;
    }

    $quote = $this->document->createElement("blockquote");

    while ($this->current()->type === TokenType::GT) {
        $this->consume(); // ">"

        // Collect the whole line. Stopping at EOF as well makes sure that a
        // last line without line break is still added to the quote.
        $line = "";
        while (!in_array($this->current()->type, [TokenType::EOL, TokenType::EOF], true))
            $line .= $this->consume()->data;

        $quote->appendChild($this->document->createTextNode($line));
        $quote->appendChild($this->document->createElement("br"));

        // Step over the line break; the loop condition then decides whether
        // the next line still belongs to the quote, so that a following
        // non-quoted line is left untouched for the caller.
        if ($this->current()->type === TokenType::EOL)
            $this->consume();
    }

    $this->document->appendChild($quote);
}
|
|
|
|
/**
 * Parses a horizontal rule and appends an <hr> element to the document.
 *
 * Expects the read pointer on a minus token. A rule is exactly three minus
 * tokens followed by a line break or the end of the input; anything else is
 * treated as an ordinary paragraph instead.
 */
private function parseHorizontalRule() : void {
    // The end of the input counts as line end too, so that a rule on the
    // last line does not need a trailing line break.
    $terminator = $this->peek(3)->type;
    $isRule = $this->next()->type === TokenType::MINUS
        && $this->peek(2)->type === TokenType::MINUS
        && ($terminator === TokenType::EOL || $terminator === TokenType::EOF);

    if (!$isRule) {
        $this->buildParagraph($this->parseText());
        return;
    }

    $this->consume(); // -
    $this->consume(); // -
    $this->consume(); // -
    if ($this->current()->type === TokenType::EOL)
        $this->consume(); // EOL

    $this->document->appendChild($this->document->createElement("hr"));
}
|
|
|
|
/**
 * Parses the whole token stream into the document.
 *
 * The first token of each block selects the block parser. Line breaks
 * between blocks are skipped and everything without a dedicated parser
 * becomes a paragraph. After the last block, reference placeholders in
 * links are replaced by the targets collected on the way.
 *
 * @return DOMDocument the document holding all parsed blocks
 */
public function parse() : DOMDocument {
    while ($this->current()->type !== TokenType::EOF) {
        switch ($this->current()->type) {
            case TokenType::ASTERISK:
                $this->document->appendChild($this->parseList());
                break;
            case TokenType::HASH:
                $this->parseHeading();
                break;
            case TokenType::NUMBER:
                // Only "<number>." starts an ordered list. A line that merely
                // begins with a number is ordinary text; parseList() would
                // otherwise take its second token for the dot.
                if ($this->next()->type === TokenType::DOT)
                    $this->document->appendChild($this->parseList(ListType::ORDERED));
                else
                    $this->buildParagraph($this->parseText(true));
                break;
            case TokenType::BACKTICK:
                $this->parseCodeBlock();
                break;
            case TokenType::EOL:
                $this->consume();
                break;
            case TokenType::LBRACKET:
                $this->parseReference();
                break;
            case TokenType::PIPE:
                $this->parseTable();
                break;
            case TokenType::GT:
                $this->parseBlockQuote();
                break;
            case TokenType::MINUS:
                $this->parseHorizontalRule();
                break;
            case TokenType::TEXT:
            default:
                $this->buildParagraph($this->parseText(true));
                break;
        }
    }

    foreach ($this->document->childNodes as $node) {
        // resolveReferences() accepts elements only
        if ($node instanceof DOMElement)
            $this->resolveReferences($node);
    }

    return $this->document;
}
|
|
} |