tokenStream = $tokenStream;
        $this->pointer = 0;
        $this->document = new DOMDocument();
        $this->references = [];
    }

    /** Returns the token at the current pointer position without consuming it. */
    private function current(): Token
    {
        return $this->peek();
    }

    /** Returns the token one position after the pointer without consuming it. */
    private function next(): Token
    {
        return $this->peek(1);
    }

    /** Returns the token one position before the pointer. */
    private function last(): Token
    {
        return $this->peek(-1);
    }

    /**
     * Looks at the token `$amount` positions away from the pointer.
     *
     * @param int $amount Offset relative to the pointer (may be negative).
     * @return Token The token at that position, or a fresh EOF token when the
     *               position lies outside the token stream.
     */
    private function peek(int $amount = 0): Token
    {
        $position = $this->pointer + $amount;
        if ($position < 0 || $position >= count($this->tokenStream)) {
            return new Token(TokenType::EOF);
        }

        return $this->tokenStream[$position];
    }

    /** Returns the current token and advances the pointer by one. */
    private function consume(): Token
    {
        $token = $this->current();
        $this->pointer++;

        return $token;
    }

    /** True when the current token ends the line (EOL) or the whole stream (EOF). */
    private function atLineEnd(): bool
    {
        $type = $this->current()->type;

        return $type === TokenType::EOL || $type === TokenType::EOF;
    }

    /**
     * Creates an element whose content is a single, properly escaped text node.
     *
     * DOMDocument::createElement() treats "&" in its value argument as the start
     * of an entity reference, so raw user text must go through createTextNode().
     */
    private function createTextElement(string $name, string $text): DOMElement
    {
        $elm = $this->document->createElement($name);
        if ($text !== "") {
            $elm->appendChild($this->document->createTextNode($text));
        }

        return $elm;
    }

    /** Thin wrapper around stripslashes(). */
    private static function StripBackslashes(string $text): string
    {
        return stripslashes($text);
    }

    /**
     * Recursively replaces placeholder hrefs (MAGIC_CHAR + index + MAGIC_CHAR)
     * with the matching entry of $this->references, when one exists.
     */
    private function resolveReferences(DOMElement $node): void
    {
        if (count($this->references) < 1) {
            return;
        }

        if ($node->hasAttribute("href")) {
            $href = $node->getAttribute("href");
            if (substr($href, 0, 1) === self::MAGIC_CHAR) {
                // strip the first and the last character to get the reference index
                $index = substr($href, 1, strlen($href) - 2);
                if (array_key_exists($index, $this->references)) {
                    $node->setAttribute("href", $this->references[$index]);
                }
            }
        }

        if ($node->hasChildNodes()) {
            foreach ($node->childNodes as $child) {
                if ($child->nodeType === XML_ELEMENT_NODE) {
                    $this->resolveReferences($child);
                }
            }
        }
    }

    /**
     * Dumps the token stream with print_r().
     *
     * NOTE(review): the surrounding echoes look like remnants of stripped wrapper
     * markup - confirm against the original file.
     */
    public function debug(): void
    {
        echo "\n";
        print_r($this->tokenStream);
        echo "";
    }

    // PARSING

    /**
     * Parses bold text. Expects both opening asterisks to be consumed already.
     * Reads up to the next asterisk or the end of the line; closing asterisks
     * are only consumed when they are actually present, so an unterminated
     * run no longer swallows the line break and the following token.
     */
    private function parseBold(): DOMNode
    {
        $buffer = "";
        while ($this->current()->type !== TokenType::ASTERISK && !$this->atLineEnd()) {
            $buffer .= $this->consume()->data;
        }

        // closing "**"
        if ($this->current()->type === TokenType::ASTERISK) {
            $this->consume();
        }
        if ($this->current()->type === TokenType::ASTERISK) {
            $this->consume();
        }

        return $this->createTextElement("b", $buffer);
    }

    /** Parses italic text. Expects the opening asterisk to be consumed already. */
    private function parseItalic(): DOMNode
    {
        $buffer = "";
        while ($this->current()->type !== TokenType::ASTERISK && !$this->atLineEnd()) {
            $buffer .= $this->consume()->data;
        }

        if ($this->current()->type === TokenType::ASTERISK) {
            $this->consume();
        }

        return $this->createTextElement("i", $buffer);
    }

    /** Parses inline code. The pointer must be on the opening backtick. */
    private function parseCode(): DOMNode
    {
        $this->consume(); // opening backtick

        $buffer = "";
        while ($this->current()->type !== TokenType::BACKTICK && !$this->atLineEnd()) {
            $buffer .= $this->consume()->data;
        }

        if ($this->current()->type === TokenType::BACKTICK) {
            $this->consume();
        }

        return $this->createTextElement("code", $buffer);
    }

    /**
     * Parses `[text](url)` or `[text][reference]`. The pointer must be on the
     * opening bracket.
     *
     * @return DOMNode|null The anchor element, or null (with the pointer restored
     *                      to the opening bracket) when the tokens do not form a
     *                      complete link on the current line.
     */
    private function parseLink(): ?DOMNode
    {
        $start = $this->pointer;
        $this->consume(); // [

        $text = "";
        while ($this->current()->type !== TokenType::RBRACKET && !$this->atLineEnd()) {
            $text .= $this->consume()->data;
        }
        if ($this->current()->type !== TokenType::RBRACKET) {
            $this->pointer = $start;
            return null;
        }
        $this->consume(); // ]

        $opener = $this->current()->type;
        if ($opener !== TokenType::LBRACKET && $opener !== TokenType::LPAREN) {
            $this->pointer = $start;
            return null;
        }
        $this->consume(); // ( or [

        // the target must be closed by the counterpart of its opener
        $closer = $opener === TokenType::LPAREN ? TokenType::RPAREN : TokenType::RBRACKET;
        $index = "";
        while ($this->current()->type !== $closer && !$this->atLineEnd()) {
            $index .= $this->consume()->data;
        }
        if ($this->current()->type !== $closer) {
            $this->pointer = $start;
            return null;
        }
        $this->consume(); // ) or ]

        if ($opener === TokenType::LPAREN) {
            $href = $index;
        } elseif (array_key_exists($index, $this->references)) {
            $href = $this->references[$index];
        } else {
            // reference not known yet; leave a placeholder for resolveReferences()
            $href = self::MAGIC_CHAR.$index.self::MAGIC_CHAR;
        }

        $elm = $this->createTextElement("a", $text);
        $elm->setAttribute("href", $href);

        return $elm;
    }

    /**
     * Parses `[alt](src)` of an image. Expects the leading "!" to be consumed.
     *
     * @return DOMNode|null The img element, or null (with the pointer restored to
     *                      the token right after the "!") when no complete image
     *                      follows on the current line.
     */
    private function parseImage(): ?DOMNode
    {
        // remember the token position; token data may be longer than one character,
        // so string lengths must never be used to rewind the pointer
        $start = $this->pointer;

        if ($this->current()->type !== TokenType::LBRACKET) {
            return null;
        }
        $this->consume(); // [

        $alt = "";
        while ($this->current()->type !== TokenType::RBRACKET && !$this->atLineEnd()) {
            $alt .= self::StripBackslashes($this->consume()->data);
        }
        if ($this->current()->type !== TokenType::RBRACKET || $this->next()->type !== TokenType::LPAREN) {
            $this->pointer = $start;
            return null;
        }
        $this->consume(); // ]
        $this->consume(); // (

        $src = "";
        while ($this->current()->type !== TokenType::RPAREN && !$this->atLineEnd()) {
            $src .= $this->consume()->data;
        }
        if ($this->current()->type !== TokenType::RPAREN) {
            $this->pointer = $start;
            return null;
        }
        $this->consume(); // )

        $elm = $this->document->createElement("img");
        if (strlen($alt) > 0) {
            $elm->setAttribute("alt", $alt);
        }
        $elm->setAttribute("src", $src);

        return $elm;
    }

    /**
     * Parses an attribute list such as `{.class, #id}`. The pointer must be on the
     * opening brace. Entries not starting with "." or "#" (including empty ones)
     * are ignored.
     */
    private function parseAttributes(): Attributes
    {
        $lbrace = $this->consume();
        assert($lbrace->type === TokenType::LBRACE, "expected left brace, got ".$lbrace->type->name);

        $content = "";
        while (!$this->atLineEnd() && $this->current()->type !== TokenType::RBRACE) {
            $content .= $this->consume()->data;
        }

        $rbrace = $this->consume();
        assert($rbrace->type === TokenType::RBRACE, "expected right brace, got ".$rbrace->type->name);

        $obj = new Attributes();
        foreach (explode(',', $content) as $raw) {
            $attribute = trim($raw);
            if ($attribute === "") {
                continue; // "{}" or a trailing comma; avoids reading offset 0 of ""
            }

            switch ($attribute[0]) {
                case ".":
                    array_push($obj->classes, substr($attribute, 1));
                    break;
                case "#":
                    $obj->id = substr($attribute, 1);
                    break;
            }
        }

        return $obj;
    }

    /**
     * Parses inline content into a list of nodes.
     *
     * @param bool $paragraph When false, parsing stops at the first EOL; when
     *                        true, it stops only at two consecutive EOLs.
     * @return array DOM nodes (text, b, i, code, a, img) mixed with Attributes
     *               objects, in source order.
     */
    private function parseText(bool $paragraph = false): array
    {
        $elms = [];
        $buffer = "";
        $clearBuffer = function () use (&$elms, &$buffer) {
            array_push($elms, $this->document->createTextNode($buffer));
            $buffer = "";
        };

        while ($this->current()->type !== TokenType::EOF) {
            $type = $this->current()->type;

            if ($type === TokenType::EOL && (!$paragraph || $this->next()->type === TokenType::EOL)) {
                break;
            }

            $escapable = [
                TokenType::BACKTICK,
                TokenType::ASTERISK,
                TokenType::LBRACKET,
                TokenType::BANG,
                TokenType::LBRACE,
            ];

            if ($type === TokenType::BACKSLASH && in_array($this->next()->type, $escapable, true)) {
                $this->consume(); // backslash
                $buffer .= $this->consume()->data;
            } elseif ($type === TokenType::ASTERISK) {
                $clearBuffer();
                if ($this->next()->type === TokenType::ASTERISK) {
                    $this->consume();
                    $this->consume();
                    array_push($elms, $this->parseBold());
                } else {
                    $this->consume();
                    array_push($elms, $this->parseItalic());
                }
            } elseif ($type === TokenType::BACKTICK) {
                $clearBuffer();
                array_push($elms, $this->parseCode());
            } elseif ($type === TokenType::LBRACKET) {
                $link = $this->parseLink();
                if ($link !== null) {
                    $clearBuffer();
                    array_push($elms, $link);
                } else {
                    // not a link: keep the bracket as literal text
                    $buffer .= self::StripBackslashes($this->consume()->data);
                }
            } elseif ($type === TokenType::BANG) {
                $this->consume(); // !
                $image = $this->parseImage();
                if ($image !== null) {
                    $clearBuffer();
                    array_push($elms, $image);
                } else {
                    // a plain exclamation mark; whatever follows is parsed normally
                    $buffer .= "!";
                }
            } elseif ($type === TokenType::LBRACE) {
                array_push($elms, $this->parseAttributes());
            } else {
                $buffer .= self::StripBackslashes($this->consume()->data);
            }
        }

        if (strlen($buffer) > 0) {
            array_push($elms, $this->document->createTextNode($buffer));
        }

        return $elms;
    }

    /**
     * Parses a (possibly nested) list. Nesting is expressed by leading TAB tokens.
     *
     * @param ListType $type  Ordered or unordered list.
     * @param int      $level Number of leading tabs of the items of this list.
     * @return DOMNode The ul/ol element.
     */
    private function parseList(ListType $type = ListType::UNORDERED, int $level = 0): DOMNode
    {
        $list = $this->document->createElement($type === ListType::UNORDERED ? "ul" : "ol");

        // the line break after each item is consumed below, so an EOL at this point
        // is a blank line and ends the list
        while (
            $this->current()->type !== TokenType::EOF
            && $this->current()->type !== TokenType::EOL
            && $this->next()->type !== TokenType::EOL
        ) {
            // first we remove leading tabs
            while ($this->current()->type === TokenType::TAB) {
                $this->consume();
            }
            if ($this->current()->type === TokenType::EOF) {
                break;
            }

            // then we expect an asterisk or a number followed by a period
            if ($type === ListType::UNORDERED) {
                $asterisk = $this->consume();
                assert($asterisk->type === TokenType::ASTERISK, "expected asterisk, got ".$asterisk->type->name);
            } else {
                $number = $this->consume();
                assert($number->type === TokenType::NUMBER, "expected number, got ".$number->type->name);
                $period = $this->consume();
                assert($period->type === TokenType::DOT, "expected period, got ".$period->type->name);
            }

            // then we parse the node content
            $elm = $this->document->createElement("li");
            foreach ($this->parseText() as $node) {
                if ($node instanceof DOMNode) {
                    $elm->appendChild($node);
                }
            }

            // the last item of the input may end without a line break
            if ($this->current()->type === TokenType::EOF) {
                $list->appendChild($elm);
                break;
            }

            // here should be an EOL
            assert($this->current()->type === TokenType::EOL, "expected EOL, got ".$this->current()->type->name);
            $this->consume();

            // count the leading tabs of the next line without consuming them
            $nextLevel = 0;
            while ($this->peek($nextLevel)->type === TokenType::TAB) {
                $nextLevel++;
            }

            // if the next line is indented deeper, it starts a sub list of this item
            if ($nextLevel > $level) {
                $elm->appendChild($this->parseList($type, $nextLevel));
            }

            // then we append the list item to the list
            $list->appendChild($elm);

            // if next level is lower than current, we are done with the current sub list
            if ($nextLevel < $level) {
                break;
            }
        }

        return $list;
    }

    /**
     * Wraps the given nodes in a <p> and appends it to the document. Attributes
     * objects set class/id on the paragraph; whitespace-only text nodes are
     * skipped; nothing is appended when no node remains.
     */
    private function buildParagraph(array $elms): void
    {
        if (count($elms) < 1) {
            return;
        }

        $elm = $this->document->createElement("p");
        $appended = 0;
        foreach ($elms as $node) {
            if ($node instanceof Attributes) {
                if (count($node->classes) > 0) {
                    $elm->setAttribute("class", join(" ", $node->classes));
                }
                if ($node->id) {
                    $elm->setAttribute("id", $node->id);
                }
                continue;
            }

            if ($node->nodeName === "#text" && trim($node->textContent) === "") {
                continue;
            }

            $elm->appendChild($node);
            $appended++;
        }

        if ($appended < 1) {
            return;
        }

        $this->document->appendChild($elm);
    }

    /**
     * Parses a heading; the number of leading HASH tokens gives the level.
     * The level is capped at 6 because HTML defines no heading beyond <h6>.
     */
    private function parseHeading(): void
    {
        $level = 0;
        while ($this->current()->type === TokenType::HASH) {
            $level++;
            $this->consume();
        }

        $elm = $this->document->createElement("h".min($level, 6));
        foreach ($this->parseText() as $node) {
            if ($node instanceof DOMNode) {
                $elm->appendChild($node);
            }
        }

        $this->document->appendChild($elm);
    }

    /**
     * Parses a fenced code block (three backticks, optional language, content,
     * three backticks). Fewer than three backticks fall back to a paragraph.
     */
    private function parseCodeBlock(): void
    {
        if (!($this->next()->type === TokenType::BACKTICK && $this->peek(2)->type === TokenType::BACKTICK)) {
            $this->buildParagraph($this->parseText());
            return;
        }

        $this->consume();
        $this->consume();
        $this->consume(); // ```

        // the rest of the fence line names the language; only plain text carries one
        $info = $this->parseText();
        $first = $info[0] ?? null;
        $lang = $first instanceof DOMCharacterData ? trim($first->data) : null;

        $container = $this->document->createElement("pre");
        if ($lang) {
            $container->setAttribute("data-lang", $lang);
            $container->setAttribute("class", "language-$lang");
        }

        $buffer = "";
        while (
            !($this->current()->type === TokenType::BACKTICK
                && $this->next()->type === TokenType::BACKTICK
                && $this->peek(2)->type === TokenType::BACKTICK)
            && $this->current()->type !== TokenType::EOF
        ) {
            $buffer .= self::StripBackslashes($this->consume()->data);
        }

        if ($this->current()->type !== TokenType::EOF) {
            $this->consume();
            $this->consume();
            $this->consume(); // closing ```
        }

        // createElement() parses entity references in its value, so the content is
        // escaped first
        $elm = $this->document->createElement("code", htmlspecialchars($buffer));
        if ($lang) {
            $elm->setAttribute("data-lang", $lang);
            $elm->setAttribute("class", "language-$lang");
        }

        $container->appendChild($elm);
        $this->document->appendChild($container);

        $this->consume(); // token after the closing fence (the line break)
    }

    /**
     * Parses a reference definition `[index]: target` and stores it in
     * $this->references. Anything else falls back to a paragraph.
     */
    private function parseReference(): void
    {
        $indexType = $this->next()->type;
        $isDefinition = ($indexType === TokenType::NUMBER || $indexType === TokenType::TEXT)
            && $this->peek(2)->type === TokenType::RBRACKET
            && $this->peek(3)->type === TokenType::COLON;

        if (!$isDefinition) {
            $this->buildParagraph($this->parseText());
            return;
        }

        $this->consume(); // [
        $index = $this->consume()->data;
        $this->consume(); // ]
        $this->consume(); // :

        $buffer = "";
        while (!$this->atLineEnd()) {
            $buffer .= $this->consume()->data;
        }
        $this->consume(); // EOL

        $this->references[$index] = trim($buffer);
    }

    /**
     * Parses one pipe-separated table line into a <tr>.
     *
     * @param string     $nodeName Cell element name ("th" or "td").
     * @param array|null $props    Per-column text-align values; a column without
     *                             an entry is aligned "left".
     */
    private function parseTableHead(string $nodeName = "th", ?array $props = null): DOMNode
    {
        $elm = $this->document->createElement("tr");
        $i = 0;

        while (!$this->atLineEnd()) {
            $this->consume(); // pipe

            $buffer = "";
            while ($this->current()->type !== TokenType::PIPE && !$this->atLineEnd()) {
                $buffer .= $this->consume()->data;
            }
            if ($buffer === "") {
                continue;
            }

            $col = $this->createTextElement($nodeName, $buffer);
            if ($props) {
                $col->setAttribute("style", "text-align: ".($props[$i] ?? "left"));
            }

            $elm->appendChild($col);
            $i++;
        }

        return $elm;
    }

    /** Parses one table body line into a <tr> of <td> cells. */
    private function parseTableRow(array $props): DOMNode
    {
        return $this->parseTableHead("td", $props);
    }

    /**
     * Parses the alignment line below the table head.
     *
     * @return array One of "left", "right" or "center" per column, derived from
     *               the position of the colons in each (trimmed) cell.
     */
    private function parseTableAlignment(): array
    {
        $props = [];
        $this->consume(); // EOL

        while (!$this->atLineEnd()) {
            $this->consume(); // pipe

            $buffer = "";
            while ($this->current()->type !== TokenType::PIPE && !$this->atLineEnd()) {
                $buffer .= $this->consume()->data;
            }
            if ($buffer === "") {
                continue;
            }

            // cells are usually padded with spaces: "| :---: |"
            $spec = trim($buffer);
            $startsWithColon = str_starts_with($spec, ':');
            $endsWithColon = str_ends_with($spec, ':');

            if ($startsWithColon && $endsWithColon) {
                array_push($props, "center");
            } elseif ($endsWithColon) {
                array_push($props, "right");
            } else {
                array_push($props, "left");
            }
        }

        return $props;
    }

    /** Parses a table (head line, alignment line, body lines) and appends it. */
    private function parseTable(): void
    {
        $elm = $this->document->createElement("table");
        $head = $this->parseTableHead();
        $props = $this->parseTableAlignment();

        $i = 0;
        foreach ($head->childNodes as $col) {
            // the alignment line may define fewer columns than the head
            $col->setAttribute("style", "text-align: ".($props[$i] ?? "left"));
            $i++;
        }
        $elm->appendChild($head);

        while ($this->current()->type === TokenType::EOL && $this->next()->type === TokenType::PIPE) {
            $this->consume(); // EOL
            $elm->appendChild($this->parseTableRow($props));
        }

        $this->document->appendChild($elm);
    }

    /**
     * Parses consecutive lines starting with ">" into one <blockquote>; each line
     * becomes a text node followed by a <br>. A ">" whose following token does not
     * start with a space falls back to a paragraph.
     */
    private function parseBlockQuote(): void
    {
        if (!str_starts_with((string) $this->next()->data, " ")) {
            $this->buildParagraph($this->parseText());
            return;
        }

        $elm = $this->document->createElement("blockquote");

        while ($this->current()->type === TokenType::GT) {
            $this->consume(); // >

            $buffer = "";
            while (!$this->atLineEnd()) {
                $buffer .= $this->consume()->data;
            }

            $elm->appendChild($this->document->createTextNode($buffer));
            $elm->appendChild($this->document->createElement("br"));

            // step over the line break so the next ">" (if any) becomes current
            if ($this->current()->type === TokenType::EOL) {
                $this->consume();
            }
        }

        $this->document->appendChild($elm);
    }

    /**
     * Parses a horizontal rule: exactly three MINUS tokens followed by the end of
     * the line or of the input. Anything else falls back to a paragraph.
     */
    private function parseHorizontalRule(): void
    {
        $end = $this->peek(3)->type;
        $isRule = $this->next()->type === TokenType::MINUS
            && $this->peek(2)->type === TokenType::MINUS
            && ($end === TokenType::EOL || $end === TokenType::EOF);

        if (!$isRule) {
            $this->buildParagraph($this->parseText());
            return;
        }

        $this->consume(); // -
        $this->consume(); // -
        $this->consume(); // -
        $this->consume(); // EOL

        $elm = $this->document->createElement("hr");
        $this->document->appendChild($elm);
    }

    /**
     * Parses the whole token stream into the DOM document, dispatching on the
     * first token of each block, then resolves reference-style link targets.
     */
    public function parse(): DOMDocument
    {
        while ($this->current()->type !== TokenType::EOF) {
            switch ($this->current()->type) {
                case TokenType::ASTERISK:
                    if ($this->next()->type === TokenType::ASTERISK) {
                        $this->buildParagraph($this->parseText(true));
                        break;
                    }
                    $list = $this->parseList();
                    $this->document->appendChild($list);
                    break;
                case TokenType::HASH:
                    $this->parseHeading();
                    break;
                case TokenType::NUMBER:
                    // only "<number>." starts an ordered list; other text that
                    // merely begins with a number is a paragraph
                    if ($this->next()->type !== TokenType::DOT) {
                        $this->buildParagraph($this->parseText(true));
                        break;
                    }
                    $list = $this->parseList(ListType::ORDERED);
                    $this->document->appendChild($list);
                    break;
                case TokenType::BACKTICK:
                    $this->parseCodeBlock();
                    break;
                case TokenType::EOL:
                    $this->consume();
                    break;
                case TokenType::LBRACKET:
                    $this->parseReference();
                    break;
                case TokenType::PIPE:
                    $this->parseTable();
                    break;
                case TokenType::GT:
                    $this->parseBlockQuote();
                    break;
                case TokenType::MINUS:
                    $this->parseHorizontalRule();
                    break;
                case TokenType::TEXT:
                default:
                    $this->buildParagraph($this->parseText(true));
                    break;
            }
        }

        foreach ($this->document->childNodes as $node) {
            if ($node instanceof DOMElement) {
                $this->resolveReferences($node);
            }
        }

        return $this->document;
    }
}