@ -6,6 +6,7 @@ use Attribute;
use DOMDocument;
use DOMDocument;
use DOMElement;
use DOMElement;
use DOMNode;
use DOMNode;
use phpDocumentor\Reflection\DocBlock\Tags\Throws;
enum ListType {
enum ListType {
case ORDERED;
case ORDERED;
@ -24,12 +25,14 @@ class Parser {
private int $pointer;
private int $pointer;
private DOMDocument $document;
private DOMDocument $document;
private array $references;
private array $references;
private bool $strict;
/**
 * Creates a parser positioned at the start of the given token stream.
 *
 * @param array $tokenStream Tokens to parse; stored as-is and addressed through the internal pointer.
 * @param bool  $strict      Stored in $this->strict. The inline parse methods of this class read the
 *                           flag: when it is false they insert missing closing tokens to recover from
 *                           malformed input, when it is true they leave the input untouched so the
 *                           following check fails.
 */
public function __construct(array $tokenStream, bool $strict = false) {
    $this->tokenStream = $tokenStream;
    $this->pointer = 0;
    $this->document = new DOMDocument();
    $this->references = [];
    $this->strict = $strict;
}
private function current() : Token {
private function current() : Token {
@ -44,6 +47,11 @@ class Parser {
return $this->peek(-1);
return $this->peek(-1);
}
}
/**
 * Splices a token into the token stream without removing any existing token.
 *
 * @param Token $token  Token to add.
 * @param int   $offset Position relative to the current pointer; 0 inserts at the pointer itself.
 */
private function insert(Token $token, int $offset = 0) : void {
    $position = $this->pointer + $offset;
    // The replacement must be wrapped in an array: array_splice() casts a bare
    // object to an array of its properties instead of inserting the object.
    array_splice($this->tokenStream, $position, 0, [$token]);
}
private function peek(int $amount = 0) : Token {
private function peek(int $amount = 0) : Token {
$amount += $this->pointer;
$amount += $this->pointer;
if ($amount < 0 | | $ amount > = count($this->tokenStream))
if ($amount < 0 | | $ amount > = count($this->tokenStream))
@ -63,6 +71,26 @@ class Parser {
return stripslashes($text);
return stripslashes($text);
}
}
/**
 * Builds the location prefix that is prepended to parser error messages.
 *
 * @param array $loc Location triple in the order [column, row, file name].
 * @return string "<file>:<row>:<col>: ERROR: "; "INPUT_STRING" stands in for a falsy file name.
 */
private static function LOC(array $loc) : string {
    [$col, $row, $fileName] = $loc;
    $file = $fileName ?: "INPUT_STRING";
    return "$file:$row:$col: ERROR: ";
}
/**
 * Raises a ParserError, prefixed with the token's location, when a parse expectation fails.
 *
 * This deliberately does not use PHP's assert(): assert() is skipped entirely when
 * zend.assertions is disabled (the usual production setting), which would let malformed
 * input pass unnoticed. Throwing explicitly also means the error object is only built
 * on failure instead of on every successful check.
 *
 * @param bool   $assertion Condition that must hold.
 * @param Token  $token     Token whose location is reported in the error.
 * @param string $message   Text appended after the location prefix.
 * @throws ParserError When $assertion is false.
 */
private static function Assert(bool $assertion, Token $token, string $message = "") : void {
    if ($assertion)
        return;
    // NOTE(review): ParserError was previously passed as the assert() description,
    // so it is assumed to be a Throwable — confirm against its declaration.
    throw new ParserError(self::LOC($token->location).$message);
}
/**
 * Derives a slug from an HTML fragment.
 *
 * Tags are stripped, surrounding whitespace is trimmed, the text is lower-cased
 * with strtolower(), and every space character is replaced by an underscore.
 *
 * @param string $html HTML or plain text to convert.
 * @return string The resulting slug.
 */
public static function TextToSlug(string $html) : string {
    $plainText = trim(strip_tags($html));
    return str_replace(" ", "_", strtolower($plainText));
}
private function resolveReferences(DOMElement $node) : void {
private function resolveReferences(DOMElement $node) : void {
if (count($this->references) < 1 )
if (count($this->references) < 1 )
return;
return;
@ -92,34 +120,68 @@ class Parser {
/**
 * Parses the body of a bold span and its two closing asterisks into a <b> element.
 *
 * Token data is collected until the next asterisk. If the line or input ends first
 * and the parser is not strict, the two missing closing asterisks are inserted into
 * the token stream so parsing can continue; in strict mode the closing check fails.
 *
 * @return DOMNode The <b> element holding the collected text.
 */
private function parseBold() : DOMNode {
    $buffer = "";
    while ($this->current()->type !== TokenType::ASTERISK) {
        $type = $this->current()->type;
        if ($type === TokenType::EOL || $type === TokenType::EOF) {
            if (!$this->strict) {
                // Recover from an unterminated span by synthesising the closing "**".
                [$col, $row, $fileName] = $this->current()->location;
                $this->insert(new Token(TokenType::ASTERISK, "*", [$col + 1, $row, $fileName]));
                $this->insert(new Token(TokenType::ASTERISK, "*", [$col + 2, $row, $fileName]));
            }
            break;
        }
        $buffer .= $this->consume()->data;
    }

    // A bold span closes with two asterisks; each is checked (and, when not strict,
    // supplied if absent) in turn.
    for ($closing = 0; $closing < 2; $closing++) {
        if (!$this->strict && $this->current()->type !== TokenType::ASTERISK)
            $this->insert(new Token(TokenType::ASTERISK, "*", $this->current()->location));
        $asterisk = $this->consume();
        self::Assert($asterisk->type === TokenType::ASTERISK, $asterisk, "expected asterisk, got ".$asterisk->type->name);
    }

    return $this->document->createElement("b", $buffer);
}
/**
 * Parses the body of an italic span and its closing asterisk into an <i> element.
 *
 * Token data is collected until the next asterisk. If the line or input ends first
 * and the parser is not strict, the missing closing asterisk is inserted into the
 * token stream so parsing can continue; in strict mode the closing check fails.
 *
 * @return DOMNode The <i> element holding the collected text.
 */
private function parseItalic() : DOMNode {
    $buffer = "";
    while ($this->current()->type !== TokenType::ASTERISK) {
        $type = $this->current()->type;
        if ($type === TokenType::EOL || $type === TokenType::EOF) {
            if (!$this->strict) {
                // Recover from an unterminated span by synthesising the closing "*".
                [$col, $row, $fileName] = $this->current()->location;
                $this->insert(new Token(TokenType::ASTERISK, "*", [$col + 1, $row, $fileName]));
            }
            break;
        }
        $buffer .= $this->consume()->data;
    }

    $asterisk = $this->consume();
    self::Assert($asterisk->type === TokenType::ASTERISK, $asterisk, "expected asterisk, got ".$asterisk->type->name);

    return $this->document->createElement("i", $buffer);
}
/**
 * Parses an inline code span, including both backticks, into a <code> element.
 *
 * Token data between the backticks is collected verbatim. If the line or input ends
 * before the closing backtick and the parser is not strict, a backtick is inserted
 * into the token stream so parsing can continue; in strict mode the closing check fails.
 *
 * @return DOMNode The <code> element holding the collected text.
 */
private function parseCode() : DOMNode {
    $buffer = "";

    $backtick = $this->consume();
    self::Assert($backtick->type === TokenType::BACKTICK, $backtick, "expected backtick, got ".$backtick->type->name);

    while ($this->current()->type !== TokenType::BACKTICK) {
        // we need to recover, if input is malformed
        $type = $this->current()->type;
        if ($type === TokenType::EOL || $type === TokenType::EOF) {
            if (!$this->strict)
                $this->insert(new Token(TokenType::BACKTICK, "`", $this->current()->location));
            break;
        }
        $buffer .= $this->consume()->data;
    }

    $backtick = $this->consume();
    self::Assert($backtick->type === TokenType::BACKTICK, $backtick, "inline code expression not autmatically closed (expected backtick)");

    // The text is attached as a text node rather than passed as createElement()'s
    // value: the value argument treats "&" as the start of an entity reference, so
    // code such as "a && b" raised a warning (previously hidden with "@") and lost
    // content. A text node keeps the code literal.
    $code = $this->document->createElement("code");
    $code->appendChild($this->document->createTextNode($buffer));
    return $code;
}
private function parseLink() : ?DOMNode {
private function parseLink() : ?DOMNode {
@ -127,11 +189,13 @@ class Parser {
$consumption = 1;
$consumption = 1;
$lbracket = $this->consume();
$lbracket = $this->consume();
self::Assert($lbracket->type === TokenType::LBRACKET, $lbracket, "expected left bracket, got ".$lbracket->type->name);
while (!($this->current()->type === TokenType::RBRACKET || $this->current()->type === TokenType::EOL)) {
while (!($this->current()->type === TokenType::RBRACKET || $this->current()->type === TokenType::EOL)) {
$text .= $this->consume()->data;
$text .= $this->consume()->data;
$consumption++;
$consumption++;
}
}
$rbracket = $this->consume();
$rbracket = $this->consume();
self::Assert($rbracket->type === TokenType::RBRACKET, $rbracket, "expected right bracket, got ".$rbracket->type->name);
$consumption++;
$consumption++;
if ($this->current()->type !== TokenType::LBRACKET & & $this->current()->type !== TokenType::LPAREN) {
if ($this->current()->type !== TokenType::LBRACKET & & $this->current()->type !== TokenType::LPAREN) {
@ -139,6 +203,7 @@ class Parser {
return null;
return null;
}
}
$lbracketOrParen = $this->consume();
$lbracketOrParen = $this->consume();
self::Assert($lbracketOrParen->type === TokenType::LBRACKET || $lbracketOrParen->type === TokenType::LPAREN, $lbracketOrParen, "expected left bracket or left parenthesis, got ".$lbracketOrParen->type->name);
$index = "";
$index = "";
while (!($this->current()->type === TokenType::RBRACKET || $this->current()->type === TokenType::RPAREN || $this->current()->type === TokenType::EOL))
while (!($this->current()->type === TokenType::RBRACKET || $this->current()->type === TokenType::RPAREN || $this->current()->type === TokenType::EOL))
@ -184,12 +249,16 @@ class Parser {
array_push($elms, $this->parseBold());
array_push($elms, $this->parseBold());
} else {
} else {
$this->consume();
$this->consume();
array_push($elms, $this->parseItalic());
array_push($elms, $this->parseItalic());
}
}
continue;
continue;
} elseif ($this->current()->type === TokenType::BACKTICK) {
} elseif ($this->current()->type === TokenType::BACKTICK) {
$clearBuffer();
$clearBuffer();
array_push($elms, $this->parseCode());
$code = $this->parseCode();
self::Assert($code !== false, $this->current(), "malformed code block");
array_push($elms, $code);
continue;
continue;
} elseif ($this->current()->type === TokenType::LBRACKET) {
} elseif ($this->current()->type === TokenType::LBRACKET) {
$links = $this->parseLink();
$links = $this->parseLink();
@ -203,11 +272,14 @@ class Parser {
continue;
continue;
} elseif ($this->current()->type === TokenType::BANG) {
} elseif ($this->current()->type === TokenType::BANG) {
$bang = $this->consume();
$bang = $this->consume();
self::Assert($bang->type === TokenType::BANG, $bang, "expected exclamation mark, got ".$bang->type->name.", this may be a parser bug");
if ($this->current()->type !== TokenType::LBRACKET) {
if ($this->current()->type !== TokenType::LBRACKET) {
$buffer .= self::StripBackslashes($this->consume()->data);
$buffer .= self::StripBackslashes($this->consume()->data);
continue;
continue;
}
}
$lbracket = $this->consume();
$lbracket = $this->consume();
self::Assert($lbracket->type === TokenType::LBRACKET, $lbracket, "expected left bracket, got ".$lbracket->type->name);
$alt = "";
$alt = "";
while ($this->current()->type !== TokenType::RBRACKET & & $this->current()->type !== TokenType::EOL)
while ($this->current()->type !== TokenType::RBRACKET & & $this->current()->type !== TokenType::EOL)
$alt .= self::StripBackslashes($this->consume()->data);
$alt .= self::StripBackslashes($this->consume()->data);
@ -218,7 +290,10 @@ class Parser {
continue;
continue;
}
}
$rbracket = $this->consume();
$rbracket = $this->consume();
self::Assert($rbracket->type === TokenType::RBRACKET, $rbracket, "expected right bracket, got ".$rbracket->type->name);
$lparen = $this->consume();
$lparen = $this->consume();
self::Assert($lparen->type === TokenType::LPAREN, $lparen, "expected left parenthesis, got ".$lparen->type->name);
$src = "";
$src = "";
while ($this->current()->type !== TokenType::RPAREN & & $this->current()->type !== TokenType::EOL)
while ($this->current()->type !== TokenType::RPAREN & & $this->current()->type !== TokenType::EOL)
$src .= $this->consume()->data;
$src .= $this->consume()->data;
@ -228,6 +303,8 @@ class Parser {
continue;
continue;
}
}
$rparen = $this->consume();
$rparen = $this->consume();
self::Assert($rparen->type === TokenType::RPAREN, $rparen, "expected right parenthesis, got ".$rparen->type->name);
$elm = $this->document->createElement("img");
$elm = $this->document->createElement("img");
if (strlen($alt) > 0)
if (strlen($alt) > 0)
$elm->setAttribute("alt", $alt);
$elm->setAttribute("alt", $alt);
@ -235,9 +312,10 @@ class Parser {
$clearBuffer();
$clearBuffer();
array_push($elms, $elm);
array_push($elms, $elm);
continue;
continue;
} elseif ($this->current()->type === TokenType::LBRACE) {
} elseif ($this->current()->type === TokenType::LBRACE) {
$lbrace = $this->consume();
$lbrace = $this->consume();
assert($lbrace->type === TokenType::LBRACE , "expected left brace, got ".$lbrace->type->name);
self::Assert($lbrace->type === TokenType::LBRACE, $lbrace , "expected left brace, got ".$lbrace->type->name);
$content = "";
$content = "";
while ($this->current()->type !== TokenType::EOF & &
while ($this->current()->type !== TokenType::EOF & &
@ -246,7 +324,7 @@ class Parser {
$content .= $this->consume()->data;
$content .= $this->consume()->data;
}
}
$rbrace = $this->consume();
$rbrace = $this->consume();
assert($rbrace->type === TokenType::RBRACE , "expected right brace, got ".$rbrace->type->name);
self::Assert($rbrace->type === TokenType::RBRACE, $rbrace , "expected right brace, got ".$rbrace->type->name);
$attributes = array_map(function($element) {
$attributes = array_map(function($element) {
return trim($element);
return trim($element);
@ -270,6 +348,18 @@ class Parser {
}
}
array_push($elms, $obj);
array_push($elms, $obj);
} elseif ($this->current()->type === TokenType::SPACE) { // do linebreak when two spaces are at the EOL
if ($this->last()->type === TokenType::SPACE & & $this->next()->type === TokenType::EOL) {
$this->consume();
$clearBuffer();
$elm = $this->document->createElement("br");
array_push($elms, $elm);
continue;
} else {
$this->consume();
$buffer .= " ";
}
} else
} else
$buffer .= self::StripBackslashes($this->consume()->data);
$buffer .= self::StripBackslashes($this->consume()->data);
}
}
@ -296,15 +386,18 @@ class Parser {
if ($this->current()->type === TokenType::EOF)
if ($this->current()->type === TokenType::EOF)
break;
break;
// then we expect an asterisk or a number followed by a period
if ($type === ListType::UNORDERED) {
if ($type === ListType::UNORDERED) {
$asterisk = $this->consume();
if ($this->current()->type === TokenType::ASTERISK)
assert($asterisk->type === TokenType::ASTERISK, "expected asterisk, got ".$asterisk->type->name );
$this->consume( );
} else {
} else {
$number = $this->consume();
if ($this->current()->type === TokenType::NUMBER) {
assert($number->type === TokenType::NUMBER, "expected number, got ".$number->type->name);
$this->consume();
$period = $this->consume();
if ($this->strict & & $this->current()->type !== TokenType::DOT)
assert($period->type === TokenType::DOT, "expected period, got ".$period->type->name);
$this->insert(new Token(TokenType::DOT, ".", $this->current()->location));
$period = $this->consume();
self::Assert($period->type === TokenType::DOT, $period, "expected period, got ".$period->type->name);
}
}
}
// then we parse the node content
// then we parse the node content
@ -317,7 +410,7 @@ class Parser {
// if so, we want to append a sub list to the current item
// if so, we want to append a sub list to the current item
// here should be a EOL
// here should be a EOL
a ssert($this->current()->type === TokenType::EOL, "expected EOL, got ".$this->current()->type->name);
self::A ssert($this->current()->type === TokenType::EOL, $this->current() , "expected EOL, got ".$this->current()->type->name);
$this->consume();
$this->consume();
$nextLevel = 0;
$nextLevel = 0;
@ -378,6 +471,7 @@ class Parser {
foreach ($this->parseText() as $node)
foreach ($this->parseText() as $node)
if ($node instanceof DOMNode)
if ($node instanceof DOMNode)
$elm->appendChild($node);
$elm->appendChild($node);
$elm->setAttribute("id", self::TextToSlug($elm->textContent));
$this->document->appendChild($elm);
$this->document->appendChild($elm);
}
}
@ -502,6 +596,8 @@ class Parser {
$head = $this->parseTableHead();
$head = $this->parseTableHead();
$props = $this->parseTableAlignment();
$props = $this->parseTableAlignment();
self::Assert(count($props) === count($head->childNodes), $this->current(), "the number of alignment columns does not match the number of header columns");
$i = 0;
$i = 0;
foreach($head->childNodes as $col) {
foreach($head->childNodes as $col) {
$col->setAttribute("style", "text-align: ".$props[$i]);
$col->setAttribute("style", "text-align: ".$props[$i]);
@ -573,9 +669,11 @@ class Parser {
$this->parseHeading();
$this->parseHeading();
break;
break;
case TokenType::NUMBER:
case TokenType::NUMBER:
$list = $this->parseList(ListType::ORDERED);
if ($this->next()->type === TokenType::DOT) {
$this->document->appendChild($list);
$list = $this->parseList(ListType::ORDERED);
break;
$this->document->appendChild($list);
break;
}
case TokenType::BACKTICK:
case TokenType::BACKTICK:
$this->parseCodeBlock();
$this->parseCodeBlock();
break;
break;