|
|
@ -24,12 +24,14 @@ class Parser { |
|
|
|
private int $pointer; |
|
|
|
private int $pointer; |
|
|
|
private DOMDocument $document; |
|
|
|
private DOMDocument $document; |
|
|
|
private array $references; |
|
|
|
private array $references; |
|
|
|
|
|
|
|
private bool $strict; |
|
|
|
|
|
|
|
|
|
|
|
/**
 * Creates a parser over an already tokenized input.
 *
 * @param array $tokenStream Tokens to parse; stored as-is, reading starts at index 0.
 * @param bool  $strict      When false (the default), the inline parsers in this
 *                           class (bold, italic, code) insert a missing closing
 *                           delimiter token instead of letting their assertion
 *                           fail on malformed input.
 */
public function __construct(array $tokenStream, bool $strict = false) {
    $this->tokenStream = $tokenStream;
    $this->pointer = 0;
    $this->document = new DOMDocument();
    $this->references = [];
    // Typed as bool in the signature so a wrong argument is rejected at the
    // call site rather than at this assignment to the typed property.
    $this->strict = $strict;
}
|
|
|
|
|
|
|
|
|
|
|
private function current() : Token { |
|
|
|
private function current() : Token { |
|
|
@ -44,6 +46,11 @@ class Parser { |
|
|
|
return $this->peek(-1); |
|
|
|
return $this->peek(-1); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Splices a single token into the token stream without removing anything.
 *
 * @param Token $token  Token to add.
 * @param int   $offset Position relative to the current pointer; 0 places the
 *                      token at the pointer, so it becomes the next token read.
 */
private function insert(Token $token, int $offset = 0) : void {
    $position = $this->pointer + $offset;
    // The replacement has to be wrapped in an array: array_splice() casts a
    // non-array replacement with (array), which would spread the Token's
    // properties into the stream instead of inserting the object itself.
    array_splice($this->tokenStream, $position, 0, [$token]);
}
|
|
|
|
|
|
|
|
|
|
|
private function peek(int $amount = 0) : Token { |
|
|
|
private function peek(int $amount = 0) : Token { |
|
|
|
$amount += $this->pointer; |
|
|
|
$amount += $this->pointer; |
|
|
|
if ($amount < 0 || $amount >= count($this->tokenStream)) |
|
|
|
if ($amount < 0 || $amount >= count($this->tokenStream)) |
|
|
@ -63,6 +70,18 @@ class Parser { |
|
|
|
return stripslashes($text); |
|
|
|
return stripslashes($text); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Formats a token location as a compiler-style error prefix.
 *
 * @param array $loc Location triple in the order [column, row, file name].
 * @return string "<file>:<row>:<col>: ERROR: "; a falsy file name (e.g. "" or
 *                null) is reported as "INPUT_STRING".
 */
private static function LOC(array $loc) : string {
    [$col, $row, $fileName] = $loc;

    $file = $fileName ?: "INPUT_STRING";

    // The former second return (" in row …, column … of source string") could
    // never execute and has been removed.
    return "$file:$row:$col: ERROR: ";
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Checks a parser expectation and reports a failure at the token's location.
 *
 * Delegates to PHP's assert(), so whether a failed check throws, warns or is
 * skipped entirely depends on the runtime's assertion configuration.
 *
 * @param bool   $assertion Condition that is expected to hold.
 * @param Token  $token     Token whose location prefixes the failure message.
 * @param string $message   Description appended after the location prefix.
 */
private static function Assert(bool $assertion, Token $token, string $message = "") : void {
    if ($assertion) {
        return;
    }

    // Only reached on failure, so the message is built only when it is needed.
    assert(false, self::LOC($token->location) . $message);
}
|
|
|
|
|
|
|
|
|
|
|
private function resolveReferences(DOMElement $node) : void { |
|
|
|
private function resolveReferences(DOMElement $node) : void { |
|
|
|
if (count($this->references) < 1) |
|
|
|
if (count($this->references) < 1) |
|
|
|
return; |
|
|
|
return; |
|
|
@ -92,32 +111,66 @@ class Parser { |
|
|
|
|
|
|
|
|
|
|
|
/**
 * Parses the text of a bold span and its two closing asterisks.
 *
 * Token data is accumulated from the current position until an ASTERISK token
 * is reached. If the line (EOL) or input (EOF) ends first and the parser is
 * not strict, two closing ASTERISK tokens are inserted so the span is closed
 * at that point; in strict mode the closing assertions below report the error.
 *
 * @return DOMNode A <b> element whose value is the collected text.
 */
private function parseBold() : DOMNode {
    $buffer = "";

    while (($token = $this->current())->type !== TokenType::ASTERISK) {
        if ($token->type === TokenType::EOL || $token->type === TokenType::EOF) {
            if (!$this->strict) {
                [$col, $row, $fileName] = $token->location;
                $this->insert(new Token(TokenType::ASTERISK, "*", [$col + 1, $row, $fileName]));
                // Offset 1 places the second asterisk after the first one.
                // Inserting both at offset 0 put the col + 2 token in front of
                // the col + 1 token, i.e. in reverse column order.
                $this->insert(new Token(TokenType::ASTERISK, "*", [$col + 2, $row, $fileName]), 1);
            }
            break;
        }

        $buffer .= $this->consume()->data;
    }

    // A bold span is closed by two consecutive asterisks.
    for ($closing = 0; $closing < 2; $closing++) {
        if (!$this->strict && $this->current()->type !== TokenType::ASTERISK)
            $this->insert(new Token(TokenType::ASTERISK, "*", $this->current()->location));

        $asterisk = $this->consume();
        self::Assert($asterisk->type === TokenType::ASTERISK, $asterisk, "expected asterisk, got ".$asterisk->type->name);
    }

    // NOTE(review): createElement() does not escape a raw "&" in the value;
    // confirm the collected text can never contain one.
    return $this->document->createElement("b", $buffer);
}
|
|
|
|
|
|
|
|
|
|
|
/**
 * Parses the text of an italic span and its closing asterisk.
 *
 * Token data is collected from the current position until an ASTERISK token
 * is reached. If EOL or EOF comes first and the parser is not strict, a
 * closing ASTERISK token is inserted at that point; in strict mode the
 * assertion on the closing token reports the error.
 *
 * @return DOMNode An <i> element whose value is the collected text.
 */
private function parseItalic() : DOMNode {
    $text = "";

    while (true) {
        $token = $this->current();

        if ($token->type === TokenType::ASTERISK)
            break;

        $lineOrInputEnded = $token->type === TokenType::EOL || $token->type === TokenType::EOF;
        if ($lineOrInputEnded) {
            if (!$this->strict) {
                [$col, $row, $fileName] = $token->location;
                $this->insert(new Token(TokenType::ASTERISK, "*", [$col + 1, $row, $fileName]));
            }
            break;
        }

        $text .= $this->consume()->data;
    }

    $closing = $this->consume();
    self::Assert($closing->type === TokenType::ASTERISK, $closing, "expected asterisk, got ".$closing->type->name);

    return $this->document->createElement("i", $text);
}
|
|
|
|
|
|
|
|
|
|
|
/**
 * Parses an inline code span: opening backtick, raw text, closing backtick.
 *
 * The opening BACKTICK token is consumed here. Token data is then collected
 * until the next BACKTICK. If EOL or EOF comes first and the parser is not
 * strict, a closing BACKTICK token is inserted at that point so parsing can
 * continue; in strict mode the closing assertion reports the error.
 *
 * @return DOMNode A <code> element whose value is the collected text.
 */
private function parseCode() : DOMNode {
    $opening = $this->consume();
    self::Assert($opening->type === TokenType::BACKTICK, $opening, "expected backtick, got ".$opening->type->name);

    $buffer = "";
    while (($token = $this->current())->type !== TokenType::BACKTICK) {
        // Recover from malformed input: the span was never closed on this line.
        if ($token->type === TokenType::EOL || $token->type === TokenType::EOF) {
            if (!$this->strict)
                $this->insert(new Token(TokenType::BACKTICK, "`", $token->location));
            break;
        }

        $buffer .= $this->consume()->data;
    }

    $closing = $this->consume();
    // Message typo fixed: "autmatically" -> "automatically".
    self::Assert($closing->type === TokenType::BACKTICK, $closing, "inline code expression not automatically closed (expected backtick)");

    // NOTE(review): createElement() does not escape a raw "&" in the value;
    // confirm the collected text can never contain one.
    return $this->document->createElement("code", $buffer);
}
|
|
@ -127,11 +180,13 @@ class Parser { |
|
|
|
$consumption = 1; |
|
|
|
$consumption = 1; |
|
|
|
|
|
|
|
|
|
|
|
$lbracket = $this->consume(); |
|
|
|
$lbracket = $this->consume(); |
|
|
|
|
|
|
|
self::Assert($lbracket->type === TokenType::LBRACKET, $lbracket, "expected left bracket, got ".$lbracket->type->name); |
|
|
|
while (!($this->current()->type === TokenType::RBRACKET || $this->current()->type === TokenType::EOL)) { |
|
|
|
while (!($this->current()->type === TokenType::RBRACKET || $this->current()->type === TokenType::EOL)) { |
|
|
|
$text .= $this->consume()->data; |
|
|
|
$text .= $this->consume()->data; |
|
|
|
$consumption++; |
|
|
|
$consumption++; |
|
|
|
} |
|
|
|
} |
|
|
|
$rbracket = $this->consume(); |
|
|
|
$rbracket = $this->consume(); |
|
|
|
|
|
|
|
self::Assert($rbracket->type === TokenType::RBRACKET, $rbracket, "expected right bracket, got ".$rbracket->type->name); |
|
|
|
$consumption++; |
|
|
|
$consumption++; |
|
|
|
|
|
|
|
|
|
|
|
if ($this->current()->type !== TokenType::LBRACKET && $this->current()->type !== TokenType::LPAREN) { |
|
|
|
if ($this->current()->type !== TokenType::LBRACKET && $this->current()->type !== TokenType::LPAREN) { |
|
|
@ -139,6 +194,7 @@ class Parser { |
|
|
|
return null; |
|
|
|
return null; |
|
|
|
} |
|
|
|
} |
|
|
|
$lbracketOrParen = $this->consume(); |
|
|
|
$lbracketOrParen = $this->consume(); |
|
|
|
|
|
|
|
self::Assert($lbracketOrParen->type === TokenType::LBRACKET || $lbracketOrParen->type === TokenType::LPAREN, $lbracketOrParen, "expected left bracket or left parenthesis, got ".$lbracketOrParen->type->name); |
|
|
|
|
|
|
|
|
|
|
|
$index = ""; |
|
|
|
$index = ""; |
|
|
|
while (!($this->current()->type === TokenType::RBRACKET || $this->current()->type === TokenType::RPAREN || $this->current()->type === TokenType::EOL)) |
|
|
|
while (!($this->current()->type === TokenType::RBRACKET || $this->current()->type === TokenType::RPAREN || $this->current()->type === TokenType::EOL)) |
|
|
@ -203,11 +259,14 @@ class Parser { |
|
|
|
continue; |
|
|
|
continue; |
|
|
|
} elseif ($this->current()->type === TokenType::BANG) { |
|
|
|
} elseif ($this->current()->type === TokenType::BANG) { |
|
|
|
$bang = $this->consume(); |
|
|
|
$bang = $this->consume(); |
|
|
|
|
|
|
|
self::Assert($bang->type === TokenType::BANG, $bang, "expected exclamation mark, got ".$bang->type->name.", this may be a parser bug"); |
|
|
|
if ($this->current()->type !== TokenType::LBRACKET) { |
|
|
|
if ($this->current()->type !== TokenType::LBRACKET) { |
|
|
|
$buffer .= self::StripBackslashes($this->consume()->data); |
|
|
|
$buffer .= self::StripBackslashes($this->consume()->data); |
|
|
|
continue; |
|
|
|
continue; |
|
|
|
} |
|
|
|
} |
|
|
|
$lbracket = $this->consume(); |
|
|
|
$lbracket = $this->consume(); |
|
|
|
|
|
|
|
self::Assert($lbracket->type === TokenType::LBRACKET, $lbracket, "expected left bracket, got ".$lbracket->type->name); |
|
|
|
|
|
|
|
|
|
|
|
$alt = ""; |
|
|
|
$alt = ""; |
|
|
|
while ($this->current()->type !== TokenType::RBRACKET && $this->current()->type !== TokenType::EOL) |
|
|
|
while ($this->current()->type !== TokenType::RBRACKET && $this->current()->type !== TokenType::EOL) |
|
|
|
$alt .= self::StripBackslashes($this->consume()->data); |
|
|
|
$alt .= self::StripBackslashes($this->consume()->data); |
|
|
@ -218,7 +277,10 @@ class Parser { |
|
|
|
continue; |
|
|
|
continue; |
|
|
|
} |
|
|
|
} |
|
|
|
$rbracket = $this->consume(); |
|
|
|
$rbracket = $this->consume(); |
|
|
|
|
|
|
|
self::Assert($rbracket->type === TokenType::RBRACKET, $rbracket, "expected right bracket, got ".$rbracket->type->name); |
|
|
|
$lparen = $this->consume(); |
|
|
|
$lparen = $this->consume(); |
|
|
|
|
|
|
|
self::Assert($lparen->type === TokenType::LPAREN, $lparen, "expected left parenthesis, got ".$lparen->type->name); |
|
|
|
|
|
|
|
|
|
|
|
$src = ""; |
|
|
|
$src = ""; |
|
|
|
while ($this->current()->type !== TokenType::RPAREN && $this->current()->type !== TokenType::EOL) |
|
|
|
while ($this->current()->type !== TokenType::RPAREN && $this->current()->type !== TokenType::EOL) |
|
|
|
$src .= $this->consume()->data; |
|
|
|
$src .= $this->consume()->data; |
|
|
@ -228,6 +290,8 @@ class Parser { |
|
|
|
continue; |
|
|
|
continue; |
|
|
|
} |
|
|
|
} |
|
|
|
$rparen = $this->consume(); |
|
|
|
$rparen = $this->consume(); |
|
|
|
|
|
|
|
self::Assert($rparen->type === TokenType::RPAREN, $rparen, "expected right parenthesis, got ".$rparen->type->name); |
|
|
|
|
|
|
|
|
|
|
|
$elm = $this->document->createElement("img"); |
|
|
|
$elm = $this->document->createElement("img"); |
|
|
|
if (strlen($alt) > 0) |
|
|
|
if (strlen($alt) > 0) |
|
|
|
$elm->setAttribute("alt", $alt); |
|
|
|
$elm->setAttribute("alt", $alt); |
|
|
@ -235,9 +299,10 @@ class Parser { |
|
|
|
$clearBuffer(); |
|
|
|
$clearBuffer(); |
|
|
|
array_push($elms, $elm); |
|
|
|
array_push($elms, $elm); |
|
|
|
continue; |
|
|
|
continue; |
|
|
|
|
|
|
|
|
|
|
|
} elseif ($this->current()->type === TokenType::LBRACE) { |
|
|
|
} elseif ($this->current()->type === TokenType::LBRACE) { |
|
|
|
$lbrace = $this->consume(); |
|
|
|
$lbrace = $this->consume(); |
|
|
|
assert($lbrace->type === TokenType::LBRACE, "expected left brace, got ".$lbrace->type->name); |
|
|
|
self::Assert($lbrace->type === TokenType::LBRACE, $lbrace, "expected left brace, got ".$lbrace->type->name); |
|
|
|
|
|
|
|
|
|
|
|
$content = ""; |
|
|
|
$content = ""; |
|
|
|
while ($this->current()->type !== TokenType::EOF && |
|
|
|
while ($this->current()->type !== TokenType::EOF && |
|
|
@ -246,7 +311,7 @@ class Parser { |
|
|
|
$content .= $this->consume()->data; |
|
|
|
$content .= $this->consume()->data; |
|
|
|
} |
|
|
|
} |
|
|
|
$rbrace = $this->consume(); |
|
|
|
$rbrace = $this->consume(); |
|
|
|
assert($rbrace->type === TokenType::RBRACE, "expected right brace, got ".$rbrace->type->name); |
|
|
|
self::Assert($rbrace->type === TokenType::RBRACE, $rbrace, "expected right brace, got ".$rbrace->type->name); |
|
|
|
|
|
|
|
|
|
|
|
$attributes = array_map(function($element) { |
|
|
|
$attributes = array_map(function($element) { |
|
|
|
return trim($element); |
|
|
|
return trim($element); |
|
|
@ -299,12 +364,12 @@ class Parser { |
|
|
|
// then we expect an asterisk or a number followed by a period |
|
|
|
// then we expect an asterisk or a number followed by a period |
|
|
|
if ($type === ListType::UNORDERED) { |
|
|
|
if ($type === ListType::UNORDERED) { |
|
|
|
$asterisk = $this->consume(); |
|
|
|
$asterisk = $this->consume(); |
|
|
|
assert($asterisk->type === TokenType::ASTERISK, "expected asterisk, got ".$asterisk->type->name); |
|
|
|
self::Assert($asterisk->type === TokenType::ASTERISK, $asterisk, "expected asterisk, got ".$asterisk->type->name); |
|
|
|
} else { |
|
|
|
} else { |
|
|
|
$number = $this->consume(); |
|
|
|
$number = $this->consume(); |
|
|
|
assert($number->type === TokenType::NUMBER, "expected number, got ".$number->type->name); |
|
|
|
self::Assert($number->type === TokenType::NUMBER, $number, "expected number, got ".$number->type->name); |
|
|
|
$period = $this->consume(); |
|
|
|
$period = $this->consume(); |
|
|
|
assert($period->type === TokenType::DOT, "expected period, got ".$period->type->name); |
|
|
|
self::Assert($period->type === TokenType::DOT, $period, "expected period, got ".$period->type->name); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// then we parse the node content |
|
|
|
// then we parse the node content |
|
|
@ -317,7 +382,7 @@ class Parser { |
|
|
|
// if so, we want to append a sub list to the current item |
|
|
|
// if so, we want to append a sub list to the current item |
|
|
|
|
|
|
|
|
|
|
|
// here should be a EOL |
|
|
|
// here should be a EOL |
|
|
|
assert($this->current()->type === TokenType::EOL, "expected EOL, got ".$this->current()->type->name); |
|
|
|
self::Assert($this->current()->type === TokenType::EOL, $this->current(), "expected EOL, got ".$this->current()->type->name); |
|
|
|
$this->consume(); |
|
|
|
$this->consume(); |
|
|
|
|
|
|
|
|
|
|
|
$nextLevel = 0; |
|
|
|
$nextLevel = 0; |
|
|
|