better asserts; start of implementing strict/non strict mode

feature/strict-mode
Michael Ochmann 3 years ago
parent 81de125831
commit 4eb24000e1
  1. 95
      src/Parser.php

@ -24,12 +24,14 @@ class Parser {
private int $pointer; private int $pointer;
private DOMDocument $document; private DOMDocument $document;
private array $references; private array $references;
private bool $strict;
public function __construct(array $tokenStream) { public function __construct(array $tokenStream, $strict = false) {
$this->tokenStream = $tokenStream; $this->tokenStream = $tokenStream;
$this->pointer = 0; $this->pointer = 0;
$this->document = new DOMDocument(); $this->document = new DOMDocument();
$this->references = []; $this->references = [];
$this->strict = $strict;
} }
private function current() : Token { private function current() : Token {
@ -44,6 +46,11 @@ class Parser {
return $this->peek(-1); return $this->peek(-1);
} }
/**
 * Splices a single token into the token stream relative to the read pointer.
 *
 * Nothing is removed from the stream; the tokens at and after the target
 * position are shifted back by one.
 *
 * @param Token $token  token to add to the stream
 * @param int   $offset distance from the current pointer at which the token is placed (0 = at the pointer itself)
 */
private function insert(Token $token, int $offset = 0) : void {
	$position = $this->pointer + $offset;
	// the replacement is passed as a one-element list so the Token object itself becomes the new element
	array_splice($this->tokenStream, $position, 0, [$token]);
}
private function peek(int $amount = 0) : Token { private function peek(int $amount = 0) : Token {
$amount += $this->pointer; $amount += $this->pointer;
if ($amount < 0 || $amount >= count($this->tokenStream)) if ($amount < 0 || $amount >= count($this->tokenStream))
@ -63,6 +70,18 @@ class Parser {
return stripslashes($text); return stripslashes($text);
} }
/**
 * Builds the location prefix used in parser error messages.
 *
 * @param array $loc location triple in the order [column, row, file name];
 *                   a falsy file name is reported as "INPUT_STRING"
 * @return string prefix of the form "file:row:col: ERROR: "
 */
private static function LOC(array $loc) : string {
	[$col, $row, $fileName] = $loc;
	// fall back to a placeholder when the token does not carry a file name
	$file = $fileName ?: "INPUT_STRING";

	return "$file:$row:$col: ERROR: ";
}
/**
 * Asserts a parser expectation and prefixes the failure message with the
 * source location of the offending token.
 *
 * The check is delegated to PHP's built-in assert(), so whether a failed
 * expectation is reported at all depends on the runtime assertion settings
 * (zend.assertions); with assertions disabled this method does nothing.
 *
 * @param bool   $assertion condition that is expected to hold
 * @param Token  $token     token whose location is passed to LOC() for the message prefix
 * @param string $message   text appended after the location prefix
 */
private static function Assert(bool $assertion, Token $token, string $message = "") : void {
assert($assertion, self::LOC($token->location).$message);
}
private function resolveReferences(DOMElement $node) : void { private function resolveReferences(DOMElement $node) : void {
if (count($this->references) < 1) if (count($this->references) < 1)
return; return;
@ -92,32 +111,66 @@ class Parser {
private function parseBold() : DOMNode { private function parseBold() : DOMNode {
$buffer = ""; $buffer = "";
while ($this->current()->type !== TokenType::ASTERISK && $this->current()->type !== TokenType::EOL) { while ($this->current()->type !== TokenType::ASTERISK) {
if ($this->current()->type === TokenType::EOL || $this->current()->type === TokenType::EOF) {
if (!$this->strict) {
[$col, $row, $fileName] = $this->current()->location;
$this->insert(new Token(TokenType::ASTERISK, "*", [$col + 1, $row, $fileName]));
$this->insert(new Token(TokenType::ASTERISK, "*", [$col + 2, $row, $fileName]));
}
break;
}
$buffer .= $this->consume()->data; $buffer .= $this->consume()->data;
} }
$this->consume(); if (!$this->strict && $this->current()->type !== TokenType::ASTERISK)
$this->consume(); $this->insert(new Token(TokenType::ASTERISK, "*", $this->current()->location));
$asterisk = $this->consume();
self::Assert($asterisk->type === TokenType::ASTERISK, $asterisk, "expected asterisk, got ".$asterisk->type->name);
if (!$this->strict && $this->current()->type !== TokenType::ASTERISK)
$this->insert(new Token(TokenType::ASTERISK, "*", $this->current()->location));
$asterisk = $this->consume();
self::Assert($asterisk->type === TokenType::ASTERISK, $asterisk, "expected asterisk, got ".$asterisk->type->name);
return $this->document->createElement("b", $buffer); return $this->document->createElement("b", $buffer);
} }
private function parseItalic() : DOMNode { private function parseItalic() : DOMNode {
$buffer = ""; $buffer = "";
while ($this->current()->type !== TokenType::ASTERISK && $this->current()->type !== TokenType::EOL) { while ($this->current()->type !== TokenType::ASTERISK) {
if ($this->current()->type === TokenType::EOL || $this->current()->type === TokenType::EOF) {
if (!$this->strict) {
[$col, $row, $fileName] = $this->current()->location;
$this->insert(new Token(TokenType::ASTERISK, "*", [$col + 1, $row, $fileName]));
}
break;
}
$buffer .= $this->consume()->data; $buffer .= $this->consume()->data;
} }
$this->consume(); $asterisk = $this->consume();
self::Assert($asterisk->type === TokenType::ASTERISK, $asterisk, "expected asterisk, got ".$asterisk->type->name);
return $this->document->createElement("i", $buffer); return $this->document->createElement("i", $buffer);
} }
private function parseCode() : DOMNode { private function parseCode() : DOMNode {
$buffer = ""; $buffer = "";
$this->consume(); $backtick = $this->consume();
while ($this->current()->type !== TokenType::BACKTICK && $this->current()->type !== TokenType::EOL) self::Assert($backtick->type === TokenType::BACKTICK, $backtick, "expected backtick, got ".$backtick->type->name);
while ($this->current()->type !== TokenType::BACKTICK) {
// we need to recover, if input is malformed
if ($this->current()->type === TokenType::EOL || $this->current()->type === TokenType::EOF) {
if (!$this->strict)
$this->insert(new Token(TokenType::BACKTICK, "`", $this->current()->location));
break;
}
$buffer .= $this->consume()->data; $buffer .= $this->consume()->data;
}
$this->consume(); $backtick = $this->consume();
self::Assert($backtick->type === TokenType::BACKTICK, $backtick, "inline code expression not autmatically closed (expected backtick)");
return $this->document->createElement("code", $buffer); return $this->document->createElement("code", $buffer);
} }
@ -127,11 +180,13 @@ class Parser {
$consumption = 1; $consumption = 1;
$lbracket = $this->consume(); $lbracket = $this->consume();
self::Assert($lbracket->type === TokenType::LBRACKET, $lbracket, "expected left bracket, got ".$lbracket->type->name);
while (!($this->current()->type === TokenType::RBRACKET || $this->current()->type === TokenType::EOL)) { while (!($this->current()->type === TokenType::RBRACKET || $this->current()->type === TokenType::EOL)) {
$text .= $this->consume()->data; $text .= $this->consume()->data;
$consumption++; $consumption++;
} }
$rbracket = $this->consume(); $rbracket = $this->consume();
self::Assert($rbracket->type === TokenType::RBRACKET, $rbracket, "expected right bracket, got ".$rbracket->type->name);
$consumption++; $consumption++;
if ($this->current()->type !== TokenType::LBRACKET && $this->current()->type !== TokenType::LPAREN) { if ($this->current()->type !== TokenType::LBRACKET && $this->current()->type !== TokenType::LPAREN) {
@ -139,6 +194,7 @@ class Parser {
return null; return null;
} }
$lbracketOrParen = $this->consume(); $lbracketOrParen = $this->consume();
self::Assert($lbracketOrParen->type === TokenType::LBRACKET || $lbracketOrParen->type === TokenType::LPAREN, $lbracketOrParen, "expected left bracket or left parenthesis, got ".$lbracketOrParen->type->name);
$index = ""; $index = "";
while (!($this->current()->type === TokenType::RBRACKET || $this->current()->type === TokenType::RPAREN || $this->current()->type === TokenType::EOL)) while (!($this->current()->type === TokenType::RBRACKET || $this->current()->type === TokenType::RPAREN || $this->current()->type === TokenType::EOL))
@ -203,11 +259,14 @@ class Parser {
continue; continue;
} elseif ($this->current()->type === TokenType::BANG) { } elseif ($this->current()->type === TokenType::BANG) {
$bang = $this->consume(); $bang = $this->consume();
self::Assert($bang->type === TokenType::BANG, $bang, "expected exclamation mark, got ".$bang->type->name.", this may be a parser bug");
if ($this->current()->type !== TokenType::LBRACKET) { if ($this->current()->type !== TokenType::LBRACKET) {
$buffer .= self::StripBackslashes($this->consume()->data); $buffer .= self::StripBackslashes($this->consume()->data);
continue; continue;
} }
$lbracket = $this->consume(); $lbracket = $this->consume();
self::Assert($lbracket->type === TokenType::LBRACKET, $lbracket, "expected left bracket, got ".$lbracket->type->name);
$alt = ""; $alt = "";
while ($this->current()->type !== TokenType::RBRACKET && $this->current()->type !== TokenType::EOL) while ($this->current()->type !== TokenType::RBRACKET && $this->current()->type !== TokenType::EOL)
$alt .= self::StripBackslashes($this->consume()->data); $alt .= self::StripBackslashes($this->consume()->data);
@ -218,7 +277,10 @@ class Parser {
continue; continue;
} }
$rbracket = $this->consume(); $rbracket = $this->consume();
self::Assert($rbracket->type === TokenType::RBRACKET, $rbracket, "expected right bracket, got ".$rbracket->type->name);
$lparen = $this->consume(); $lparen = $this->consume();
self::Assert($lparen->type === TokenType::LPAREN, $lparen, "expected left parenthesis, got ".$lparen->type->name);
$src = ""; $src = "";
while ($this->current()->type !== TokenType::RPAREN && $this->current()->type !== TokenType::EOL) while ($this->current()->type !== TokenType::RPAREN && $this->current()->type !== TokenType::EOL)
$src .= $this->consume()->data; $src .= $this->consume()->data;
@ -228,6 +290,8 @@ class Parser {
continue; continue;
} }
$rparen = $this->consume(); $rparen = $this->consume();
self::Assert($rparen->type === TokenType::RPAREN, $rparen, "expected right parenthesis, got ".$rparen->type->name);
$elm = $this->document->createElement("img"); $elm = $this->document->createElement("img");
if (strlen($alt) > 0) if (strlen($alt) > 0)
$elm->setAttribute("alt", $alt); $elm->setAttribute("alt", $alt);
@ -235,9 +299,10 @@ class Parser {
$clearBuffer(); $clearBuffer();
array_push($elms, $elm); array_push($elms, $elm);
continue; continue;
} elseif ($this->current()->type === TokenType::LBRACE) { } elseif ($this->current()->type === TokenType::LBRACE) {
$lbrace = $this->consume(); $lbrace = $this->consume();
assert($lbrace->type === TokenType::LBRACE, "expected left brace, got ".$lbrace->type->name); self::Assert($lbrace->type === TokenType::LBRACE, $lbrace, "expected left brace, got ".$lbrace->type->name);
$content = ""; $content = "";
while ($this->current()->type !== TokenType::EOF && while ($this->current()->type !== TokenType::EOF &&
@ -246,7 +311,7 @@ class Parser {
$content .= $this->consume()->data; $content .= $this->consume()->data;
} }
$rbrace = $this->consume(); $rbrace = $this->consume();
assert($rbrace->type === TokenType::RBRACE, "expected right brace, got ".$rbrace->type->name); self::Assert($rbrace->type === TokenType::RBRACE, $rbrace, "expected right brace, got ".$rbrace->type->name);
$attributes = array_map(function($element) { $attributes = array_map(function($element) {
return trim($element); return trim($element);
@ -299,12 +364,12 @@ class Parser {
// then we expect an asterisk or a number followed by a period // then we expect an asterisk or a number followed by a period
if ($type === ListType::UNORDERED) { if ($type === ListType::UNORDERED) {
$asterisk = $this->consume(); $asterisk = $this->consume();
assert($asterisk->type === TokenType::ASTERISK, "expected asterisk, got ".$asterisk->type->name); self::Assert($asterisk->type === TokenType::ASTERISK, $asterisk, "expected asterisk, got ".$asterisk->type->name);
} else { } else {
$number = $this->consume(); $number = $this->consume();
assert($number->type === TokenType::NUMBER, "expected number, got ".$number->type->name); self::Assert($number->type === TokenType::NUMBER, $number, "expected number, got ".$number->type->name);
$period = $this->consume(); $period = $this->consume();
assert($period->type === TokenType::DOT, "expected period, got ".$period->type->name); self::Assert($period->type === TokenType::DOT, $period, "expected period, got ".$period->type->name);
} }
// then we parse the node content // then we parse the node content
@ -317,7 +382,7 @@ class Parser {
// if so, we want to append a sub list to the current item // if so, we want to append a sub list to the current item
// here should be a EOL // here should be a EOL
assert($this->current()->type === TokenType::EOL, "expected EOL, got ".$this->current()->type->name); self::Assert($this->current()->type === TokenType::EOL, $this->current(), "expected EOL, got ".$this->current()->type->name);
$this->consume(); $this->consume();
$nextLevel = 0; $nextLevel = 0;

Loading…
Cancel
Save