added support for links and references

feature/tests
Michael Ochmann 3 years ago
parent b917c74bdf
commit 2d7cf901b9
  1. 17
      src/Lexer.php
  2. 90
      src/Parser.php
  3. 3
      src/Token.php
  4. 5
      test/test1.md

@ -55,6 +55,23 @@ class Lexer {
$clearBuffer(); $clearBuffer();
array_push($tokens, new Token(TokenType::BACKTICK, $char)); array_push($tokens, new Token(TokenType::BACKTICK, $char));
break; break;
case '[':
$clearBuffer();
array_push($tokens, new Token(TokenType::LBRACKET, $char));
break;
case ']':
$clearBuffer();
array_push($tokens, new Token(TokenType::RBRACKET, $char));
break;
case ':':
if (str_ends_with($buffer, "http") || str_ends_with($buffer, "https")) {
$buffer .= $char;
continue;
}
$clearBuffer();
array_push($tokens, new Token(TokenType::COLON, $char));
break;
default: default:
$buffer .= $char; $buffer .= $char;
break; break;

@ -3,18 +3,23 @@
namespace parkdown; namespace parkdown;
use DOMDocument; use DOMDocument;
use DOMElement;
use DOMNode; use DOMNode;
use DOMText; use DOMText;
class Parser { class Parser {
const MAGIC_CHAR = "*";
private array $tokenStream; private array $tokenStream;
private int $pointer; private int $pointer;
private DOMDocument $document; private DOMDocument $document;
private array $references;
public function __construct(array $tokenStream) { public function __construct(array $tokenStream) {
$this->tokenStream = $tokenStream; $this->tokenStream = $tokenStream;
$this->pointer = 0; $this->pointer = 0;
$this->document = new DOMDocument(); $this->document = new DOMDocument();
$this->references = [];
} }
private function current() : Token { private function current() : Token {
@ -72,6 +77,37 @@ class Parser {
return $this->document->createElement("code", $buffer); return $this->document->createElement("code", $buffer);
} }
/**
 * Tries to parse an inline link of the form `[text][target]`, starting at
 * the LBRACKET token the parser currently points at.
 *
 * The target is resolved in this order:
 *   1. targets starting with "http" are used verbatim as the href,
 *   2. targets already present in $this->references are replaced by the
 *      stored value,
 *   3. everything else is wrapped in MAGIC_CHAR on both sides, leaving a
 *      placeholder href for a later pass to fill in.
 *
 * If the tokens do not form a complete `[text][target]` pair on the current
 * line, the token pointer is restored to where it was on entry and null is
 * returned, so the caller can treat the bracket as ordinary text.
 *
 * @return DOMNode|null the `<a>` element, or null if this is not a link
 */
private function parseLink() : ?DOMNode {
    // remember the entry position so every failure path can undo all consumption
    $start = $this->pointer;

    $this->consume(); // opening bracket of the link text
    $text = $this->consumeUntilRBracket();
    if ($text === null || $this->current()->type !== TokenType::LBRACKET) {
        $this->pointer = $start;
        return null;
    }

    $this->consume(); // opening bracket of the link target
    $index = $this->consumeUntilRBracket();
    if ($index === null) {
        $this->pointer = $start;
        return null;
    }

    if (str_starts_with($index, "http"))
        $href = $index;
    elseif (array_key_exists($index, $this->references))
        $href = $this->references[$index];
    else
        $href = self::MAGIC_CHAR.$index.self::MAGIC_CHAR;

    // The value argument of createElement() is not escaped, so link text
    // containing "&" would be mangled; a text node is always treated literally.
    $elm = $this->document->createElement("a");
    $elm->appendChild($this->document->createTextNode($text));
    $elm->setAttribute("href", $href);

    return $elm;
}

/**
 * Concatenates the data of all tokens up to the next RBRACKET and consumes
 * that RBRACKET as well.
 *
 * @return string|null the collected text, or null if the line (EOL) or the
 *                     token stream (EOF) ended before a RBRACKET was found;
 *                     in that case the terminating token is NOT consumed
 */
private function consumeUntilRBracket() : ?string {
    $buffer = "";

    while (true) {
        $type = $this->current()->type;

        if ($type === TokenType::RBRACKET) {
            $this->consume();
            return $buffer;
        }
        if ($type === TokenType::EOL || $type === TokenType::EOF)
            return null;

        $buffer .= $this->consume()->data;
    }
}
private function parseText() : array { private function parseText() : array {
$elms = []; $elms = [];
$buffer = ""; $buffer = "";
@ -97,6 +133,16 @@ class Parser {
$clearBuffer(); $clearBuffer();
array_push($elms, $this->parseCode()); array_push($elms, $this->parseCode());
continue; continue;
} elseif ($this->current()->type === TokenType::LBRACKET) {
$links = $this->parseLink();
if ($links !== null) {
$clearBuffer();
array_push($elms, $links);
} else {
$buffer .= $this->consume()->data;
continue;
}
continue;
} else } else
$buffer .= $this->consume()->data; $buffer .= $this->consume()->data;
} }
@ -196,7 +242,6 @@ class Parser {
$this->peek(2)->type === TokenType::BACKTICK) && $this->current()->type !== TokenType::EOF) { $this->peek(2)->type === TokenType::BACKTICK) && $this->current()->type !== TokenType::EOF) {
$buffer .= $this->consume()->data; $buffer .= $this->consume()->data;
} }
echo $buffer;
if ($this->current()->type !== TokenType::EOF) { if ($this->current()->type !== TokenType::EOF) {
$this->consume(); $this->consume();
$this->consume(); $this->consume();
@ -209,6 +254,44 @@ class Parser {
$this->consume(); $this->consume();
} }
/**
 * Parses a reference definition of the form `[label]: target` that starts
 * at the current LBRACKET token and stores it in $this->references.
 *
 * The label must be a single NUMBER or TEXT token directly followed by
 * RBRACKET and COLON. If the lookahead does not match, the line is handed
 * to parseText() and emitted as a regular paragraph instead.
 *
 * The target is everything up to the end of the line, with surrounding
 * whitespace trimmed.
 */
private function parseReference() : void {
    if (($this->next()->type !== TokenType::NUMBER && $this->next()->type !== TokenType::TEXT) ||
        $this->peek(2)->type !== TokenType::RBRACKET ||
        $this->peek(3)->type !== TokenType::COLON) {
        $this->buildParagraph($this->parseText());
        return;
    }

    $this->consume();                // "["
    $index = $this->consume()->data; // label
    $this->consume();                // "]"
    $this->consume();                // ":"

    $buffer = "";
    while ($this->current()->type !== TokenType::EOL && $this->current()->type !== TokenType::EOF)
        $buffer .= $this->consume()->data;

    // Only swallow the line terminator. When the definition is the last line
    // of the input, the loop stops on EOF, and consuming that would move the
    // pointer past the end of the token stream.
    if ($this->current()->type === TokenType::EOL)
        $this->consume();

    $this->references[$index] = trim($buffer);
}
/**
 * Walks the element tree below (and including) $node and replaces
 * placeholder hrefs with the target stored in $this->references.
 *
 * An href counts as a placeholder when it starts with MAGIC_CHAR; the lookup
 * key is the href without its first and last character. Hrefs whose key is
 * not a known reference are left as they are.
 *
 * @param DOMElement $node root of the subtree to process
 */
private function resolveReferences(DOMElement $node) : void {
    // elements without an href behave like an empty href: nothing to resolve
    $target = $node->hasAttribute("href") ? $node->getAttribute("href") : "";

    if (str_starts_with($target, self::MAGIC_CHAR)) {
        // strip the leading and trailing marker character
        $key = substr($target, 1, -1);
        if (array_key_exists($key, $this->references))
            $node->setAttribute("href", $this->references[$key]);
    }

    foreach ($node->childNodes as $descendant) {
        // text nodes, comments etc. cannot carry attributes – skip them
        if ($descendant->nodeType !== XML_ELEMENT_NODE)
            continue;

        $this->resolveReferences($descendant);
    }
}
public function parse() : string { public function parse() : string {
while ($this->current()->type !== TokenType::EOF) { while ($this->current()->type !== TokenType::EOF) {
switch($this->current()->type) { switch($this->current()->type) {
@ -233,6 +316,9 @@ class Parser {
case TokenType::EOL: case TokenType::EOL:
$this->consume(); $this->consume();
break; break;
case TokenType::LBRACKET:
$this->parseReference();
break;
default: default:
$c = $this->consume(); $c = $this->consume();
echo "::".$c->type->name."::"; echo "::".$c->type->name."::";
@ -240,6 +326,8 @@ class Parser {
} }
} }
foreach($this->document->childNodes as $node)
$this->resolveReferences($node);
return $this->document->saveHTML(); return $this->document->saveHTML();
} }
} }

@ -11,6 +11,9 @@ enum TokenType {
case EOL ; case EOL ;
case EOF ; case EOF ;
case BACKTICK; case BACKTICK;
case LBRACKET;
case RBRACKET;
case COLON;
} }
class Token { class Token {

@ -13,6 +13,8 @@
Lorem **ipsum** dolor sit *amet*, `consetetur` sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. Lorem **ipsum** dolor sit *amet*, `consetetur` sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua.
And we test: all [special chars][1] in random [text][hallo]. this [word][http://lol.de] is a link.
At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.
``` ```
@ -23,3 +25,6 @@ At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergr
helloWorld(); helloWorld();
``` ```
[1]: https://these-are-references:0
[hallo]: ulululu
Loading…
Cancel
Save