added support for links and references

feature/tests
Michael Ochmann 3 years ago
parent b917c74bdf
commit 2d7cf901b9
  1. 17
      src/Lexer.php
  2. 92
      src/Parser.php
  3. 3
      src/Token.php
  4. 7
      test/test1.md

@ -55,6 +55,23 @@ class Lexer {
$clearBuffer();
array_push($tokens, new Token(TokenType::BACKTICK, $char));
break;
case '[':
$clearBuffer();
array_push($tokens, new Token(TokenType::LBRACKET, $char));
break;
case ']':
$clearBuffer();
array_push($tokens, new Token(TokenType::RBRACKET, $char));
break;
case ':':
if (str_ends_with($buffer, "http") || str_ends_with($buffer, "https")) {
$buffer .= $char;
continue;
}
$clearBuffer();
array_push($tokens, new Token(TokenType::COLON, $char));
break;
default:
$buffer .= $char;
break;

@ -3,18 +3,23 @@
namespace parkdown;
use DOMDocument;
use DOMElement;
use DOMNode;
use DOMText;
class Parser {
const MAGIC_CHAR = "*";
private array $tokenStream;
private int $pointer;
private DOMDocument $document;
private array $references;
public function __construct(array $tokenStream) {
$this->tokenStream = $tokenStream;
$this->pointer = 0;
$this->document = new DOMDocument();
$this->references = [];
}
private function current() : Token {
@ -72,6 +77,37 @@ class Parser {
return $this->document->createElement("code", $buffer);
}
/**
 * Tries to parse a reference-style link of the form `[text][target]`
 * starting at the current LBRACKET token.
 *
 * The target is used verbatim as href when it starts with "http". Otherwise
 * it is looked up in the references collected so far; when it is not known
 * yet, the href is set to the target wrapped in MAGIC_CHAR so that it can be
 * resolved after the whole document has been parsed.
 *
 * @return DOMNode|null the created <a> element, or null when the tokens do
 *                      not form a complete link on the current line. In that
 *                      case the token pointer is restored to where it was on
 *                      entry, so the caller can treat the bracket as text.
 */
private function parseLink() : ?DOMNode {
    $start = $this->pointer;
    // Both bracket groups must be closed on the same line; EOF is a stop
    // token as well so an unterminated "[" cannot run past the token stream.
    $stops = [TokenType::RBRACKET, TokenType::EOL, TokenType::EOF];

    $this->consume(); // opening "[" of the link text
    $text = "";
    while (!in_array($this->current()->type, $stops, true))
        $text .= $this->consume()->data;

    if ($this->current()->type !== TokenType::RBRACKET) {
        $this->pointer = $start;
        return null;
    }
    $this->consume(); // closing "]" of the link text

    if ($this->current()->type !== TokenType::LBRACKET) {
        $this->pointer = $start;
        return null;
    }
    $this->consume(); // opening "[" of the link target

    $index = "";
    while (!in_array($this->current()->type, $stops, true))
        $index .= $this->consume()->data;

    if ($this->current()->type !== TokenType::RBRACKET) {
        $this->pointer = $start;
        return null;
    }
    $this->consume(); // closing "]" of the link target

    // createElement() does not escape "&" in its value argument, so the link
    // text is attached as a text node, which is escaped on serialisation.
    $elm = $this->document->createElement("a");
    $elm->appendChild($this->document->createTextNode($text));

    if (str_starts_with($index, "http"))
        $href = $index;
    elseif (array_key_exists($index, $this->references))
        $href = $this->references[$index];
    else
        $href = self::MAGIC_CHAR.$index.self::MAGIC_CHAR; // resolved later

    $elm->setAttribute("href", $href);

    return $elm;
}
private function parseText() : array {
$elms = [];
$buffer = "";
@ -97,6 +133,16 @@ class Parser {
$clearBuffer();
array_push($elms, $this->parseCode());
continue;
} elseif ($this->current()->type === TokenType::LBRACKET) {
$links = $this->parseLink();
if ($links !== null) {
$clearBuffer();
array_push($elms, $links);
} else {
$buffer .= $this->consume()->data;
continue;
}
continue;
} else
$buffer .= $this->consume()->data;
}
@ -196,7 +242,6 @@ class Parser {
$this->peek(2)->type === TokenType::BACKTICK) && $this->current()->type !== TokenType::EOF) {
$buffer .= $this->consume()->data;
}
echo $buffer;
if ($this->current()->type !== TokenType::EOF) {
$this->consume();
$this->consume();
@ -209,6 +254,44 @@ class Parser {
$this->consume();
}
/**
 * Parses a reference definition line of the form `[index]: target` and
 * stores the trimmed target in $this->references under that index.
 *
 * The line is only treated as a definition when the LBRACKET is followed by
 * a single NUMBER or TEXT token, an RBRACKET and a COLON. Anything else is
 * handed to parseText() and emitted as a regular paragraph.
 */
private function parseReference() : void {
    $indexType    = $this->next()->type;
    $isDefinition = ($indexType === TokenType::NUMBER || $indexType === TokenType::TEXT)
        && $this->peek(2)->type === TokenType::RBRACKET
        && $this->peek(3)->type === TokenType::COLON;

    if (!$isDefinition) {
        $this->buildParagraph($this->parseText());
        return;
    }

    $this->consume();                  // "["
    $index = $this->consume()->data;   // reference name
    $this->consume();                  // "]"
    $this->consume();                  // ":"

    // Everything up to the end of the line is the reference target.
    $buffer = "";
    while ($this->current()->type !== TokenType::EOL && $this->current()->type !== TokenType::EOF)
        $buffer .= $this->consume()->data;

    // Only swallow the line break. EOF must stay in the stream, otherwise a
    // definition on the last line moves the pointer past the final token.
    if ($this->current()->type === TokenType::EOL)
        $this->consume();

    $this->references[$index] = trim($buffer);
}
/**
 * Recursively replaces placeholder hrefs with their reference targets.
 *
 * A placeholder is an href of the form MAGIC_CHAR + index + MAGIC_CHAR, as
 * written by parseLink() for references that were not known at the time the
 * link was parsed. Placeholders whose index is still unknown are left
 * untouched.
 *
 * @param DOMNode $node node to process; anything that is not an element is
 *                      ignored, so callers may pass arbitrary child nodes.
 */
private function resolveReferences(DOMNode $node) : void {
    if (!($node instanceof DOMElement))
        return;

    if ($node->hasAttribute("href")) {
        $href   = $node->getAttribute("href");
        $marker = strlen(self::MAGIC_CHAR);
        // Require the marker on both ends so that an ordinary URL which
        // merely starts with MAGIC_CHAR is not mistaken for a placeholder.
        $isPlaceholder = strlen($href) >= 2 * $marker
            && str_starts_with($href, self::MAGIC_CHAR)
            && str_ends_with($href, self::MAGIC_CHAR);

        if ($isPlaceholder) {
            $index = substr($href, $marker, -$marker);
            if (array_key_exists($index, $this->references))
                $node->setAttribute("href", $this->references[$index]);
        }
    }

    foreach ($node->childNodes as $child) {
        if ($child instanceof DOMElement)
            $this->resolveReferences($child);
    }
}
public function parse() : string {
while ($this->current()->type !== TokenType::EOF) {
switch($this->current()->type) {
@ -233,13 +316,18 @@ class Parser {
case TokenType::EOL:
$this->consume();
break;
case TokenType::LBRACKET:
$this->parseReference();
break;
default:
$c = $this->consume();
echo "::".$c->type->name."::";
break;
}
}
}
foreach($this->document->childNodes as $node)
$this->resolveReferences($node);
return $this->document->saveHTML();
}
}

@ -11,6 +11,9 @@ enum TokenType {
case EOL ;
case EOF ;
case BACKTICK;
case LBRACKET;
case RBRACKET;
case COLON;
}
class Token {

@ -13,6 +13,8 @@
Lorem **ipsum** dolor sit *amet*, `consetetur` sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua.
And we test: all [special chars][1] in random [text][hallo]. this [word][http://lol.de] is a link.
At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.
```
@ -22,4 +24,7 @@ At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergr
}
helloWorld();
```
```
[1]: https://these-are-references:0
[hallo]: ulululu
Loading…
Cancel
Save