diff --git a/src/Lexer.php b/src/Lexer.php
index 51a4ac7..6187e62 100644
--- a/src/Lexer.php
+++ b/src/Lexer.php
@@ -55,6 +55,25 @@ class Lexer {
                     $clearBuffer();
                     array_push($tokens, new Token(TokenType::BACKTICK, $char));
                     break;
+                case '[':
+                    $clearBuffer();
+                    array_push($tokens, new Token(TokenType::LBRACKET, $char));
+                    break;
+                case ']':
+                    $clearBuffer();
+                    array_push($tokens, new Token(TokenType::RBRACKET, $char));
+                    break;
+                case ':':
+                    // Keep the colon of an "http:" / "https:" scheme inside the text buffer.
+                    if (str_ends_with($buffer, "http") || str_ends_with($buffer, "https")) {
+                        $buffer .= $char;
+                        // `break`, not `continue`: inside a switch PHP treats `continue` as `break` and warns about it.
+                        break;
+                    }
+
+                    $clearBuffer();
+                    array_push($tokens, new Token(TokenType::COLON, $char));
+                    break;
                 default:
                     $buffer .= $char;
                     break;
diff --git a/src/Parser.php b/src/Parser.php
index c95364c..7c8c1d7 100644
--- a/src/Parser.php
+++ b/src/Parser.php
@@ -3,18 +3,24 @@
 namespace parkdown;
 
 use DOMDocument;
+use DOMElement;
 use DOMNode;
 use DOMText;
 
 class Parser {
+    // Wraps a link's reference id in a placeholder href until resolveReferences() runs.
+    const MAGIC_CHAR = "*";
+
     private array $tokenStream;
     private int $pointer;
     private DOMDocument $document;
+    private array $references;
 
     public function __construct(array $tokenStream) {
         $this->tokenStream = $tokenStream;
         $this->pointer = 0;
         $this->document = new DOMDocument();
+        $this->references = [];
     }
 
     private function current() : Token {
@@ -72,6 +78,58 @@ class Parser {
         return $this->document->createElement("code", $buffer);
     }
 
+    /**
+     * Parses an inline link written as `[text][target]`.
+     *
+     * `target` is used as the href directly when it starts with "http";
+     * otherwise it is looked up in $this->references. An id that has no
+     * stored target yet becomes a MAGIC_CHAR-wrapped placeholder href.
+     *
+     * @return DOMNode|null the `<a>` element, or null with the token pointer
+     *                      restored when no complete link starts here
+     */
+    private function parseLink() : ?DOMNode {
+        $start = $this->pointer;
+        // Collects token data up to the closing "]" and consumes it; null if EOL or EOF comes first.
+        $readToBracket = function () : ?string {
+            $data = "";
+            while ($this->current()->type !== TokenType::RBRACKET) {
+                if ($this->current()->type === TokenType::EOL || $this->current()->type === TokenType::EOF)
+                    return null;
+                $data .= $this->consume()->data;
+            }
+            $this->consume();
+            return $data;
+        };
+
+        $this->consume(); // "[" opening the link text
+        $text = $readToBracket();
+        // The "[target]" part has to follow the text directly.
+        if ($text === null || $this->current()->type !== TokenType::LBRACKET) {
+            $this->pointer = $start;
+            return null;
+        }
+
+        $this->consume(); // "[" opening the target
+        $index = $readToBracket();
+        if ($index === null) {
+            $this->pointer = $start;
+            return null;
+        }
+
+        if (str_starts_with($index, "http"))
+            $href = $index;
+        else
+            $href = $this->references[$index] ?? self::MAGIC_CHAR.$index.self::MAGIC_CHAR;
+
+        $elm = $this->document->createElement("a");
+        // createElement() does not escape its value argument, a text node does.
+        $elm->appendChild($this->document->createTextNode($text));
+        $elm->setAttribute("href", $href);
+
+        return $elm;
+    }
+
     private function parseText() : array {
         $elms = [];
         $buffer = "";
@@ -97,6 +155,16 @@ class Parser {
                 $clearBuffer();
                 array_push($elms, $this->parseCode());
                 continue;
+            } elseif ($this->current()->type === TokenType::LBRACKET) {
+                $link = $this->parseLink();
+                if ($link !== null) {
+                    $clearBuffer();
+                    array_push($elms, $link);
+                } else {
+                    // Not a link after all: keep the "[" as literal text.
+                    $buffer .= $this->consume()->data;
+                }
+                continue;
             } else
                 $buffer .= $this->consume()->data;
         }
@@ -196,7 +264,6 @@ class Parser {
                 $this->peek(2)->type === TokenType::BACKTICK) && $this->current()->type !== TokenType::EOF) {
             $buffer .= $this->consume()->data;
         }
-        echo $buffer;
         if ($this->current()->type !== TokenType::EOF) {
             $this->consume();
             $this->consume();
@@ -209,6 +276,65 @@ class Parser {
         $this->consume();
     }
 
+    /**
+     * Parses a reference definition line `[id]: target` and stores the
+     * trimmed target under `id` in $this->references.
+     *
+     * A line that starts with "[" but does not have that shape is parsed as
+     * a regular paragraph instead.
+     */
+    private function parseReference() : void {
+        $idType = $this->next()->type;
+        $isDefinition = ($idType === TokenType::NUMBER || $idType === TokenType::TEXT)
+            && $this->peek(2)->type === TokenType::RBRACKET
+            && $this->peek(3)->type === TokenType::COLON;
+        if (!$isDefinition) {
+            $this->buildParagraph($this->parseText());
+            return;
+        }
+
+        $this->consume(); // "["
+        $index = $this->consume()->data;
+        $this->consume(); // "]"
+        $this->consume(); // ":"
+
+        $buffer = "";
+        while ($this->current()->type !== TokenType::EOL && $this->current()->type !== TokenType::EOF) {
+            $buffer .= $this->consume()->data;
+        }
+        // Step over the line break only: parse() loops until the current token
+        // is EOF, so EOF must not be consumed here.
+        if ($this->current()->type === TokenType::EOL)
+            $this->consume();
+
+        $this->references[$index] = trim($buffer);
+    }
+
+    /**
+     * Recursively replaces placeholder hrefs (MAGIC_CHAR, id, MAGIC_CHAR) with
+     * the target stored for that id in $this->references.
+     *
+     * Placeholders whose id has no stored target are left unchanged.
+     */
+    private function resolveReferences(DOMElement $node) : void {
+        if ($node->hasAttribute("href")) {
+            $href = $node->getAttribute("href");
+            $isPlaceholder = strlen($href) >= 2
+                && str_starts_with($href, self::MAGIC_CHAR)
+                && str_ends_with($href, self::MAGIC_CHAR);
+            if ($isPlaceholder) {
+                $index = substr($href, 1, -1);
+                if (array_key_exists($index, $this->references))
+                    $node->setAttribute("href", $this->references[$index]);
+            }
+        }
+        // Only element children are visited; other node types are skipped.
+        foreach ($node->childNodes as $child) {
+            if ($child instanceof DOMElement)
+                $this->resolveReferences($child);
+        }
+    }
+
     public function parse() : string {
         while ($this->current()->type !== TokenType::EOF) {
             switch($this->current()->type) {
@@ -233,13 +359,21 @@ class Parser {
                 case TokenType::EOL:
                     $this->consume();
                     break;
+                case TokenType::LBRACKET:
+                    $this->parseReference();
+                    break;
                 default:
                     $c = $this->consume();
                     echo "::".$c->type->name."::";
                     break;
             }
-        }
+        }
+        // Fill in links that were parsed before their reference was stored.
+        foreach ($this->document->childNodes as $node) {
+            if ($node instanceof DOMElement)
+                $this->resolveReferences($node);
+        }
 
         return $this->document->saveHTML();
     }
 }
\ No newline at end of file
diff --git a/src/Token.php b/src/Token.php
index 5bf1172..a2bb4ff 100644
--- a/src/Token.php
+++ b/src/Token.php
@@ -11,6 +11,9 @@ enum TokenType {
     case EOL     ;
     case EOF     ;
     case BACKTICK;
+    case LBRACKET;
+    case RBRACKET;
+    case COLON;
 }
 
 class Token {
diff --git a/test/test1.md b/test/test1.md
index 80e0945..32fcdb4 100644
--- a/test/test1.md
+++ b/test/test1.md
@@ -13,6 +13,8 @@ Lorem **ipsum** dolor sit *amet*, `consetetur` sadipscing elitr, sed diam nonumy
 eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam
 voluptua.
 
+And we test: all [special chars][1] in random [text][hallo]. this [word][http://lol.de] is a link.
+
 At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.
 
 ```
@@ -22,4 +24,7 @@ At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergr
 }
 
 helloWorld();
-```
\ No newline at end of file
+```
+
+[1]: https://these-are-references:0
+[hallo]: ulululu
\ No newline at end of file