commit
9ff845fb09
9 changed files with 378 additions and 0 deletions
@ -0,0 +1,5 @@ |
||||
.idea |
||||
.vscode |
||||
.DS_Store |
||||
|
||||
vendor |
@ -0,0 +1,17 @@ |
||||
{ |
||||
"name": "massivedynamic/parkdown", |
||||
"type": "library", |
||||
"license": "MIT", |
||||
"autoload": { |
||||
"psr-4": { |
||||
"parkdown\\": "src/" |
||||
} |
||||
}, |
||||
"authors": [ |
||||
{ |
||||
"name": "Michael Ochmann", |
||||
"email": "miko@massivedynamic.eu" |
||||
} |
||||
], |
||||
"require": {} |
||||
} |
@ -0,0 +1,18 @@ |
||||
{ |
||||
"_readme": [ |
||||
"This file locks the dependencies of your project to a known state", |
||||
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", |
||||
"This file is @generated automatically" |
||||
], |
||||
"content-hash": "cdece622f692fc25b3cb5a87fd3368e3", |
||||
"packages": [], |
||||
"packages-dev": [], |
||||
"aliases": [], |
||||
"minimum-stability": "stable", |
||||
"stability-flags": [], |
||||
"prefer-stable": false, |
||||
"prefer-lowest": false, |
||||
"platform": [], |
||||
"platform-dev": [], |
||||
"plugin-api-version": "2.1.0" |
||||
} |
@ -0,0 +1,9 @@ |
||||
<?php |
||||
|
||||
// Demo entry point: renders the bundled Markdown fixture to HTML on stdout.
require __DIR__."/vendor/autoload.php";

$fixture = __DIR__."/test/test1.md";
$source  = file_get_contents($fixture);

// file_get_contents() returns false when the file cannot be read; stop with a
// clear error instead of handing a non-string on to the library.
if ($source === false)
    throw new \RuntimeException("Unable to read Markdown source: ".$fixture);

$parkdown = new parkdown\Parkdown($source);
$parkdown->html();
@ -0,0 +1,69 @@ |
||||
<?php declare(strict_types=1); |
||||
|
||||
namespace parkdown; |
||||
|
||||
/**
 * Splits Markdown source text into a flat list of Token objects.
 *
 * The source is processed line by line. Empty lines are skipped entirely (they
 * produce no token), every other line is terminated by an EOL token, and the
 * whole stream is terminated by a single EOF token.
 */
class Lexer {
    /** @var string[] Source lines, with all line endings normalised to "\n" beforehand. */
    private array $source;

    /**
     * @param string $sourceCode Raw Markdown; "\r\n" and "\r" line endings are accepted.
     */
    public function __construct(string $sourceCode) {
        $unifiedSource = str_replace(["\r\n", "\r"], "\n", $sourceCode);
        $this->source  = explode("\n", trim($unifiedSource, "\n"));
    }

    /**
     * Converts the source lines into tokens.
     *
     * '#', '*', '.' and '`' each become their own single-character token, runs of
     * digits that are followed by another character become NUMBER tokens, and
     * everything else is collected into TEXT tokens.
     *
     * @return Token[] Token stream, always ending with an EOF token.
     */
    public function tokenize() : array {
        $tokens = [];
        $buffer = "";

        // Declared once, outside the line loop, so that input without any
        // non-empty line (e.g. "") no longer ends in a call to an undefined closure.
        $flushText = static function() use (&$buffer, &$tokens) : void {
            if ($buffer === "")
                return;
            $tokens[] = new Token(TokenType::TEXT, $buffer);
            $buffer   = "";
        };

        foreach ($this->source as $line) {
            if ($line === "")
                continue;

            // True while $buffer holds an unfinished run of digits.
            $number = false;

            foreach (str_split($line) as $char) {
                $isDigit = is_numeric($char);

                if ($isDigit && !$number) {
                    // A digit run starts: emit any pending text first.
                    $flushText();
                    $number = true;
                } elseif (!$isDigit && $number) {
                    // The digit run ended with the previous character.
                    $tokens[] = new Token(TokenType::NUMBER, $buffer);
                    $buffer   = "";
                    $number   = false;
                }

                $symbol = match ($char) {
                    '#'     => TokenType::HASH,
                    '*'     => TokenType::ASTERISK,
                    '.'     => TokenType::DOT,
                    '`'     => TokenType::BACKTICK,
                    default => null,
                };

                if ($symbol === null) {
                    $buffer .= $char;
                    continue;
                }

                $flushText();
                $tokens[] = new Token($symbol, $char);
            }

            // NOTE: digits that run up to the end of the line are flushed here as
            // TEXT, not NUMBER; this matches the behaviour the parser was written against.
            $flushText();
            $tokens[] = new Token(TokenType::EOL);
        }

        $tokens[] = new Token(TokenType::EOF);

        return $tokens;
    }
}
@ -0,0 +1,18 @@ |
||||
<?php declare(strict_types=1); |
||||
|
||||
namespace parkdown; |
||||
|
||||
/**
 * Facade of the library: takes Markdown source and prints the rendered HTML.
 */
class Parkdown {
    /**
     * @param string $sourceCode Markdown text to render.
     */
    public function __construct(private string $sourceCode) {}

    /**
     * Tokenizes and parses the source, then echoes the resulting HTML.
     */
    public function html() : void {
        $tokens = (new Lexer($this->sourceCode))->tokenize();

        echo (new Parser($tokens))->parse();
    }
}
@ -0,0 +1,202 @@ |
||||
<?php declare(strict_types=1); |
||||
|
||||
namespace parkdown; |
||||
|
||||
use DOMDocument; |
||||
use DOMNode; |
||||
use DOMText; |
||||
|
||||
/**
 * Turns a token stream produced by the Lexer into HTML.
 *
 * Block elements (headings, lists, paragraphs) are appended directly to an
 * internal DOMDocument; inline elements (bold, italic, code) are parsed line
 * by line. Inline markup that is never closed on its line is emitted as
 * literal text instead of being turned into an element.
 */
class Parser {
    /** @var Token[] Tokens to parse. */
    private array $tokenStream;
    /** Index of the current token inside $tokenStream. */
    private int $pointer;
    /** Output tree; serialised by parse(). */
    private DOMDocument $document;

    /**
     * @param Token[] $tokenStream Tokens as returned by Lexer::tokenize().
     */
    public function __construct(array $tokenStream) {
        $this->tokenStream = $tokenStream;
        $this->pointer     = 0;
        $this->document    = new DOMDocument();
    }

    /** Returns the token at the current position without consuming it. */
    private function current() : Token {
        return $this->peek();
    }

    /** Returns the token after the current one without consuming anything. */
    private function next() : Token {
        return $this->peek(1);
    }

    /**
     * Looks $amount tokens ahead of (or behind) the current position.
     * Positions outside the stream yield a synthetic EOF token.
     */
    private function peek(int $amount = 0) : Token {
        $index = $this->pointer + $amount;
        if ($index < 0 || $index >= count($this->tokenStream))
            return new Token(TokenType::EOF);

        return $this->tokenStream[$index];
    }

    /** Returns the current token and advances the position by one. */
    private function consume() : Token {
        $token = $this->current();
        $this->pointer++;

        return $token;
    }

    /** True when the current token ends the line (EOL) or the whole input (EOF). */
    private function atLineEnd() : bool {
        $type = $this->current()->type;

        return $type === TokenType::EOL || $type === TokenType::EOF;
    }

    /**
     * Concatenates token data up to, but not including, the next $delimiter
     * token. Never reads past the end of the line, so unterminated markup
     * cannot run into the following line or loop at the end of input.
     */
    private function readUntil(TokenType $delimiter) : string {
        $buffer = "";
        while ($this->current()->type !== $delimiter && !$this->atLineEnd())
            $buffer .= $this->consume()->data;

        return $buffer;
    }

    /**
     * Creates <$tag> containing $text as a text node. A text node is used
     * because the value argument of DOMDocument::createElement() is not
     * escaped and breaks on a bare "&".
     */
    private function wrap(string $tag, string $text) : DOMNode {
        $elm = $this->document->createElement($tag);
        $elm->appendChild($this->document->createTextNode($text));

        return $elm;
    }

    /**
     * Parses the remainder of a bold span; the opening "**" has already been
     * consumed by the caller.
     *
     * @return DOMNode A <b> element, or literal text when the span is not closed.
     */
    private function parseBold() : DOMNode {
        $content = $this->readUntil(TokenType::ASTERISK);
        if ($this->current()->type !== TokenType::ASTERISK)
            return $this->document->createTextNode("**".$content);

        $this->consume();
        // Only swallow the second closing asterisk if it is really there.
        if ($this->current()->type === TokenType::ASTERISK)
            $this->consume();

        return $this->wrap("b", $content);
    }

    /**
     * Parses the remainder of an italic span; the opening "*" has already
     * been consumed by the caller.
     *
     * @return DOMNode An <i> element, or literal text when the span is not closed.
     */
    private function parseItalic() : DOMNode {
        $content = $this->readUntil(TokenType::ASTERISK);
        if ($this->current()->type !== TokenType::ASTERISK)
            return $this->document->createTextNode("*".$content);

        $this->consume();

        return $this->wrap("i", $content);
    }

    /**
     * Parses an inline code span starting at the current backtick token.
     *
     * @return DOMNode A <code> element, or literal text when the span is not closed.
     */
    private function parseCode() : DOMNode {
        $this->consume(); // opening backtick
        $content = $this->readUntil(TokenType::BACKTICK);
        // Leave the EOL in place for the caller when the span is unterminated.
        if ($this->current()->type !== TokenType::BACKTICK)
            return $this->document->createTextNode("`".$content);

        $this->consume(); // closing backtick

        return $this->wrap("code", $content);
    }

    /**
     * Parses inline content up to the end of the current line. The terminating
     * EOL/EOF token is left unconsumed.
     *
     * @return DOMNode[] Text nodes and inline elements in source order.
     */
    private function parseText() : array {
        $elms   = [];
        $buffer = "";

        $flush = function() use (&$elms, &$buffer) : void {
            if ($buffer === "")
                return;
            $elms[] = $this->document->createTextNode($buffer);
            $buffer = "";
        };

        while (!$this->atLineEnd()) {
            $type = $this->current()->type;

            if ($type === TokenType::ASTERISK) {
                $flush();
                $this->consume();
                if ($this->current()->type === TokenType::ASTERISK) {
                    $this->consume();
                    $elms[] = $this->parseBold();
                } else {
                    $elms[] = $this->parseItalic();
                }
            } elseif ($type === TokenType::BACKTICK) {
                $flush();
                $elms[] = $this->parseCode();
            } else {
                $buffer .= $this->consume()->data;
            }
        }
        $flush();

        return $elms;
    }

    /** Creates <$tag> and fills it with the inline content of the current line. */
    private function parseLine(string $tag) : DOMNode {
        $elm = $this->document->createElement($tag);
        foreach ($this->parseText() as $node)
            $elm->appendChild($node);

        return $elm;
    }

    /** Consumes the current token if it is an EOL. */
    private function skipLineBreak() : void {
        if ($this->current()->type === TokenType::EOL)
            $this->consume();
    }

    /**
     * Parses consecutive lines that start with "*" into one <ul> and appends
     * it to the document.
     */
    private function parseUnorderedList() : void {
        $list = $this->document->createElement("ul");

        while ($this->current()->type === TokenType::ASTERISK) {
            $this->consume(); // bullet marker
            $list->appendChild($this->parseLine("li"));
            $this->skipLineBreak();
        }

        $this->document->appendChild($list);
    }

    /**
     * Parses consecutive lines that start with a number. "<number>." lines
     * become <li> items of an <ol>; a line that merely starts with a number
     * becomes a <p>. Output keeps source order and no empty <ol> is emitted.
     */
    private function parseOrderedList() : void {
        $list = $this->document->createElement("ol");

        while ($this->current()->type === TokenType::NUMBER) {
            if ($this->next()->type === TokenType::DOT) {
                $this->consume(); // number
                $this->consume(); // dot
                $list->appendChild($this->parseLine("li"));
            } else {
                // Emit the items collected so far before the paragraph so the
                // document order matches the source order.
                if ($list->hasChildNodes()) {
                    $this->document->appendChild($list);
                    $list = $this->document->createElement("ol");
                }
                $this->document->appendChild($this->parseLine("p"));
            }
            $this->skipLineBreak();
        }

        if ($list->hasChildNodes())
            $this->document->appendChild($list);
    }

    /**
     * Parses a heading line: the number of leading "#" tokens selects the
     * level, capped at 6 because HTML defines no heading element beyond <h6>.
     */
    private function parseHeading() : void {
        $level = 0;

        while ($this->current()->type === TokenType::HASH) {
            $level++;
            $this->consume();
        }

        $this->document->appendChild($this->parseLine("h".min($level, 6)));
    }

    /**
     * Parses the whole token stream.
     *
     * @return string The generated HTML as serialised by DOMDocument::saveHTML().
     */
    public function parse() : string {
        while ($this->current()->type !== TokenType::EOF) {
            match ($this->current()->type) {
                TokenType::ASTERISK => $this->parseUnorderedList(),
                TokenType::HASH     => $this->parseHeading(),
                TokenType::NUMBER   => $this->parseOrderedList(),
                TokenType::EOL      => $this->consume(),
                // Anything else (text, a leading backtick or dot) starts a paragraph.
                default             => $this->document->appendChild($this->parseLine("p")),
            };
        }

        return $this->document->saveHTML();
    }
}
@ -0,0 +1,24 @@ |
||||
<?php declare(strict_types=1); |
||||
|
||||
namespace parkdown; |
||||
|
||||
/**
 * Kinds of tokens the Lexer can emit.
 */
enum TokenType {
    /** A single '#' character. */
    case HASH;
    /** A single '*' character. */
    case ASTERISK;
    /** A run of characters without special meaning. */
    case TEXT;
    /** A single '.' character. */
    case DOT;
    /** A run of digits. */
    case NUMBER;
    /** End of a source line. */
    case EOL;
    /** End of the token stream. */
    case EOF;
    /** A single '`' character. */
    case BACKTICK;
}
||||
|
||||
/**
 * A single lexical unit: its kind plus the source text it was created from.
 */
class Token {
    /**
     * @param TokenType $type Kind of the token.
     * @param string    $data Source text of the token; defaults to an empty string.
     */
    public function __construct(
        public TokenType $type,
        public string $data = "",
    ) {}
}
@ -0,0 +1,16 @@ |
||||
# Heading 1 |
||||
#### lol **lol** |
||||
|
||||
* this **bold** and somewhat |
||||
* kind of *italic* thing |
||||
* is |
||||
* a |
||||
* list |
||||
|
||||
1. this is |
||||
2. an ordered |
||||
3. list |
||||
|
||||
Lorem **ipsum** dolor sit *amet*, `consetetur` sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. |
||||
|
||||
At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. |
Loading…
Reference in new issue