completely rewrote parsing of lists

nesting is now possible. we also now have a single function for parsing
ordered and unordered lists
feature/tests
Michael Ochmann 3 years ago
parent b61060302c
commit f0c6e1484c
  1. 6
      README.md
  2. 3
      index.php
  3. 6
      src/Lexer.php
  4. 104
      src/Parser.php
  5. 1
      src/Token.php
  6. 19
      tests/list.md

@ -12,12 +12,16 @@ Parkdown currently support the following block types:
* tables *(with alignment specification)*
* paragraphs
* block quotes
* lists *(like this one)*
* also nested
* horizontal rules `---`
* lol
* bar
### Supported inline types
Parkdown currently supports the following inline types:
* bold text (`**bold**`)
* bold text (`**bold**`)
* italic text (`*italic*`)
* code snippets
* images (`![alt text](src url)`)

@ -4,7 +4,8 @@ require __DIR__."/vendor/autoload.php";
$source = file_get_contents(dirname(__FILE__)."/README.md");
//$source = file_get_contents(dirname(__FILE__)."/test/paragraph.md");
//$source = file_get_contents(dirname(__FILE__)."/tests/paragraph.md");
//$source = file_get_contents(dirname(__FILE__)."/tests/list.md");
echo "
<style>

@ -91,10 +91,14 @@ class Lexer {
$clearBuffer();
array_push($tokens, new Token(TokenType::GT, $char));
break;
case ' ':
$clearBuffer();
array_push($tokens, new Token(TokenType::TAB, $char));
break;
case ':':
if (str_ends_with($buffer, "http") || str_ends_with($buffer, "https")) {
$buffer .= $char;
continue;
continue 2;
}
$clearBuffer();

@ -6,6 +6,11 @@ use DOMDocument;
use DOMElement;
use DOMNode;
// Kind of list that Parser::parseList() builds; selects the list element it creates.
enum ListType {
// parseList() creates an "ol" element for this case
case ORDERED;
// parseList() creates a "ul" element for this case (its default)
case UNORDERED;
}
class Parser {
const MAGIC_CHAR = "*";
@ -29,6 +34,10 @@ class Parser {
return $this->peek(1);
}
// Returns the token one position before the current pointer, i.e. peek()
// with an offset of -1.
private function last() : Token {
return $this->peek(-1);
}
private function peek(int $amount = 0) : Token {
$amount += $this->pointer;
if ($amount < 0 || $amount >= count($this->tokenStream))
@ -228,58 +237,67 @@ class Parser {
return $elms;
}
private function parseUnorderedList() : void {
$list = $this->document->createElement("ul");
private function parseList(ListType $type = ListType::UNORDERED, int $level = 0) : DOMNode {
$list = $this->document->createElement($type === ListType::UNORDERED ? "ul" : "ol");
if (!str_starts_with($this->next()->data, " ")) {
$this->buildParagraph($this->parseText());
return;
}
while (!($this->current()->type === TokenType::EOL && $this->next()->type !== TokenType::ASTERISK) && $this->current()->type !== TokenType::EOF) {
while ($this->current()->type !== TokenType::EOF &&
($this->current()->type !== TokenType::EOL && $this->next()->type !== TokenType::EOL)) {
// if we encounter a single linebreak, we are done with the current item
if ($this->current()->type === TokenType::EOL) {
$this->consume();
continue;
}
if ($this->current()->type === TokenType::ASTERISK) {
// first we remove leading tabs
while ($this->current()->type === TokenType::TAB)
$this->consume();
if ($this->current()->type === TokenType::EOF)
break;
// then we expect an asterisk or a number followed by a period
if ($type === ListType::UNORDERED) {
$asterisk = $this->consume();
$elm = $this->document->createElement("li");
foreach($this->parseText() as $node)
$elm->appendChild($node);
$list->appendChild($elm);
assert($asterisk->type === TokenType::ASTERISK, "expected asterisk, got ".$asterisk->type->name);
} else {
break;
$number = $this->consume();
assert($number->type === TokenType::NUMBER, "expected number, got ".$number->type->name);
$period = $this->consume();
assert($period->type === TokenType::DOT, "expected period, got ".$period->type->name);
}
}
$this->consume();
$this->document->appendChild($list);
}
// then we parse the node content
$elm = $this->document->createElement("li");
foreach ($this->parseText() as $node)
$elm->appendChild($node);
private function parseOrderedList() : void {
$list = $this->document->createElement("ol");
// now we check, if the level of the next line is higher than the current level.
// if so, we want to append a sub list to the current item
while (!($this->current()->type === TokenType::EOL && $this->next()->type !== TokenType::NUMBER) &&
$this->current()->type !== TokenType::EOF) {
if ($this->current()->type === TokenType::EOL) {
// here should be an EOL
assert($this->current()->type === TokenType::EOL, "expected EOL, got ".$this->current()->type->name);
$this->consume();
$nextLevel = 0;
while ($this->current()->type === TokenType::TAB) {
$this->consume();
continue;
}
if ($this->current()->type === TokenType::NUMBER &&
$this->next()->type === TokenType::DOT) {
$number = $this->consume();
$dot = $this->consume();
$elm = $this->document->createElement("li");
foreach($this->parseText() as $node)
$elm->appendChild($node);
$list->appendChild($elm);
} else {
$elms = $this->parseText();
$this->buildParagraph($elms);
continue;
$nextLevel++;
}
// reset pointer, as we did not really want to consume the tokens, but did for
// convenience
$this->pointer -= $nextLevel;
if ($nextLevel > $level)
$elm->appendChild($this->parseList($type, $nextLevel));
// then we append the list item to the list
$list->appendChild($elm);
// if next level is lower than current, we are done with the current sub list
if ($nextLevel < $level)
break;
}
$this->consume();
$this->document->appendChild($list);
return $list;
}
private function buildParagraph(array $elms) : void {
@ -477,13 +495,15 @@ class Parser {
while ($this->current()->type !== TokenType::EOF) {
switch($this->current()->type) {
case TokenType::ASTERISK:
$this->parseUnorderedList();
$list = $this->parseList();
$this->document->appendChild($list);
break;
case TokenType::HASH:
$this->parseHeading();
break;
case TokenType::NUMBER:
$this->parseOrderedList();
$list = $this->parseList(ListType::ORDERED);
$this->document->appendChild($list);
break;
case TokenType::BACKTICK:
$this->parseCodeBlock();

@ -21,6 +21,7 @@ enum TokenType {
case BACKSLASH;
case PIPE ;
case GT ;
case TAB ;
}
class Token {

@ -0,0 +1,19 @@
* point A
* point B
* sub A
* sub B
* sub sub A
* sub C
* sub D
* point C
* point D
* point E
* point F
1. point 1
2. sub 1
1. sub sub 1
1. sub sub 2
3. sub 2
3. point 2
4. point 3
Loading…
Cancel
Save