completely rewrote parsing of lists

nesting is now possible. we also now have a single function for parsing
ordered and unordered lists
feature/tests
Michael Ochmann 3 years ago
parent b61060302c
commit f0c6e1484c
  1. 4
      README.md
  2. 3
      index.php
  3. 6
      src/Lexer.php
  4. 100
      src/Parser.php
  5. 1
      src/Token.php
  6. 19
      tests/list.md

@ -12,7 +12,11 @@ Parkdown currently support the following block types:
* tables *(with alignment specification)* * tables *(with alignment specification)*
* paragraphs * paragraphs
* block quotes * block quotes
* lists *(like this one)*
* also nested
* horizontal rules `---` * horizontal rules `---`
* lol
* bar
### Supported inline types ### Supported inline types
Parkdown currently support the following block types: Parkdown currently support the following block types:

@ -4,7 +4,8 @@ require __DIR__."/vendor/autoload.php";
$source = file_get_contents(dirname(__FILE__)."/README.md"); $source = file_get_contents(dirname(__FILE__)."/README.md");
//$source = file_get_contents(dirname(__FILE__)."/test/paragraph.md"); //$source = file_get_contents(dirname(__FILE__)."/tests/paragraph.md");
//$source = file_get_contents(dirname(__FILE__)."/tests/list.md");
echo " echo "
<style> <style>

@ -91,10 +91,14 @@ class Lexer {
$clearBuffer(); $clearBuffer();
array_push($tokens, new Token(TokenType::GT, $char)); array_push($tokens, new Token(TokenType::GT, $char));
break; break;
case ' ':
$clearBuffer();
array_push($tokens, new Token(TokenType::TAB, $char));
break;
case ':': case ':':
if (str_ends_with($buffer, "http") || str_ends_with($buffer, "https")) { if (str_ends_with($buffer, "http") || str_ends_with($buffer, "https")) {
$buffer .= $char; $buffer .= $char;
continue; continue 2;
} }
$clearBuffer(); $clearBuffer();

@ -6,6 +6,11 @@ use DOMDocument;
use DOMElement; use DOMElement;
use DOMNode; use DOMNode;
enum ListType {
case ORDERED;
case UNORDERED;
}
class Parser { class Parser {
const MAGIC_CHAR = "*"; const MAGIC_CHAR = "*";
@ -29,6 +34,10 @@ class Parser {
return $this->peek(1); return $this->peek(1);
} }
private function last() : Token {
return $this->peek(-1);
}
private function peek(int $amount = 0) : Token { private function peek(int $amount = 0) : Token {
$amount += $this->pointer; $amount += $this->pointer;
if ($amount < 0 || $amount >= count($this->tokenStream)) if ($amount < 0 || $amount >= count($this->tokenStream))
@ -228,58 +237,67 @@ class Parser {
return $elms; return $elms;
} }
private function parseUnorderedList() : void { private function parseList(ListType $type = ListType::UNORDERED, int $level = 0) : DOMNode {
$list = $this->document->createElement("ul"); $list = $this->document->createElement($type === ListType::UNORDERED ? "ul" : "ol");
if (!str_starts_with($this->next()->data, " ")) { while ($this->current()->type !== TokenType::EOF &&
$this->buildParagraph($this->parseText()); ($this->current()->type !== TokenType::EOL && $this->next()->type !== TokenType::EOL)) {
return;
}
while (!($this->current()->type === TokenType::EOL && $this->next()->type !== TokenType::ASTERISK) && $this->current()->type !== TokenType::EOF) { // if we encounter a single linebreak, we are done with the current item
if ($this->current()->type === TokenType::EOL) { if ($this->current()->type === TokenType::EOL) {
$this->consume(); $this->consume();
continue;
} }
if ($this->current()->type === TokenType::ASTERISK) {
// first we remove leading tabs
while ($this->current()->type === TokenType::TAB)
$this->consume();
if ($this->current()->type === TokenType::EOF)
break;
// then we expect an asterisk or a number followed by a period
if ($type === ListType::UNORDERED) {
$asterisk = $this->consume(); $asterisk = $this->consume();
$elm = $this->document->createElement("li"); assert($asterisk->type === TokenType::ASTERISK, "expected asterisk, got ".$asterisk->type->name);
foreach($this->parseText() as $node)
$elm->appendChild($node);
$list->appendChild($elm);
} else { } else {
break; $number = $this->consume();
} assert($number->type === TokenType::NUMBER, "expected number, got ".$number->type->name);
} $period = $this->consume();
$this->consume(); assert($period->type === TokenType::DOT, "expected period, got ".$period->type->name);
$this->document->appendChild($list);
} }
private function parseOrderedList() : void { // then we parse the node content
$list = $this->document->createElement("ol"); $elm = $this->document->createElement("li");
foreach ($this->parseText() as $node)
$elm->appendChild($node);
while (!($this->current()->type === TokenType::EOL && $this->next()->type !== TokenType::NUMBER) && // now we check, if the level of the next line is higher than the current level.
$this->current()->type !== TokenType::EOF) { // if so, we want to append a sub list to the current item
if ($this->current()->type === TokenType::EOL) {
// there should be an EOL here
assert($this->current()->type === TokenType::EOL, "expected EOL, got ".$this->current()->type->name);
$this->consume(); $this->consume();
continue;
$nextLevel = 0;
while ($this->current()->type === TokenType::TAB) {
$this->consume();
$nextLevel++;
} }
if ($this->current()->type === TokenType::NUMBER && // reset pointer, as we did not really want to consume the tokens, but did for
$this->next()->type === TokenType::DOT) { // convenience
$number = $this->consume(); $this->pointer -= $nextLevel;
$dot = $this->consume();
$elm = $this->document->createElement("li"); if ($nextLevel > $level)
foreach($this->parseText() as $node) $elm->appendChild($this->parseList($type, $nextLevel));
$elm->appendChild($node);
// then we append the list item to the list
$list->appendChild($elm); $list->appendChild($elm);
} else {
$elms = $this->parseText(); // if next level is lower than current, we are done with the current sub list
$this->buildParagraph($elms); if ($nextLevel < $level)
continue; break;
}
} }
$this->consume();
$this->document->appendChild($list); return $list;
} }
private function buildParagraph(array $elms) : void { private function buildParagraph(array $elms) : void {
@ -477,13 +495,15 @@ class Parser {
while ($this->current()->type !== TokenType::EOF) { while ($this->current()->type !== TokenType::EOF) {
switch($this->current()->type) { switch($this->current()->type) {
case TokenType::ASTERISK: case TokenType::ASTERISK:
$this->parseUnorderedList(); $list = $this->parseList();
$this->document->appendChild($list);
break; break;
case TokenType::HASH: case TokenType::HASH:
$this->parseHeading(); $this->parseHeading();
break; break;
case TokenType::NUMBER: case TokenType::NUMBER:
$this->parseOrderedList(); $list = $this->parseList(ListType::ORDERED);
$this->document->appendChild($list);
break; break;
case TokenType::BACKTICK: case TokenType::BACKTICK:
$this->parseCodeBlock(); $this->parseCodeBlock();

@ -21,6 +21,7 @@ enum TokenType {
case BACKSLASH; case BACKSLASH;
case PIPE ; case PIPE ;
case GT ; case GT ;
case TAB ;
} }
class Token { class Token {

@ -0,0 +1,19 @@
* point A
* point B
* sub A
* sub B
* sub sub A
* sub C
* sub D
* point C
* point D
* point E
* point F
1. point 1
2. sub 1
1. sub sub 1
1. sub sub 2
3. sub 2
3. point 2
4. point 3
Loading…
Cancel
Save