Compare commits

...

21 Commits

Author SHA1 Message Date
Michael Ochmann 21713e2209 added functionality to force a linebreak 3 years ago
Michael Ochmann 3b7e772fc7 now tokenizing spaces 3 years ago
Michael Ochmann 594b85a81e fixed bug where ordered lists and tables were wrongly detected 3 years ago
Michael Ochmann f3bbcb07e7 removed unnecessary console.log 3 years ago
Michael Ochmann af61963440 better editor features: 3 years ago
Michael Ochmann 549c6baa96 fixed bottom padding 3 years ago
Michael Ochmann bb1ba19757 fixed linenumber alignment on mobile 3 years ago
Michael Ochmann fb4cfc9486 more mobile optimizations 3 years ago
Michael Ochmann ea10ed0a30 fixed linebreak in code blocks 3 years ago
Michael Ochmann 7c95ce46fe fix `white-space: nowrap` for safari 3 years ago
Michael Ochmann e11d3989d1 optimized mobile view 3 years ago
Michael Ochmann 610ce2c0d3 fixed test 3 years ago
Michael Ochmann 1d55913f85 added new default text 3 years ago
Michael Ochmann fb5df71101 not using strict mode in playground 3 years ago
Michael Ochmann aaa15ce893 added playground for debuggin purposes 3 years ago
Michael Ochmann 5acd3b362e fixed minor bugs 3 years ago
Michael Ochmann 7e19ede470 added custom assertion error 3 years ago
Michael Ochmann 543112f89e added better debug tools in strict mode 3 years ago
Michael Ochmann 4eb24000e1 better asserts; start of implementing strict/non strict mode 3 years ago
Michael Ochmann 81de125831 added filename to Token location 3 years ago
Michael Ochmann 18067b6e56 added `strict` mode 3 years ago
  1. 37
      index.php
  2. 35
      playground/ajax.php
  3. 473
      playground/index.php
  4. 70
      src/Lexer.php
  5. 6
      src/Parkdown.php
  6. 148
      src/Parser.php
  7. 7
      src/ParserError.php
  8. 9
      src/Token.php
  9. 10
      tests/AnnotationsTest.php
  10. 10
      tests/HeadingsTest.php

@ -2,9 +2,6 @@
require __DIR__."/vendor/autoload.php"; require __DIR__."/vendor/autoload.php";
$source = file_get_contents(dirname(__FILE__)."/README.md");
echo " echo "
<style> <style>
body { body {
@ -28,5 +25,35 @@ echo "
</style> </style>
"; ";
$Instance = new parkdown\Parkdown($source); $file = dirname(__FILE__)."/README.md";
echo $Instance->html(); $source = file_get_contents($file);
// Render the README in strict mode; on a parser error print a report whose
// locations are clickable vscode:// links.
try {
	$Instance = new parkdown\Parkdown($source, true, $file);
	echo $Instance->html();
} catch (parkdown\ParserError $error) {
	// Error text and file paths end up in element content and in single-quoted
	// attributes, so escape quotes as well.
	$escape = static fn(string $text) : string => htmlspecialchars($text, ENT_QUOTES);

	echo "<pre>";

	// The message starts with a "FILE:ROW:COL:" location followed by the description.
	$message  = explode(" ", $error->getMessage());
	$location = (string) array_shift($message);
	$file     = explode(":", $location)[0];
	$text     = $escape(implode(" ", $message));
	if ($file === "INPUT_STRING")
		echo $escape($location)." ".$text;
	else
		echo "<a href='vscode://file/".$escape(substr($location, 0, -1))."'>".$escape($location)."</a> ".$text;

	echo "<p><small>";
	foreach (explode("\n", $error->getTraceAsString()) as $step) {
		// Drop the leading "#N" frame index; the next word is the frame location.
		$step = explode(" ", $step);
		array_shift($step);
		$location = (string) array_shift($step);
		// Rewrite "FILE(LINE):" to "FILE:LINE:".
		$location = preg_replace("/\(([0-9]+)\):/", ":\$1:", $location) ?? $location;
		$call     = $escape(implode(" ", $step));

		if (str_ends_with($location, ":"))
			echo "<a href='vscode://file/".$escape(substr($location, 0, -1))."'>".$escape($location)."</a> ".$call."<br>";
		else
			// Frames without a file location (e.g. the closing "{main}" entry) get no link.
			echo $escape($location)." ".$call."<br>";
	}
	echo "</small></p>";
	echo "</pre>";
}

@ -0,0 +1,35 @@
<?php declare(strict_types=1);
// Playground endpoint: renders the Markdown from the request body to HTML. When the
// parser raises a ParserError, an HTML error report with clickable locations is
// printed instead.

require __DIR__."/../vendor/autoload.php";

// file_get_contents() returns false on failure, but the Parkdown constructor
// requires a string (strict_types is on).
$source = file_get_contents("php://input");
if ($source === false)
	$source = "";

try {
	$Instance = new parkdown\Parkdown($source, false);
	echo $Instance->html();
} catch (parkdown\ParserError $error) {
	// Error text and file paths end up in element content and in quoted
	// attributes, so escape quotes as well.
	$escape = static fn(string $text) : string => htmlspecialchars($text, ENT_QUOTES);

	echo "<pre>";

	// The message starts with a "FILE:ROW:COL:" location followed by the description.
	$message  = explode(" ", $error->getMessage());
	$location = (string) array_shift($message);
	$loc      = explode(":", $location);
	array_shift($loc); // drop the file part
	// Both values are interpolated into a javascript: URL, so force them to integers.
	$row = (int) ($loc[0] ?? 0);
	$col = (int) ($loc[1] ?? 0);
	echo "<a class='error' href=\"javascript: highlight($col, $row);\">".$escape($location)."</a> ".$escape(implode(" ", $message));

	echo "<p><small>";
	foreach (explode("\n", $error->getTraceAsString()) as $step) {
		// Drop the leading "#N" frame index; the next word is the frame location.
		$step = explode(" ", $step);
		array_shift($step);
		$location = (string) array_shift($step);
		// Rewrite "FILE(LINE):" to "FILE:LINE:".
		$location = preg_replace("/\(([0-9]+)\):/", ":\$1:", $location) ?? $location;
		$call     = $escape(implode(" ", $step));

		if (str_ends_with($location, ":"))
			echo "<a class='error' href='vscode://file/".$escape(substr($location, 0, -1))."'>".$escape($location)."</a> ".$call."<br>";
		else
			// Frames without a file location (e.g. the closing "{main}" entry) get no link.
			echo $escape($location)." ".$call."<br>";
	}
	echo "</small></p>";
	echo "</pre>";
}

@ -0,0 +1,473 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=no">
<title>parkdown playground</title>
<script type="module">
/**
 * Tiny query helper.
 *
 * @param {string} selector CSS selector
 * @returns the full match list when two or more elements match, otherwise the
 *          first match (`undefined` when nothing matches)
 */
const $ = selector => {
    const matches = document.querySelectorAll(selector);
    if (matches.length >= 2)
        return matches;
    return matches[0];
};
// References to the playground's DOM nodes, looked up once via `$`.
const output = $("#output");
const input = $("#input");
const numbers = $(".linenumbers");
const position = $("#position");
const percent = $("#percent");
const editor = $(".editor");
// Line number whose gutter entry currently carries the "active" class.
let lastLine = 1;
// Handle of the pending timer that sends the text to ajax.php.
let timeout;
// Caret offset seen by the previous onLineChange run; used to skip redundant updates.
let lastSelection;
/**
 * Syncs the gutter highlight and the `[line:column]` status display with the caret.
 *
 * Does nothing when the caret offset equals the one recorded by the previous run.
 * A line without a gutter element is skipped instead of raising a TypeError.
 *
 * @param {Event} [event] unused; accepted so the function can serve as a listener
 */
const onLineChange = event => {
    const caret = input.selectionStart;
    if (caret === lastSelection)
        return;

    // Everything before the caret, split into lines: the number of pieces is the
    // current line, the length of the last piece is the column.
    const linesToCursor = input.value.slice(0, caret).split("\n");
    const currentLine = linesToCursor.length;
    const char = linesToCursor[linesToCursor.length - 1].length;

    const previous = $(`#line_${lastLine}`);
    if (previous)
        previous.classList.remove("active");
    const current = $(`#line_${currentLine}`);
    if (current)
        current.classList.add("active");

    // Plain text only, so no HTML parsing is needed.
    position.textContent = `[${currentLine}:${char}]`;
    lastLine = currentLine;
    lastSelection = caret;
};
/**
 * Rebuilds the line-number gutter and schedules a preview refresh one second later.
 *
 * @param {Event} [event] input event whose target holds the text; when omitted the
 *                        textarea's current value is used
 */
const updateOutput = (event) => {
    const text = event ? event.target.value : input.value;
    const lineCount = text.split("\n").length;

    numbers.innerHTML = Array.from(
        { length: lineCount },
        (_, index) => `<span id="line_${index + 1}">${index + 1}</span>`
    ).join("");

    // The gutter was rebuilt from scratch, so the "active" marker is gone. Forget the
    // recorded caret offset so onLineChange re-applies it even when the caret did not
    // move (e.g. after a forward delete).
    lastSelection = undefined;
    onLineChange();

    // Only the most recent call may trigger a request.
    clearTimeout(timeout);
    timeout = setTimeout(() => {
        fetch("ajax.php", {
            method : "POST",
            // The body is raw Markdown, not JSON.
            headers: {
                "Content-Type" : "text/plain; charset=utf-8"
            },
            body : text
        })
            .then(response => response.text())
            // ajax.php answers with rendered HTML, hence innerHTML.
            .then(html => output.innerHTML = html)
            .catch(error => console.error("preview request failed:", error));
    }, 1000);
};
// Wire up the editor once the document has been parsed.
document.addEventListener("DOMContentLoaded", () => {
    updateOutput();

    // Anything that may move the caret refreshes the gutter highlight and status bar.
    // The loop variable is declared with `const`: module scripts run in strict mode
    // and must not write to an undeclared (or global) binding.
    for (const eventName of ["click", "change", "keydown", "focus"])
        input.addEventListener(eventName, () => onLineChange());

    // Insert a literal tab instead of letting the key move focus away.
    input.addEventListener("keydown", event => {
        if (event.key !== "Tab")
            return;
        event.preventDefault();
        const start = input.selectionStart;
        const end = input.selectionEnd;
        const value = input.value;
        input.value = value.substring(0, start) + "\t" + value.substring(end);
        input.selectionStart = input.selectionEnd = (start + 1);
        // Assigning `value` does not fire an "input" event, so refresh explicitly.
        clearTimeout(timeout);
        updateOutput();
    });

    input.addEventListener("input", event => {
        clearTimeout(timeout);
        updateOutput(event);
    });

    // Show how far the editor is scrolled, as a percentage clamped to 0..100.
    editor.addEventListener("scroll", () => {
        const height = input.clientHeight - editor.clientHeight;
        const top = editor.scrollTop;
        const ratio = top * 100 / height;
        // 0 / 0 yields NaN; report 0% instead of "NaN%".
        const fromTop = Number.isNaN(ratio) ? 0 : Math.min(100, Math.max(0, Math.round(ratio)));
        percent.innerHTML = `${fromTop}%`;
    });
});
/**
 * Selects the reported error position in the textarea and scrolls it into view.
 * Exposed on `window` because ajax.php emits `javascript: highlight(col, row)` links.
 *
 * @param {number} col column taken from the error location
 * @param {number} row line number taken from the error location (first line is 1)
 */
window.highlight = (col, row) => {
    const lines = input.value.split("\n");

    // Walk the lines up to `row`, tracking the offset where that line starts and ends.
    let lineStart = 0;
    let lineEnd = 0;
    for (let index = 0; index < lines.length; index++) {
        const length = lines[index].length;
        if (index + 1 === row) {
            lineEnd = lineStart + length;
            break;
        }
        lineStart += length + 1;
    }

    // A column pointing at the end of the line selects the whole line; otherwise the
    // selection runs from the column to the end of the line.
    const pointsAtLineEnd = lineStart + col - 1 === lineEnd;
    const selectionStart = pointsAtLineEnd ? lineStart : lineStart + col;

    input.focus();
    input.setSelectionRange(selectionStart, lineEnd);

    // Scroll so that roughly ten lines stay visible above the selected one.
    const lineHeight = input.clientHeight / lines.length;
    $(".editor").scrollTop = lineHeight * (row - 10);
    input.scrollLeft = 0;
};
</script>
<style rel="stylesheet">
/* Global reset: border-box sizing and no focus outlines anywhere. */
* {
box-sizing: border-box;
outline: 0 !important;
}
/* Links; error links (class "error") use a separate colour. */
a {
color: dodgerblue;
text-decoration: none;
}
a:hover {
text-decoration: underline;
}
a.error {
color: palevioletred;
}
/* Page layout: a full-height grid with two equal columns and a 30px bottom row. */
body {
display: grid;
grid-template-columns: 1fr 1fr;
grid-template-rows: auto 30px;
font-family: sans-serif;
margin: 0;
padding: 0;
height: 100vh;
overflow: hidden;
background: #333;
color: #eee;
}
body > * {
width: 100%;
border: solid 1px #111;
padding: 0;
margin: 0;
}
/* The Markdown input: monospace, no wrapping (white-space: pre), 4-column tabs. */
textarea {
width: 100%;
height: 100%;
font-family: monospace;
box-sizing: content-box;
background: transparent;
tab-size: 4;
border: none;
font-size: 1.2rem;
overflow-y: clip;
overflow-x: scroll;
white-space: pre;
resize: none;
margin: 1rem 0;
color: #FAF08B;
}
textarea::selection {
background-color: dodgerblue;
color: white;
}
/* Editor pane: an 80px gutter column next to the textarea; scrolls vertically. */
.editor {
display: grid;
grid-template-columns: 80px auto;
gap: 2rem;
height: 100%;
border-right: none;
overflow-y: auto;
border-bottom: none;
overflow-x: hidden;
}
/* Line-number gutter; font size and vertical padding match the textarea's font size and margin. */
.linenumbers {
font-size: 1.2rem;
text-align: right;
padding: 1rem 0;
color: #aaa;
font-family: monospace;
background: #222;
}
.linenumbers span {
display: block;
padding: 0 1rem;
}
/* Gutter entry of the line that holds the caret. */
.linenumbers span.active {
color: yellow;
background: rgba(255,255,255,0.05);
}
/* Status bar; its 30px line height equals the bottom grid row. */
.statusbar {
font-size: 0.6rem;
line-height: 30px;
padding: 0 1rem;
background: rgba(0,0,0,0.2);
text-align: right;
border: none;
color: #888;
}
.statusbar > * {
margin-left: 0.5rem;
}
#position {
color: dodgerblue;
font-weight: bold;
}
/* Rendered preview; spans both grid rows. */
#output {
overflow-y: auto;
padding: 4rem;
max-width: 100%;
grid-row: span 2;
}
#output code {
word-break: break-word;
white-space: break-spaces;
}
#output table {
width: 100%;
}
#output img {
max-width: 100%;
height: auto;
}
/* WebKit scrollbar styling. */
::-webkit-scrollbar {
background-color: transparent;
width: 16px;
}
::-webkit-scrollbar-track {
background-color: transparent;
}
::-webkit-scrollbar-thumb {
background-color: rgba(255,255,255,0.1);
border-radius: 16px;
border: 4px solid #333;
}
::-webkit-scrollbar-button {
display:none;
}
/* Viewports up to 920px wide: single-column grid, smaller fonts and tighter spacing. */
@media (max-width: 920px) {
body {
grid-template-columns: 1fr !important;
grid-template-rows: 1fr 1fr;
}
textarea, .linenumbers {
font-size: 1rem;
}
textarea {
margin: 0.5rem 0;
}
.linenumbers {
padding: 0.5rem;
}
.editor {
grid-template-columns: 50px auto;
gap: 1rem;
}
#output {
padding: 1rem;
}
}
</style>
</head>
<body>
<section class="editor">
<section class="linenumbers">1</section>
<textarea id="input">
# Parkdown
– a simple recursive descent Markdown parser for PHP *(version >= 8.1)*
![Markdown is a simple markup language](https://git.mike-ochmann.de/MassiveDynamic/Parkdown/raw/branch/master/docs/logo_parkdown.svg)
## Specification
### Index
* [Block types](#supported_block_types)
* [Inline types](#supported_inline_types)
* [Examples](#examples)
* [Paragraphs](#paragraphs)
* [Images](#images)
* [Horizontal Rules](#horizontal_rules)
* [Block quotes](#block_quotes)
* [Code blocks](#code_blocks)
* [Tables](#tables)
* [References](#references)
* [Usage](#usage)
* [Testing](#testing)
### Supported block types
Parkdown currently supports the following block types:
* codeblocks *(with the ability to specify a language for the code block)*
* tables *(with alignment specification)*
* paragraphs
* block quotes
* lists *(like this one)*
* also nested
* horizontal rules `---`
### Supported inline types
Parkdown currently supports the following inline types:
* bold text (`**bold**`)
* italic text (`*italic*`)
* code snippets
* images (`![alt text](src url)`)
* links (`[link text][url or reference]`)
### Additional functionality
* references (`[marker]: URL`)
## Examples
### Paragraphs
```markdown
A simple paragraph can contain **bold text**, `inline codeblocks` and *italic text*. We can also link [with a direct url][https://google.com] *(i.e. to google)*
or via reference to [a later defined url][massivedynamic], if we so desire.
```
A simple paragraph can contain **bold text**, `inline codeblocks` and *italic text*. We can also link [with a direct url](https://google.com) *(i.e. to google)*
or via reference to [a later defined url][massivedynamic], if we so desire.
Paragraphs can be annotated with `id` and `class` attributes:
```markdown
Paragraphs can be annotated with ids and classes {.thisIsAClass, .anotherClass, #thisIsAnID}
```
results in
Paragraphs can be annotated with ids and classes {.thisIsAClass, .anotherClass, #thisIsAnID}
```html
<p class="thisIsAClass anotherClass" id="thisIsAnID">
Paragraphs can be annotated with ids and classes
</p>
```
### Images
```markdown
![this is an alt text](https://images.unsplash.com/photo-1571171637578-41bc2dd41cd2?ixlib=rb-1.2.1&ixid=MnwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8&auto=format&fit=crop&h=300&w=1740&q=80\)
```
![this is an alt text](https://images.unsplash.com/photo-1571171637578-41bc2dd41cd2?ixlib=rb-1.2.1&ixid=MnwxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8&auto=format&fit=crop&h=300&w=1740&q=80)
### Horizontal rules
```markdown
---
```
---
### Block quotes
```markdown
> Only two things are infinite,
> the universe and human stupidity,
> i am not totally sure about the universe, though...
> – Albert Einstein
```
> Only two things are infinite,
> the universe and human stupidity,
> i am not totally sure about the universe, though...
> – Albert Einstein
### Code blocks
```markdown
\`\`\`php
function main(int $argc, array $argv) : int {
echo "Hello World!";
return 0;
}
\`\`\`
```
```php
function main(int $argc, array $argv) : int {
echo "Hello World!";
return 0;
}
```
### Tables
```markdown
| Product name | Amount | Price |
|--------------|:--------:|-------:|
| Football | 7 | $18,00 |
| Golfball | 122 | $7,00 |
| Fooseball | 355 | $1,00 |
| Puck | 58 | $12,00 |
```
| Product name | Amount | Price |
|--------------|:--------:|-------:|
| Football | 7 | $18,00 |
| Golfball | 122 | $7,00 |
| Fooseball | 355 | $1,00 |
| Puck | 58 | $12,00 |
### References
```markdown
[massivedynamic]: https://massivedynamic.eu
```
[massivedynamic]: https://massivedynamic.eu
## Usage
Simply construct a new `parkdown\Parkdown` object and pass the Markdown source code to its constructor. The parsed `DOMDocument` or its `HTML` output can then be retrieved through the `::tree()` and `::html()` member functions, respectively.
**Example**
```php
use parkdown\Parkdown;
$source = "
This is a **bold** word in a paragraph.
";
$parser = new Parkdown($source);
$tree = $parser->tree();
print_r($tree);
echo $parser->html();
```
## Testing
Unit tests can be run via `composer`:
```
composer test
```
</textarea>
</section>
<section id="output"></section>
<section class="statusbar">
<span id="percent">0%</span>
<span id="position">[1:2]</span>
</section>
</body>
</html>

@ -3,29 +3,36 @@
namespace parkdown; namespace parkdown;
class Lexer { class Lexer {
private array $source; private array $source;
private ?string $fileName;
public function __construct(string $sourceCode) { public function __construct(string $sourceCode, ?string $fileName = null) {
$unifiedSource = str_replace(["\r\n", "\r"], "\n", $sourceCode); $this->fileName = $fileName;
$this->source = explode("\n", trim($unifiedSource, "\n")); $unifiedSource = str_replace(["\r\n", "\r"], "\n", $sourceCode);
$unifiedSource = str_replace(" ", "\t", $unifiedSource);
$this->source = explode("\n", trim($unifiedSource, "\n"));
} }
public function tokenize() : array { public function tokenize() : array {
$tokens = []; $tokens = [];
$row = 1;
$col = 1;
foreach ($this->source as $line) { foreach ($this->source as $line) {
if (strlen($line) < 1) { if (strlen($line) < 1) {
array_push($tokens, new Token(TokenType::EOL, "\n")); array_push($tokens, new Token(TokenType::EOL, "\n", [$row, 0, $this->fileName]));
$row++;
continue; continue;
} }
$buffer = ""; $buffer = "";
$number = false; $number = false;
$col = 1;
$clearBuffer = function() use (&$buffer, &$tokens) { $clearBuffer = function() use (&$buffer, &$tokens, $col, $row) {
if (strlen($buffer) < 1) if (strlen($buffer) < 1)
return; return;
array_push($tokens, new Token(TokenType::TEXT, $buffer)); array_push($tokens, new Token(TokenType::TEXT, $buffer, [$col, $row, $this->fileName]));
$buffer = ""; $buffer = "";
}; };
@ -34,66 +41,66 @@ class Lexer {
$clearBuffer(); $clearBuffer();
$number = true; $number = true;
} else if (!is_numeric($char) && $number) { } else if (!is_numeric($char) && $number) {
array_push($tokens, new Token(TokenType::NUMBER, $buffer)); array_push($tokens, new Token(TokenType::NUMBER, $buffer, [$col, $row, $this->fileName]));
$buffer = ""; $buffer = "";
$number = false; $number = false;
} }
switch($char) { switch($char) {
case '#': case '#':
$clearBuffer(); $clearBuffer();
array_push($tokens, new Token(TokenType::HASH, $char)); array_push($tokens, new Token(TokenType::HASH, $char, [$col, $row, $this->fileName]));
break; break;
case '*': case '*':
$clearBuffer(); $clearBuffer();
array_push($tokens, new Token(TokenType::ASTERISK, $char)); array_push($tokens, new Token(TokenType::ASTERISK, $char, [$col, $row, $this->fileName]));
break; break;
case '.': case '.':
$clearBuffer(); $clearBuffer();
array_push($tokens, new Token(TokenType::DOT, $char)); array_push($tokens, new Token(TokenType::DOT, $char, [$col, $row, $this->fileName]));
break; break;
case '-': case '-':
$clearBuffer(); $clearBuffer();
array_push($tokens, new Token(TokenType::MINUS, $char)); array_push($tokens, new Token(TokenType::MINUS, $char, [$col, $row, $this->fileName]));
break; break;
case '`': case '`':
$clearBuffer(); $clearBuffer();
array_push($tokens, new Token(TokenType::BACKTICK, $char)); array_push($tokens, new Token(TokenType::BACKTICK, $char, [$col, $row, $this->fileName]));
break; break;
case '[': case '[':
$clearBuffer(); $clearBuffer();
array_push($tokens, new Token(TokenType::LBRACKET, $char)); array_push($tokens, new Token(TokenType::LBRACKET, $char, [$col, $row, $this->fileName]));
break; break;
case ']': case ']':
$clearBuffer(); $clearBuffer();
array_push($tokens, new Token(TokenType::RBRACKET, $char)); array_push($tokens, new Token(TokenType::RBRACKET, $char, [$col, $row, $this->fileName]));
break; break;
case '(': case '(':
$clearBuffer(); $clearBuffer();
array_push($tokens, new Token(TokenType::LPAREN, $char)); array_push($tokens, new Token(TokenType::LPAREN, $char, [$col, $row, $this->fileName]));
break; break;
case ')': case ')':
$clearBuffer(); $clearBuffer();
array_push($tokens, new Token(TokenType::RPAREN, $char)); array_push($tokens, new Token(TokenType::RPAREN, $char, [$col, $row, $this->fileName]));
break; break;
case '!': case '!':
$clearBuffer(); $clearBuffer();
array_push($tokens, new Token(TokenType::BANG, $char)); array_push($tokens, new Token(TokenType::BANG, $char, [$col, $row, $this->fileName]));
break; break;
case '|': case '|':
$clearBuffer(); $clearBuffer();
array_push($tokens, new Token(TokenType::PIPE, $char)); array_push($tokens, new Token(TokenType::PIPE, $char, [$col, $row, $this->fileName]));
break; break;
case '\\': case '\\':
$clearBuffer(); $clearBuffer();
array_push($tokens, new Token(TokenType::BACKSLASH, $char)); array_push($tokens, new Token(TokenType::BACKSLASH, $char, [$col, $row, $this->fileName]));
break; break;
case '>': case '>':
$clearBuffer(); $clearBuffer();
array_push($tokens, new Token(TokenType::GT, $char)); array_push($tokens, new Token(TokenType::GT, $char, [$col, $row, $this->fileName]));
break; break;
case ' ': case ' ':
$clearBuffer(); $clearBuffer();
array_push($tokens, new Token(TokenType::TAB, $char)); array_push($tokens, new Token(TokenType::TAB, $char, [$col, $row, $this->fileName]));
break; break;
case ':': case ':':
if (str_ends_with($buffer, "http") || str_ends_with($buffer, "https")) { if (str_ends_with($buffer, "http") || str_ends_with($buffer, "https")) {
@ -102,26 +109,33 @@ class Lexer {
} }
$clearBuffer(); $clearBuffer();
array_push($tokens, new Token(TokenType::COLON, $char)); array_push($tokens, new Token(TokenType::COLON, $char, [$col, $row, $this->fileName]));
break; break;
case '{': case '{':
$clearBuffer(); $clearBuffer();
array_push($tokens, new Token(TokenType::LBRACE, $char)); array_push($tokens, new Token(TokenType::LBRACE, $char, [$col, $row, $this->fileName]));
break; break;
case '}': case '}':
$clearBuffer(); $clearBuffer();
array_push($tokens, new Token(TokenType::RBRACE, $char)); array_push($tokens, new Token(TokenType::RBRACE, $char, [$col, $row, $this->fileName]));
break;
case ' ':
$clearBuffer();
array_push($tokens, new Token(TokenType::SPACE, $char, [$col, $row, $this->fileName]));
break; break;
default: default:
$buffer .= $char; $buffer .= $char;
break; break;
} }
$col++;
} }
$clearBuffer(); $clearBuffer();
array_push($tokens, new Token(TokenType::EOL, "\n")); array_push($tokens, new Token(TokenType::EOL, "\n", [$col, $row, $this->fileName]));
$row++;
} }
$clearBuffer(); array_push($tokens, new Token(TokenType::EOF, "\0", [$col, $row, $this->fileName]));
array_push($tokens, new Token(TokenType::EOF, "\0"));
return $tokens; return $tokens;
} }

@ -8,11 +8,11 @@ class Parkdown {
private string $sourceCode; private string $sourceCode;
private DOMDocument $tree_; private DOMDocument $tree_;
public function __construct(string $sourceCode) { public function __construct(string $sourceCode, bool $strict = false, ?string $fileName = null) {
$this->sourceCode = $sourceCode; $this->sourceCode = $sourceCode;
$lexer = new Lexer($this->sourceCode); $lexer = new Lexer($this->sourceCode, $fileName);
$parser = new Parser($lexer->tokenize()); $parser = new Parser($lexer->tokenize(), $strict);
$this->tree_ = $parser->parse(); $this->tree_ = $parser->parse();
} }

@ -6,6 +6,7 @@ use Attribute;
use DOMDocument; use DOMDocument;
use DOMElement; use DOMElement;
use DOMNode; use DOMNode;
use phpDocumentor\Reflection\DocBlock\Tags\Throws;
enum ListType { enum ListType {
case ORDERED; case ORDERED;
@ -24,12 +25,14 @@ class Parser {
private int $pointer; private int $pointer;
private DOMDocument $document; private DOMDocument $document;
private array $references; private array $references;
private bool $strict;
public function __construct(array $tokenStream) { public function __construct(array $tokenStream, $strict = false) {
$this->tokenStream = $tokenStream; $this->tokenStream = $tokenStream;
$this->pointer = 0; $this->pointer = 0;
$this->document = new DOMDocument(); $this->document = new DOMDocument();
$this->references = []; $this->references = [];
$this->strict = $strict;
} }
private function current() : Token { private function current() : Token {
@ -44,6 +47,11 @@ class Parser {
return $this->peek(-1); return $this->peek(-1);
} }
/**
 * Inserts a token into the token stream without removing any existing token.
 *
 * @param Token $token  token to insert
 * @param int   $offset insertion position relative to the current pointer
 */
private function insert(Token $token, int $offset = 0) : void {
	array_splice($this->tokenStream, $this->pointer + $offset, 0, [$token]);
}
private function peek(int $amount = 0) : Token { private function peek(int $amount = 0) : Token {
$amount += $this->pointer; $amount += $this->pointer;
if ($amount < 0 || $amount >= count($this->tokenStream)) if ($amount < 0 || $amount >= count($this->tokenStream))
@ -63,6 +71,26 @@ class Parser {
return stripslashes($text); return stripslashes($text);
} }
/**
 * Formats a token location as the prefix of a parser error message.
 *
 * @param array $loc location in the order [column, row, file name]; a missing
 *                   file name is reported as "INPUT_STRING"
 * @return string "FILE:ROW:COL: ERROR: "
 */
private static function LOC(array $loc) : string {
	[$col, $row, $fileName] = $loc;
	$file = $fileName ? $fileName : "INPUT_STRING";

	return "$file:$row:$col: ERROR: ";
}
/**
 * Asserts a parser expectation; the failure is described by a ParserError whose
 * message is the token's location prefix followed by $message.
 *
 * NOTE(review): this goes through PHP's assert(), so it presumably has no effect
 * when assertions are disabled in the PHP configuration — confirm.
 *
 * @param bool   $assertion condition that is expected to hold
 * @param Token  $token     token whose location is reported
 * @param string $message   description appended after the location prefix
 */
private static function Assert(bool $assertion, Token $token, string $message = "") : void {
assert($assertion, new ParserError(self::LOC($token->location).$message));
}
/**
 * Turns an HTML fragment into a slug: tags stripped, surrounding whitespace
 * trimmed, lower-cased, spaces replaced by underscores.
 *
 * @param string $html HTML fragment
 * @return string slug
 */
public static function TextToSlug(string $html) : string {
	$plain = strtolower(trim(strip_tags($html)));

	return str_replace(" ", "_", $plain);
}
private function resolveReferences(DOMElement $node) : void { private function resolveReferences(DOMElement $node) : void {
if (count($this->references) < 1) if (count($this->references) < 1)
return; return;
@ -92,34 +120,68 @@ class Parser {
private function parseBold() : DOMNode { private function parseBold() : DOMNode {
$buffer = ""; $buffer = "";
while ($this->current()->type !== TokenType::ASTERISK && $this->current()->type !== TokenType::EOL) { while ($this->current()->type !== TokenType::ASTERISK) {
if ($this->current()->type === TokenType::EOL || $this->current()->type === TokenType::EOF) {
if (!$this->strict) {
[$col, $row, $fileName] = $this->current()->location;
$this->insert(new Token(TokenType::ASTERISK, "*", [$col + 1, $row, $fileName]));
$this->insert(new Token(TokenType::ASTERISK, "*", [$col + 2, $row, $fileName]));
}
break;
}
$buffer .= $this->consume()->data; $buffer .= $this->consume()->data;
} }
$this->consume(); if (!$this->strict && $this->current()->type !== TokenType::ASTERISK)
$this->consume(); $this->insert(new Token(TokenType::ASTERISK, "*", $this->current()->location));
$asterisk = $this->consume();
self::Assert($asterisk->type === TokenType::ASTERISK, $asterisk, "expected asterisk, got ".$asterisk->type->name);
if (!$this->strict && $this->current()->type !== TokenType::ASTERISK)
$this->insert(new Token(TokenType::ASTERISK, "*", $this->current()->location));
$asterisk = $this->consume();
self::Assert($asterisk->type === TokenType::ASTERISK, $asterisk, "expected asterisk, got ".$asterisk->type->name);
return $this->document->createElement("b", $buffer); return $this->document->createElement("b", $buffer);
} }
private function parseItalic() : DOMNode { private function parseItalic() : DOMNode {
$buffer = ""; $buffer = "";
while ($this->current()->type !== TokenType::ASTERISK && $this->current()->type !== TokenType::EOL) { while ($this->current()->type !== TokenType::ASTERISK) {
if ($this->current()->type === TokenType::EOL || $this->current()->type === TokenType::EOF) {
if (!$this->strict) {
[$col, $row, $fileName] = $this->current()->location;
$this->insert(new Token(TokenType::ASTERISK, "*", [$col + 1, $row, $fileName]));
}
break;
}
$buffer .= $this->consume()->data; $buffer .= $this->consume()->data;
} }
$this->consume(); $asterisk = $this->consume();
self::Assert($asterisk->type === TokenType::ASTERISK, $asterisk, "expected asterisk, got ".$asterisk->type->name);
return $this->document->createElement("i", $buffer); return $this->document->createElement("i", $buffer);
} }
private function parseCode() : DOMNode { private function parseCode() : DOMNode {
$buffer = ""; $buffer = "";
$this->consume(); $backtick = $this->consume();
while ($this->current()->type !== TokenType::BACKTICK && $this->current()->type !== TokenType::EOL) self::Assert($backtick->type === TokenType::BACKTICK, $backtick, "expected backtick, got ".$backtick->type->name);
while ($this->current()->type !== TokenType::BACKTICK) {
// we need to recover, if input is malformed
if ($this->current()->type === TokenType::EOL || $this->current()->type === TokenType::EOF) {
if (!$this->strict)
$this->insert(new Token(TokenType::BACKTICK, "`", $this->current()->location));
break;
}
$buffer .= $this->consume()->data; $buffer .= $this->consume()->data;
}
$this->consume(); $backtick = $this->consume();
self::Assert($backtick->type === TokenType::BACKTICK, $backtick, "inline code expression not autmatically closed (expected backtick)");
return $this->document->createElement("code", $buffer); return @$this->document->createElement("code", $buffer);
} }
private function parseLink() : ?DOMNode { private function parseLink() : ?DOMNode {
@ -127,11 +189,13 @@ class Parser {
$consumption = 1; $consumption = 1;
$lbracket = $this->consume(); $lbracket = $this->consume();
self::Assert($lbracket->type === TokenType::LBRACKET, $lbracket, "expected left bracket, got ".$lbracket->type->name);
while (!($this->current()->type === TokenType::RBRACKET || $this->current()->type === TokenType::EOL)) { while (!($this->current()->type === TokenType::RBRACKET || $this->current()->type === TokenType::EOL)) {
$text .= $this->consume()->data; $text .= $this->consume()->data;
$consumption++; $consumption++;
} }
$rbracket = $this->consume(); $rbracket = $this->consume();
self::Assert($rbracket->type === TokenType::RBRACKET, $rbracket, "expected right bracket, got ".$rbracket->type->name);
$consumption++; $consumption++;
if ($this->current()->type !== TokenType::LBRACKET && $this->current()->type !== TokenType::LPAREN) { if ($this->current()->type !== TokenType::LBRACKET && $this->current()->type !== TokenType::LPAREN) {
@ -139,6 +203,7 @@ class Parser {
return null; return null;
} }
$lbracketOrParen = $this->consume(); $lbracketOrParen = $this->consume();
self::Assert($lbracketOrParen->type === TokenType::LBRACKET || $lbracketOrParen->type === TokenType::LPAREN, $lbracketOrParen, "expected left bracket or left parenthesis, got ".$lbracketOrParen->type->name);
$index = ""; $index = "";
while (!($this->current()->type === TokenType::RBRACKET || $this->current()->type === TokenType::RPAREN || $this->current()->type === TokenType::EOL)) while (!($this->current()->type === TokenType::RBRACKET || $this->current()->type === TokenType::RPAREN || $this->current()->type === TokenType::EOL))
@ -184,12 +249,16 @@ class Parser {
array_push($elms, $this->parseBold()); array_push($elms, $this->parseBold());
} else { } else {
$this->consume(); $this->consume();
array_push($elms, $this->parseItalic()); array_push($elms, $this->parseItalic());
} }
continue; continue;
} elseif ($this->current()->type === TokenType::BACKTICK) { } elseif ($this->current()->type === TokenType::BACKTICK) {
$clearBuffer(); $clearBuffer();
array_push($elms, $this->parseCode()); $code = $this->parseCode();
self::Assert($code !== false, $this->current(), "malformed code block");
array_push($elms, $code);
continue; continue;
} elseif ($this->current()->type === TokenType::LBRACKET) { } elseif ($this->current()->type === TokenType::LBRACKET) {
$links = $this->parseLink(); $links = $this->parseLink();
@ -203,11 +272,14 @@ class Parser {
continue; continue;
} elseif ($this->current()->type === TokenType::BANG) { } elseif ($this->current()->type === TokenType::BANG) {
$bang = $this->consume(); $bang = $this->consume();
self::Assert($bang->type === TokenType::BANG, $bang, "expected exclamation mark, got ".$bang->type->name.", this may be a parser bug");
if ($this->current()->type !== TokenType::LBRACKET) { if ($this->current()->type !== TokenType::LBRACKET) {
$buffer .= self::StripBackslashes($this->consume()->data); $buffer .= self::StripBackslashes($this->consume()->data);
continue; continue;
} }
$lbracket = $this->consume(); $lbracket = $this->consume();
self::Assert($lbracket->type === TokenType::LBRACKET, $lbracket, "expected left bracket, got ".$lbracket->type->name);
$alt = ""; $alt = "";
while ($this->current()->type !== TokenType::RBRACKET && $this->current()->type !== TokenType::EOL) while ($this->current()->type !== TokenType::RBRACKET && $this->current()->type !== TokenType::EOL)
$alt .= self::StripBackslashes($this->consume()->data); $alt .= self::StripBackslashes($this->consume()->data);
@ -218,7 +290,10 @@ class Parser {
continue; continue;
} }
$rbracket = $this->consume(); $rbracket = $this->consume();
self::Assert($rbracket->type === TokenType::RBRACKET, $rbracket, "expected right bracket, got ".$rbracket->type->name);
$lparen = $this->consume(); $lparen = $this->consume();
self::Assert($lparen->type === TokenType::LPAREN, $lparen, "expected left parenthesis, got ".$lparen->type->name);
$src = ""; $src = "";
while ($this->current()->type !== TokenType::RPAREN && $this->current()->type !== TokenType::EOL) while ($this->current()->type !== TokenType::RPAREN && $this->current()->type !== TokenType::EOL)
$src .= $this->consume()->data; $src .= $this->consume()->data;
@ -228,6 +303,8 @@ class Parser {
continue; continue;
} }
$rparen = $this->consume(); $rparen = $this->consume();
self::Assert($rparen->type === TokenType::RPAREN, $rparen, "expected right parenthesis, got ".$rparen->type->name);
$elm = $this->document->createElement("img"); $elm = $this->document->createElement("img");
if (strlen($alt) > 0) if (strlen($alt) > 0)
$elm->setAttribute("alt", $alt); $elm->setAttribute("alt", $alt);
@ -235,9 +312,10 @@ class Parser {
$clearBuffer(); $clearBuffer();
array_push($elms, $elm); array_push($elms, $elm);
continue; continue;
} elseif ($this->current()->type === TokenType::LBRACE) { } elseif ($this->current()->type === TokenType::LBRACE) {
$lbrace = $this->consume(); $lbrace = $this->consume();
assert($lbrace->type === TokenType::LBRACE, "expected left brace, got ".$lbrace->type->name); self::Assert($lbrace->type === TokenType::LBRACE, $lbrace, "expected left brace, got ".$lbrace->type->name);
$content = ""; $content = "";
while ($this->current()->type !== TokenType::EOF && while ($this->current()->type !== TokenType::EOF &&
@ -246,7 +324,7 @@ class Parser {
$content .= $this->consume()->data; $content .= $this->consume()->data;
} }
$rbrace = $this->consume(); $rbrace = $this->consume();
assert($rbrace->type === TokenType::RBRACE, "expected right brace, got ".$rbrace->type->name); self::Assert($rbrace->type === TokenType::RBRACE, $rbrace, "expected right brace, got ".$rbrace->type->name);
$attributes = array_map(function($element) { $attributes = array_map(function($element) {
return trim($element); return trim($element);
@ -270,6 +348,18 @@ class Parser {
} }
array_push($elms, $obj); array_push($elms, $obj);
} elseif ($this->current()->type === TokenType::SPACE) { // do linebreak when two spaces are at the EOL
if ($this->last()->type === TokenType::SPACE && $this->next()->type === TokenType::EOL) {
$this->consume();
$clearBuffer();
$elm = $this->document->createElement("br");
array_push($elms, $elm);
continue;
} else {
$this->consume();
$buffer .= " ";
}
} else } else
$buffer .= self::StripBackslashes($this->consume()->data); $buffer .= self::StripBackslashes($this->consume()->data);
} }
@ -296,15 +386,18 @@ class Parser {
if ($this->current()->type === TokenType::EOF) if ($this->current()->type === TokenType::EOF)
break; break;
// then we except an asterisk or a number followed by a period // then we expect an asterisk or a number followed by a period
if ($type === ListType::UNORDERED) { if ($type === ListType::UNORDERED) {
$asterisk = $this->consume(); if ($this->current()->type === TokenType::ASTERISK)
assert($asterisk->type === TokenType::ASTERISK, "expected asterisk, got ".$asterisk->type->name); $this->consume();
} else { } else {
$number = $this->consume(); if ($this->current()->type === TokenType::NUMBER) {
assert($number->type === TokenType::NUMBER, "expected number, got ".$number->type->name); $this->consume();
$period = $this->consume(); if ($this->strict && $this->current()->type !== TokenType::DOT)
assert($period->type === TokenType::DOT, "expected period, got ".$period->type->name); $this->insert(new Token(TokenType::DOT, ".", $this->current()->location));
$period = $this->consume();
self::Assert($period->type === TokenType::DOT, $period, "expected period, got ".$period->type->name);
}
} }
// then we parse the node content // then we parse the node content
@ -317,7 +410,7 @@ class Parser {
// if so, we want to append a sub list to the current item // if so, we want to append a sub list to the current item
// here should be a EOL // here should be a EOL
assert($this->current()->type === TokenType::EOL, "expected EOL, got ".$this->current()->type->name); self::Assert($this->current()->type === TokenType::EOL, $this->current(), "expected EOL, got ".$this->current()->type->name);
$this->consume(); $this->consume();
$nextLevel = 0; $nextLevel = 0;
@ -378,6 +471,7 @@ class Parser {
foreach ($this->parseText() as $node) foreach ($this->parseText() as $node)
if ($node instanceof DOMNode) if ($node instanceof DOMNode)
$elm->appendChild($node); $elm->appendChild($node);
$elm->setAttribute("id", self::TextToSlug($elm->textContent));
$this->document->appendChild($elm); $this->document->appendChild($elm);
} }
@ -502,6 +596,8 @@ class Parser {
$head = $this->parseTableHead(); $head = $this->parseTableHead();
$props = $this->parseTableAlignment(); $props = $this->parseTableAlignment();
self::Assert(count($props) === count($head->childNodes), $this->current(), "the number of alignment columns does not match the number of header columns");
$i = 0; $i = 0;
foreach($head->childNodes as $col) { foreach($head->childNodes as $col) {
$col->setAttribute("style", "text-align: ".$props[$i]); $col->setAttribute("style", "text-align: ".$props[$i]);
@ -573,9 +669,11 @@ class Parser {
$this->parseHeading(); $this->parseHeading();
break; break;
case TokenType::NUMBER: case TokenType::NUMBER:
$list = $this->parseList(ListType::ORDERED); if ($this->next()->type === TokenType::DOT) {
$this->document->appendChild($list); $list = $this->parseList(ListType::ORDERED);
break; $this->document->appendChild($list);
break;
}
case TokenType::BACKTICK: case TokenType::BACKTICK:
$this->parseCodeBlock(); $this->parseCodeBlock();
break; break;

@ -0,0 +1,7 @@
<?php declare(strict_types=1);
namespace parkdown;
use AssertionError;
/**
 * Error type of the parkdown parser.
 *
 * Extends PHP's built-in AssertionError and declares no members of its own,
 * so it behaves exactly like AssertionError but can be caught specifically
 * as a ParserError (or generically as an AssertionError / Error).
 *
 * NOTE(review): presumably thrown by the parser's assertion helper when a
 * token does not match what the grammar expects — confirm against Parser.php.
 */
class ParserError extends AssertionError {}

@ -24,14 +24,17 @@ enum TokenType {
case TAB ; case TAB ;
case LBRACE ; case LBRACE ;
case RBRACE ; case RBRACE ;
case SPACE ;
} }
class Token { class Token {
public TokenType $type; public TokenType $type;
public string $data; public string $data;
public array $location;
public function __construct(TokenType $type, string $data = "") { public function __construct(TokenType $type, string $data = "", array $location = []) {
$this->type = $type; $this->type = $type;
$this->data = $data; $this->data = $data;
$this->location = $location;
} }
} }

@ -12,11 +12,11 @@ final class AnnotationsTest extends TestCase {
##### This is an H5 {.withAClass} ##### This is an H5 {.withAClass}
"; ";
$target = " $target = "
<h1>This is an H1</h1> <h1 id=\"this_is_an_h1\">This is an H1</h1>
<h2>This is an H2</h2> <h2 id=\"this_is_an_h2\">This is an H2</h2>
<h3>This is an H3</h3> <h3 id=\"this_is_an_h3\">This is an H3</h3>
<h4>This is an H4</h4> <h4 id=\"this_is_an_h4\">This is an H4</h4>
<h5>This is an H5</h5> <h5 id=\"this_is_an_h5\">This is an H5</h5>
"; ";
[$source, $result] = createTest($source, $target); [$source, $result] = createTest($source, $target);

@ -8,7 +8,7 @@ final class HeadingsTest extends TestCase {
# This is an H1 # This is an H1
"; ";
$target = " $target = "
<h1>This is an H1</h1> <h1 id=\"this_is_an_h1\">This is an H1</h1>
"; ";
[$source, $result] = createTest($source, $target); [$source, $result] = createTest($source, $target);
@ -20,7 +20,7 @@ final class HeadingsTest extends TestCase {
## This is an H2 ## This is an H2
"; ";
$target = " $target = "
<h2>This is an H2</h2> <h2 id=\"this_is_an_h2\">This is an H2</h2>
"; ";
[$source, $result] = createTest($source, $target); [$source, $result] = createTest($source, $target);
@ -32,7 +32,7 @@ final class HeadingsTest extends TestCase {
### This is an H3 ### This is an H3
"; ";
$target = " $target = "
<h3>This is an H3</h3> <h3 id=\"this_is_an_h3\">This is an H3</h3>
"; ";
[$source, $result] = createTest($source, $target); [$source, $result] = createTest($source, $target);
@ -44,7 +44,7 @@ final class HeadingsTest extends TestCase {
#### This is an H4 #### This is an H4
"; ";
$target = " $target = "
<h4>This is an H4</h4> <h4 id=\"this_is_an_h4\">This is an H4</h4>
"; ";
[$source, $result] = createTest($source, $target); [$source, $result] = createTest($source, $target);
@ -56,7 +56,7 @@ final class HeadingsTest extends TestCase {
##### This is an H5 ##### This is an H5
"; ";
$target = " $target = "
<h5>This is an H5</h5> <h5 id=\"this_is_an_h5\">This is an H5</h5>
"; ";
[$source, $result] = createTest($source, $target); [$source, $result] = createTest($source, $target);

Loading…
Cancel
Save