~ancarda/gemtext-parser

gemtext-parser/src/Parser.php -rw-r--r-- 2.2 KiB
356266afMark Dain Add contributing guidelines 7 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
<?php

declare(strict_types=1);

namespace Ancarda\Gemini\Gemtext;

use Generator;

/**
 * Line-oriented Gemtext parser.
 *
 * Consumes the document one line at a time and lazily yields one node object
 * per recognised element (headings, blockquotes, list items, links,
 * preformatted blocks and paragraphs).
 */
final class Parser implements ParserInterface
{
    /**
     * Parse Gemtext lines into a lazy stream of nodes.
     *
     * @param iterable<string> $lines The document, one line per element. Lines
     *     are expected without their trailing line terminator: a "\n" is
     *     re-inserted between the lines collected inside a preformatted block.
     *
     * @return Generator Yields a `Node\*` instance for each parsed element, in
     *     document order.
     */
    public function parse(iterable $lines): Generator
    {
        $inPreBlock = false;
        $preContents = '';
        $preCaption = null;

        foreach ($lines as $line) {
            if (str_starts_with($line, '```')) {
                if ($inPreBlock) {
                    // This is the end of the pre block, yield everything collected so far.
                    // The last byte of preContents is ignored as it is just a newline character.
                    yield new Node\Preformatted(substr($preContents, 0, -1), $preCaption);
                    $preContents = '';
                } else {
                    // This is the start of the pre block. Extract the caption if there is one.
                    $cap = substr($line, 3);
                    $preCaption = strlen($cap) === 0 ? null : $cap;
                }
                $inPreBlock = !$inPreBlock;
            } elseif ($inPreBlock) {
                // Inside a pre block every line is taken verbatim, whatever it starts with.
                $preContents .= $line . "\n";
            } elseif (str_starts_with($line, '# ')) {
                yield new Node\H1(substr($line, 2));
            } elseif (str_starts_with($line, '## ')) {
                yield new Node\H2(substr($line, 3));
            } elseif (str_starts_with($line, '### ')) {
                yield new Node\H3(substr($line, 4));
            } elseif (str_starts_with($line, '>')) {
                yield new Node\Blockquote(substr($line, 1));
            } elseif (str_starts_with($line, '* ')) {
                yield new Node\ListElement(substr($line, 2));
            } elseif (str_starts_with($line, '=>')) {
                // Lop off `=>', then try to extract the label if there is one.
                // The URL ends at the first space OR tab: Gemtext allows either
                // as the separator, so searching for ' ' alone would glue a
                // tab-separated label onto the URL.
                $target = trim(substr($line, 2));
                $urlLength = strcspn($target, " \t");
                if ($urlLength === strlen($target)) {
                    // No separator found: the whole remainder is the URL.
                    yield new Node\Link($target);
                } else {
                    yield new Node\Link(
                        substr($target, 0, $urlLength),
                        trim(substr($target, $urlLength)),
                    );
                }
            } else {
                yield new Node\Paragraph($line);
            }
        }

        if ($inPreBlock) {
            // The input ended while a pre block was still open. Emit what was
            // collected instead of silently discarding it; as above, the
            // trailing newline byte is dropped.
            yield new Node\Preformatted(substr($preContents, 0, -1), $preCaption);
        }
    }
}