~aasg/haunted-blog

ref: ddc39469a1b6ea301e20815b2cedc4491472c9df haunted-blog/scm/aasg/sxml/reparse-extended-markdown.scm -rw-r--r-- 4.1 KiB
ddc39469 — Aluísio Augusto Silva Gonçalves Apply SPDX copyright and license headers in compliance with REUSE 9 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
; SPDX-FileCopyrightText: 2020 Aluísio Augusto Silva Gonçalves <https://aasg.name>
; SPDX-License-Identifier: GPL-3.0-or-later

(define-module (aasg sxml reparse-extended-markdown)
               #:use-module (ice-9 match)
               #:use-module (ice-9 regex)
               #:use-module (ice-9 vlist)
               #:use-module (srfi srfi-1)
               #:use-module (srfi srfi-11)
               #:use-module (sxml transform)
               #:export (reparse-markdown-sxml))

(define (parse-heading-attributes . sxml)
  ; Split heading attributes off the heading text proper.
  ; The heading text and a list of (unparsed) attributes are returned
  ; as multiple values.
  ;
  ; Note that spaces are not supported in values (even quoted ones).
  (define (extract-attributes str)
    (let ((match (string-match "[[:space:]]*\\{([^\\{\\}]*)\\}$" str)))
      (if match
          (values
            (match:prefix match)
            (string-split (match:substring match 1) char-whitespace?))
          (values str '()))))
  ; Parse a list of attributes and return a VHash mapping attribute
  ; keys to individual values.
  (define (build-attributes-dict attrs)
    (fold (lambda (attr attr-dict)
            (let ((id-match (string-match "#([^[:space:]]+)" attr))
                  (class-match (string-match "\\.([^[:space:]]+)" attr))
                  (kvu-match (string-match "([^\"'[:space:]=/>]+)=([^\"'[:space:]]+)" attr))
                  (kvs-match (string-match "([^\"'[:space:]=/>]+)='([^']+)'" attr))
                  (kvd-match (string-match "([^\"'[:space:]=/>]+)=\"([^']+)\"" attr)))
              (cond (id-match (vhash-consq 'id (match:substring id-match 1) attr-dict))
                    (class-match (vhash-consq 'class (match:substring class-match 1) attr-dict))
                    (kvu-match (vhash-consq (string->symbol (match:substring kvu-match 1)) (match:substring kvu-match 2) attr-dict))
                    (kvs-match (vhash-consq (string->symbol (match:substring kvs-match 1)) (match:substring kvs-match 2) attr-dict))
                    (kvd-match (vhash-consq (string->symbol (match:substring kvd-match 1)) (match:substring kvd-match 2) attr-dict))
                    (else attr-dict))))
          vlist-null
          attrs))
  ; Merge attributes with the same key in the VHash, concatenating
  ; repeated attributes with whitespace.  The result is a list of
  ; key-value lists (not pairs) so it can be fed directly to SXML.
  (define (fold-attributes-dict attr-dict)
    (map (lambda (key)
           (list key (string-join (vhash-foldq* cons '() key attr-dict) " ")))
         (vhash-fold (lambda (key _ prev-keys) (lset-adjoin eq? prev-keys key)) '() attr-dict)))
  (match sxml
         ((tag middle ... tail)
          (let*-values (((before attrs) (extract-attributes tail))
                        ((parsed-attrs) (build-attributes-dict attrs))
                        ((folded-attrs) (fold-attributes-dict parsed-attrs)))
            (let ((sxml-attrs
                    (if (vhash-assq 'id parsed-attrs)
                        (cons '@ folded-attrs)
                        ; TODO: devise a deterministic ID generator (probably hashing the XML representation)
                        ;(cons* '@ (list 'id (generate-identifier sxml)) folded-attrs))))
                        (cons '@ folded-attrs))))
              `(,tag ,sxml-attrs ,@middle ,before))))
         (_ sxml)))

(define (remove-single-paragraphs-from-lists . sxml)
  (match sxml
         (`(li (p ,text ...))
           `(li ,text))
         (`(li (p ,text ...) (ul ,sublist))
           `(li ,text (ul ,sublist)))
         (_ sxml)))

(define (reparse-markdown-sxml sxml)
  (pre-post-order sxml `((h1 . ,parse-heading-attributes)
                         (h2 . ,parse-heading-attributes)
                         (h3 . ,parse-heading-attributes)
                         (h4 . ,parse-heading-attributes)
                         (h5 . ,parse-heading-attributes)
                         (h6 . ,parse-heading-attributes)
                         (li . ,remove-single-paragraphs-from-lists)
                         (*default* . ,(lambda (. sxml) sxml))
                         (*text* . ,(lambda (_ text) text)))))