~aasg/haunted-blog

9619879d35e6229e6c27fa1bd98e7204bd831c73 — Aluísio Augusto Silva Gonçalves 1 year, 2 months ago c6ffdcb
haunt: Split utility modules off haunt.scm

With the upcoming TOC generation, the file will get too big for proper
navigation.
3 files changed, 125 insertions(+), 102 deletions(-)

M haunt.scm
A scm/aasg/sxml/from-pandoc.scm
A scm/aasg/sxml/reparse-extended-markdown.scm
M haunt.scm => haunt.scm +15 -102
@@ 3,7 3,10 @@
;---------
;+ Imports

(use-modules (haunt asset)
(add-to-load-path (string-append (dirname (current-filename)) "/scm"))
(use-modules (aasg sxml from-pandoc)
             (aasg sxml reparse-extended-markdown)
             (haunt asset)
             (haunt builder blog)
             (haunt builder atom)
             (haunt builder assets)


@@ 205,116 208,26 @@
;---------------------
;+ Readers and writers

(define (transform-markdown-tree sxml)
  (define (parse-heading-attributes . sxml)
    ; Split heading attributes off the heading text proper.
    ; The heading text and a list of (unparsed) attributes are returned
    ; as multiple values.
    ;
    ; Note that spaces are not supported in values (even quoted ones).
    (define (extract-attributes str)
      (let ((match (string-match "[[:space:]]*\\{([^\\{\\}]*)\\}$" str)))
        (if match
            (values
              (match:prefix match)
              (string-split (match:substring match 1) char-whitespace?))
            (values str '()))))
    ; Parse a list of attributes and return a VHash mapping attribute
    ; keys to individual values.
    (define (build-attributes-dict attrs)
      (fold (lambda (attr attr-dict)
              (let ((id-match (string-match "#([^[:space:]]+)" attr))
                    (class-match (string-match "\\.([^[:space:]]+)" attr))
                    (kvu-match (string-match "([^\"'[:space:]=/>]+)=([^\"'[:space:]]+)" attr))
                    (kvs-match (string-match "([^\"'[:space:]=/>]+)='([^']+)'" attr))
                    (kvd-match (string-match "([^\"'[:space:]=/>]+)=\"([^']+)\"" attr)))
                (cond (id-match (vhash-consq 'id (match:substring id-match 1) attr-dict))
                      (class-match (vhash-consq 'class (match:substring class-match 1) attr-dict))
                      (kvu-match (vhash-consq (string->symbol (match:substring kvu-match 1)) (match:substring kvu-match 2) attr-dict))
                      (kvs-match (vhash-consq (string->symbol (match:substring kvs-match 1)) (match:substring kvs-match 2) attr-dict))
                      (kvd-match (vhash-consq (string->symbol (match:substring kvd-match 1)) (match:substring kvd-match 2) attr-dict))
                      (else attr-dict))))
            vlist-null
            attrs))
    ; Merge attributes with the same key in the VHash, concatenating
    ; repeated attributes with whitespace.  The result is a list of
    ; key-value lists (not pairs) so it can be fed directly to SXML.
    (define (fold-attributes-dict attr-dict)
      (map (lambda (key)
             (list key (string-join (vhash-foldq* cons '() key attr-dict) " ")))
           (vhash-fold (lambda (key _ prev-keys) (lset-adjoin eq? prev-keys key)) '() attr-dict)))
    (match sxml
           ((tag middle ... tail)
            (let*-values (((before attrs) (extract-attributes tail))
                          ((parsed-attrs) (build-attributes-dict attrs))
                          ((folded-attrs) (fold-attributes-dict parsed-attrs)))
              (let ((sxml-attrs
                      (if (vhash-assq 'id parsed-attrs)
                          (cons '@ folded-attrs)
                          ; TODO: devise a deterministic ID generator (probably hashing the XML representation)
                          ;(cons* '@ (list 'id (generate-identifier sxml)) folded-attrs))))
                          (cons '@ folded-attrs))))
                `(,tag ,sxml-attrs ,@middle ,before))))
           (_ sxml)))
  (define (remove-single-paragraphs-from-lists . sxml)
    (match sxml
           (`(li (p ,text ...))
            `(li ,text))
           (`(li (p ,text ...) (ul ,sublist))
            `(li ,text (ul ,sublist)))
           (_ sxml)))
  (pre-post-order sxml `((h1 . ,parse-heading-attributes)
                         (h2 . ,parse-heading-attributes)
                         (h3 . ,parse-heading-attributes)
                         (h4 . ,parse-heading-attributes)
                         (h5 . ,parse-heading-attributes)
                         (h6 . ,parse-heading-attributes)
                         (li . ,remove-single-paragraphs-from-lists)
                        (*default* . ,(lambda (. sxml) sxml))
                        (*text* . ,(lambda (_ text) text)))))

(define extended-markdown-reader
  (make-reader
    (make-file-extension-matcher "md")
    (lambda (filename)
      (let*-values (((read-commonmark) (reader-proc commonmark-reader))
                    ((metadata orig-tree) (read-commonmark filename))
                    ((new-tree) (transform-markdown-tree orig-tree)))
                    ((new-tree) (reparse-markdown-sxml orig-tree)))
        (values metadata new-tree)))))

(define pandoc-reader
  (make-reader
    (make-file-extension-matcher "md")
    (lambda (filename)
      (define (read-pandoc filename)
        (let* ((html-port (open-pipe* OPEN_READ "pandoc" "--read=markdown" "--write=html" "--" filename))
               (html-string (get-string-all html-port)))
          (if (not (eqv? 0 (status:exit-val (close-pipe html-port))))
              (error "pandoc failed for " filename)
              html-string)))
      (define (html-fragment->sxml fragments)
        ; This is taken from haunt/reader.scm, © 2015 David Thompson.
        (define (read-html-fragments port)
          (let loop ()
            (let ((next-char (peek-char port)))
              (cond
                ((eof-object? next-char)
                 '())
                ((char-set-contains? char-set:whitespace next-char)
                 (read-char port)
                 (loop))
                (else
                  (match (xml->sxml port)
                         (('*TOP* sxml) (cons sxml (loop)))))))))
        (call-with-input-string fragments read-html-fragments))
      (let ((metadata (call-with-input-file filename
                                            (lambda (port)
                                              (let ((header (read-line port 'concat)))
                                                (unless (string=? header "---\n")
                                                  (unread-string header port)))
                                              (read-metadata-headers port))))
            (html-fragments (read-pandoc filename)))
        (values metadata (html-fragment->sxml html-fragments))))))
  (let ((read-yaml-preamble (lambda (filename)
                              (call-with-input-file filename (lambda (port)
                                                               (let ((header (read-line port 'concat)))
                                                                 (unless (string=? header "---\n")
                                                                   (unread-string header port)))
                                                               (read-metadata-headers port))))))
    (make-reader
      (make-file-extension-matcher "md")
      (lambda (filename)
        (values (read-yaml-preamble filename) (pandoc-md->sxml filename))))))

(define (uncollected-pages directory theme)
  (lambda (site posts)

A scm/aasg/sxml/from-pandoc.scm => scm/aasg/sxml/from-pandoc.scm +32 -0
@@ 0,0 1,32 @@
(define-module (aasg sxml from-pandoc)
               #:use-module (ice-9 match)
               #:use-module (ice-9 popen)
               #:use-module (ice-9 textual-ports)
               #:use-module (ice-9 vlist)
               #:use-module (srfi srfi-1)
               #:use-module (sxml simple)
               #:export (pandoc-md->sxml))

(define (pandoc filename)
  (let* ((html-port (open-pipe* OPEN_READ "pandoc" "--read=markdown" "--write=html" "--" filename))
         (html-string (get-string-all html-port)))
    (if (not (eqv? 0 (status:exit-val (close-pipe html-port))))
        (error "pandoc failed for " filename)
        html-string)))

; This is taken from haunt/reader.scm, © 2015 David Thompson.
(define (read-html-fragments port)
  (let loop ()
    (let ((next-char (peek-char port)))
      (cond
        ((eof-object? next-char)
         '())
        ((char-set-contains? char-set:whitespace next-char)
         (read-char port)
         (loop))
        (else
          (match (xml->sxml port)
                 (('*TOP* sxml) (cons sxml (loop)))))))))

(define (pandoc-md->sxml filename)
  (call-with-input-string (pandoc filename) read-html-fragments))

A scm/aasg/sxml/reparse-extended-markdown.scm => scm/aasg/sxml/reparse-extended-markdown.scm +78 -0
@@ 0,0 1,78 @@
(define-module (aasg sxml reparse-extended-markdown)
               #:use-module (ice-9 match)
               #:use-module (ice-9 regex)
               #:use-module (ice-9 vlist)
               #:use-module (srfi srfi-1)
               #:use-module (srfi srfi-11)
               #:use-module (sxml transform)
               #:export (reparse-markdown-sxml))

(define (parse-heading-attributes . sxml)
  ; Split heading attributes off the heading text proper.
  ; The heading text and a list of (unparsed) attributes are returned
  ; as multiple values.
  ;
  ; Note that spaces are not supported in values (even quoted ones).
  (define (extract-attributes str)
    (let ((match (string-match "[[:space:]]*\\{([^\\{\\}]*)\\}$" str)))
      (if match
          (values
            (match:prefix match)
            (string-split (match:substring match 1) char-whitespace?))
          (values str '()))))
  ; Parse a list of attributes and return a VHash mapping attribute
  ; keys to individual values.
  (define (build-attributes-dict attrs)
    (fold (lambda (attr attr-dict)
            (let ((id-match (string-match "#([^[:space:]]+)" attr))
                  (class-match (string-match "\\.([^[:space:]]+)" attr))
                  (kvu-match (string-match "([^\"'[:space:]=/>]+)=([^\"'[:space:]]+)" attr))
                  (kvs-match (string-match "([^\"'[:space:]=/>]+)='([^']+)'" attr))
                  (kvd-match (string-match "([^\"'[:space:]=/>]+)=\"([^']+)\"" attr)))
              (cond (id-match (vhash-consq 'id (match:substring id-match 1) attr-dict))
                    (class-match (vhash-consq 'class (match:substring class-match 1) attr-dict))
                    (kvu-match (vhash-consq (string->symbol (match:substring kvu-match 1)) (match:substring kvu-match 2) attr-dict))
                    (kvs-match (vhash-consq (string->symbol (match:substring kvs-match 1)) (match:substring kvs-match 2) attr-dict))
                    (kvd-match (vhash-consq (string->symbol (match:substring kvd-match 1)) (match:substring kvd-match 2) attr-dict))
                    (else attr-dict))))
          vlist-null
          attrs))
  ; Merge attributes with the same key in the VHash, concatenating
  ; repeated attributes with whitespace.  The result is a list of
  ; key-value lists (not pairs) so it can be fed directly to SXML.
  (define (fold-attributes-dict attr-dict)
    (map (lambda (key)
           (list key (string-join (vhash-foldq* cons '() key attr-dict) " ")))
         (vhash-fold (lambda (key _ prev-keys) (lset-adjoin eq? prev-keys key)) '() attr-dict)))
  (match sxml
         ((tag middle ... tail)
          (let*-values (((before attrs) (extract-attributes tail))
                        ((parsed-attrs) (build-attributes-dict attrs))
                        ((folded-attrs) (fold-attributes-dict parsed-attrs)))
            (let ((sxml-attrs
                    (if (vhash-assq 'id parsed-attrs)
                        (cons '@ folded-attrs)
                        ; TODO: devise a deterministic ID generator (probably hashing the XML representation)
                        ;(cons* '@ (list 'id (generate-identifier sxml)) folded-attrs))))
                        (cons '@ folded-attrs))))
              `(,tag ,sxml-attrs ,@middle ,before))))
         (_ sxml)))

(define (remove-single-paragraphs-from-lists . sxml)
  (match sxml
         (`(li (p ,text ...))
           `(li ,text))
         (`(li (p ,text ...) (ul ,sublist))
           `(li ,text (ul ,sublist)))
         (_ sxml)))

(define (reparse-markdown-sxml sxml)
  (pre-post-order sxml `((h1 . ,parse-heading-attributes)
                         (h2 . ,parse-heading-attributes)
                         (h3 . ,parse-heading-attributes)
                         (h4 . ,parse-heading-attributes)
                         (h5 . ,parse-heading-attributes)
                         (h6 . ,parse-heading-attributes)
                         (li . ,remove-single-paragraphs-from-lists)
                         (*default* . ,(lambda (. sxml) sxml))
                         (*text* . ,(lambda (_ text) text)))))