~jleightcap/hhtml

ref: d2020239c517c1c9b5c944eaa598dea1f2b7adef hhtml/src/Markup/Parse.hs -rw-r--r-- 4.0 KiB
d2020239jleightcap applicative / fmap in parsing 4 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
-- Parse.hs
module Markup.Parse
  ( ast
  , asts
  , bold
  , codeBlock
  , codeInline
  , condense
  , config
  , configElem
  , document
  , format
  , italic
  , line
  , paragraph
  , parseMarkup
  , parseWithEof
  , parseWithLeftover
  , plain
  , strikeout
  , text
  , url
  ) where

import Control.Monad (void)
import Document
import Text.ParserCombinators.Parsec

--
-- parsec parsing
--
whitespace :: Parser ()
whitespace = void $ many1 $ oneOf " \n\t"

eol :: Parser String
eol = string "\n"

line :: Parser String
line = many1 $ noneOf "\n"

configElem :: Parser a -> String -> Parser a
configElem p s = string (s ++ ":") *> optional whitespace *> p <* eol

true :: Parser Bool
true = string "True" *> return True

false :: Parser Bool
false = string "False" *> return False

bool :: Parser Bool
bool = choice [true, false]

config :: Parser Conf
config = do
  void $ string "---\n"
  c_author <- (try . optionMaybe . configElem line) "author"
  c_date <- (try . optionMaybe . configElem line) "date"
  c_readTime <- (try . optionMaybe . configElem line) "read"
  c_style <- (try . optionMaybe . configElem line) "style"
  c_title <- configElem line "title"
  c_contents <- (try . configElem bool) "toc"
  void $ string "---\n"
  return $ Conf c_author c_date c_readTime c_style c_title c_contents

-- parse between delimiters (delimit "`" "`code`" -> code)
{-
delimit :: String -> String -> Parser String
delimit l r = between (string l) (string r) (many $ noneOf r)
-}

delimit1 :: String -> String -> Parser String
delimit1 l r = between (string l) (string r) (many1 $ noneOf r)

--
-- inline text formatting
--
codeInline :: Parser Text
codeInline = CodeInline <$> delimit1 "`" "`"

italic :: Parser Text
italic = Italic . format <$> delimit1 "_" "_"

bold :: Parser Text
bold = Bold . format <$> delimit1 "*" "*"

strikeout :: Parser Text
strikeout = Strikeout . format <$> delimit1 "~" "~"

url :: Parser Text
url = Link <$> (format <$> delimit1 "[" "]") <*> delimit1 "(" ")"

delims :: [Char]
delims = "*_`[]~" -- characters that define inline formatting

plain :: Parser Text
plain = Plain <$> (many1 . noneOf) delims

text :: Parser Text
text =
  choice
    [try italic, try bold, try codeInline, try strikeout, try url, try plain]

format :: String -> [Text]
format s =
  case parseWithLeftover text s of
    Left _ -> []
    Right (f, rest) -> f : (format rest)

--
-- document structure
--
unorderedListItem :: Parser [Text]
unorderedListItem = format <$> (optional whitespace *> line)

unorderedList :: Parser AST
unorderedList =
  UnorderedList <$>
  (string "-" *> unorderedListItem `sepBy` (try $ string "\n-"))

codeBlock :: Parser AST
codeBlock =
  CodeBlock <$> do string "```\n" *> (manyTill anyToken . try . string) "\n```"

quote :: Parser AST
quote = Quote <$> (string ">" *> optional whitespace *> fmap format line)

-- image alt text does not include formatting
img :: Parser AST
img =
  Image <$> (between (string "![") (string "]") (many $ noneOf "]")) <*>
  (optionMaybe $ delimit1 "[" "]") <*>
  (delimit1 "(" ")")

header :: Parser AST
header =
  Header <$> (fmap length $ many1 $ char '#') <* optional whitespace <*>
  (fmap format line)

centerText :: Parser AST
centerText = (CenterText . format) <$> delimit1 "{{{" "}}}"

paragraph :: Parser AST
paragraph =
  Paragraph . format <$> (manyTill anyToken . try . lookAhead $ string "\n")

ast :: Parser AST
ast =
  choice
    [ try centerText
    , try codeBlock
    , try header
    , try img
    , try quote
    , try unorderedList
    , try paragraph -- default; must be last!
    ]

asts :: Parser [AST]
asts = ast `sepEndBy` eol

document :: Parser Document
document = Document <$> config <*> fmap condense asts

parseWithLeftover :: Parser a -> String -> Either ParseError (a, String)
parseWithLeftover p = parse ((,) <$> p <*> leftOver) ""
  where
    leftOver = manyTill anyToken eof

parseWithEof :: Parser a -> String -> Either ParseError a
parseWithEof p = parse (p <* eof) ""

parseMarkup :: String -> Either ParseError Document
parseMarkup = parseWithEof document