~jleightcap/hhtml

ref: f6e115bbfa6e40e311acfe2b8aa9445d8f99d2c5 hhtml/src/Markup/Parse.hs -rw-r--r-- 4.1 KiB
f6e115bbjleightcap strickout formatting 6 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
-- Parse.hs
module Markup.Parse
  ( ast
  , asts
  , bold
  , codeBlock
  , codeInline
  , condense
  , config
  , configElem
  , document
  , format
  , italic
  , line
  , paragraph
  , parseMarkup
  , parseWithEof
  , parseWithLeftover
  , plain
  , strikeout
  , text
  , url
  ) where

import Control.Monad (void)
import Document
import Text.ParserCombinators.Parsec

--
-- parsec parsing
--
whitespace :: Parser ()
whitespace = void $ many1 $ oneOf " \n\t"

eol :: Parser String
eol = string "\n"

lexeme :: Parser String
lexeme = many1 $ alphaNum

line :: Parser String
line = many1 $ noneOf "\n"

configElem :: Parser a -> String -> Parser a
configElem p s = string (s ++ ":") *> optional whitespace *> p <* eol

true :: Parser Bool
true = string "True" *> return True

false :: Parser Bool
false = string "False" *> return False

bool :: Parser Bool
bool = choice [true, false]

config :: Parser Conf
config = do
  void $ string "---\n"
  author <- (try . optionMaybe . configElem line) "author"
  date <- (try . optionMaybe . configElem line) "date"
  readTime <- (try . optionMaybe . configElem line) "read"
  style <- (try . optionMaybe . configElem line) "style"
  title <- configElem line "title"
  contents <- (try . configElem bool) "toc"
  void $ string "---\n"
  return $ Conf author date readTime style title contents

-- parse between delimiters (delimit "`" "`code`" -> code)
delimit :: String -> String -> Parser String
delimit l r = between (string l) (string r) (many $ noneOf r)

delimit1 :: String -> String -> Parser String
delimit1 l r = between (string l) (string r) (many1 $ noneOf r)

--
-- inline text formatting
--
codeInline :: Parser Text
codeInline = CodeInline <$> delimit1 "`" "`"

italic :: Parser Text
italic = Italic . format <$> delimit1 "_" "_"

bold :: Parser Text
bold = Bold . format <$> delimit1 "*" "*"

strikeout :: Parser Text
strikeout = Strikeout . format <$> delimit1 "~" "~"

url :: Parser Text
url = Link <$> (fmap format $ delimit1 "[" "]") <*> delimit1 "(" ")"

delims :: [Char]
delims = "*_`[]~" -- characters that define inline formatting

plain :: Parser Text
plain = fmap Plain $ many1 $ noneOf delims

text :: Parser Text
text =
  choice
    [try italic, try bold, try codeInline, try strikeout, try url, try plain]

format :: String -> [Text]
format s =
  case parseWithLeftover text s of
    Left _ -> []
    Right (f, rest) -> f : (format rest)

--
-- document structure
--
unorderedListItem :: Parser [Text]
unorderedListItem = fmap format $ optional whitespace *> line

unorderedList :: Parser AST
unorderedList =
  fmap UnorderedList $
  string "-" *> unorderedListItem `sepBy` (try $ string "\n-")

codeBlock :: Parser AST
codeBlock =
  fmap CodeBlock $ do
    void $ string "```\n"
    manyTill anyToken $ try $ string "\n```"

quote :: Parser AST
quote = fmap Quote $ string ">" *> optional whitespace *> fmap format line

-- image alt text does not include formatting
img :: Parser AST
img = do
  alt <- between (string "![") (string "]") (many $ noneOf "]")
  fmt <- optionMaybe $ delimit1 "[" "]"
  path <- delimit1 "(" ")"
  return $ Image alt fmt path

header :: Parser AST
header = do
  hdelims <- many1 $ char '#'
  optional whitespace
  hbody <- line
  return $ Header (length hdelims) (format hbody)

centerText :: Parser AST
centerText = fmap (CenterText . format) $ delimit1 "{{{" "}}}"

paragraph :: Parser AST
paragraph =
  Paragraph . format <$> (manyTill anyToken . try . lookAhead $ string "\n")

ast :: Parser AST
ast =
  choice
    [ try centerText
    , try codeBlock
    , try header
    , try img
    , try quote
    , try unorderedList
    , try paragraph -- default; must be last!
    ]

asts :: Parser [AST]
asts = ast `sepEndBy` eol

document :: Parser Document
document = Document <$> config <*> (fmap condense asts)

parseWithLeftover :: Parser a -> String -> Either ParseError (a, String)
parseWithLeftover p = parse ((,) <$> p <*> leftOver) ""
  where
    leftOver = manyTill anyToken eof

parseWithEof :: Parser a -> String -> Either ParseError a
parseWithEof p = parse (p <* eof) ""

parseMarkup :: String -> Either ParseError Document
parseMarkup = parseWithEof document