~zenomat/tinywiki

tinywiki/extract_markdown.py -rwxr-xr-x 843 bytes
728e6a1a — Adrian Change grep regex to not replace inter-document links 2 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#!/usr/bin/env python
import commonmark

# Load the markdown source of the wiki index page from the working directory.
with open('index.md') as source_file:
    text = source_file.read()

# Parse that source into a commonmark syntax tree for the extraction below.
parser = commonmark.Parser()
ast = parser.parse(text)

# Returns the text from markdown, stripped of the markdown syntax itself
def ast2text(astNode):
    """Return the plain text of a parsed markdown tree, without the markup.

    astNode must provide ``walker()``, whose result yields
    ``(node, entering)`` pairs; each node is read through its ``literal``
    and ``t`` (node type) attributes.

    The literal text of every node that has one is concatenated in walk
    order, and line breaks are re-inserted where the markup implied them:
    one newline for a hard or soft line break, a blank line after each
    paragraph, one newline after each heading, and one newline each time
    an image node is visited.  Leading and trailing whitespace is stripped
    from the result.

    As a side effect, one empty line is printed to stdout before returning.
    """
    parts = []
    for current, entering in astNode.walker():
        # Add the text
        if current.literal:
            parts.append(current.literal)

        # Add in the missing line breaks
        kind = current.t
        if kind in ("linebreak", "softbreak"):
            # A softbreak (plain line wrap inside a paragraph) carries no
            # literal of its own; without a newline here the words on
            # either side of the wrap would be glued together.
            parts.append("\n")
        elif kind == "paragraph" and not entering:
            parts.append("\n\n")
        elif kind == "heading" and not entering:
            parts.append("\n")
        elif kind == "image":
            parts.append("\n")

    # NOTE: kept so that the script's existing stdout (a leading blank
    # line before the extracted text) does not change.
    print("")
    return "".join(parts).strip()

# Write the extracted plain text of the parsed document to stdout.
plain_text = ast2text(ast)
print(plain_text)