~zenomat/tinywiki

8794a37914517be50720a4f2fa98e3a196aef450 — zeno 3 months ago cfd3bfd
markdown extraction tool
1 files changed, 36 insertions(+), 0 deletions(-)

A extract_markdown.py
A extract_markdown.py => extract_markdown.py +36 -0
@@ 0,0 1,36 @@
#!/usr/bin/env python
import commonmark

# Read the page as UTF-8 explicitly: without ``encoding`` the platform's
# locale default is used, which can mis-decode non-ASCII Markdown text.
with open('index.md', 'r', encoding='utf-8') as myfile:
	text = myfile.read()

# Parse the Markdown source into a commonmark node tree.
parser = commonmark.Parser()
ast = parser.parse(text)

# Returns the text from markdown, stripped of the markdown syntax itself
def ast2text(astNode):
	"""Return the plain text of a parsed Markdown tree, without the markup.

	Walks every node reachable from ``astNode`` and concatenates the
	``literal`` of each node that has one, inserting the line breaks that
	the tree only represents structurally.

	Args:
		astNode: An object with a ``walker()`` method that yields
			``(node, entering)`` pairs, where each node exposes ``t`` (the
			node type name) and ``literal`` (its text, or a falsy value).

	Returns:
		The extracted text with leading and trailing whitespace stripped.
		Nothing is written to stdout; printing is left to the caller.
	"""
	pieces = []
	for current, entering in astNode.walker():
		# Nodes that carry content expose it through ``literal``.
		if current.literal:
			pieces.append(current.literal)

		node_type = current.t
		if node_type in ("linebreak", "softbreak"):
			# Break nodes have no literal of their own; without this the
			# words on either side of a line break would be glued together.
			pieces.append("\n")
		elif node_type == "paragraph" and not entering:
			# Blank line after each paragraph.
			pieces.append("\n\n")
		elif node_type == "heading" and not entering:
			pieces.append("\n")
		elif node_type == "image":
			# No ``entering`` check: a newline is added on every visit
			# to an image node.
			pieces.append("\n")
	return "".join(pieces).strip()

# Write the extracted plain text of the parsed document to stdout.
plain_text = ast2text(ast)
print(plain_text)