// GemPress gmi-to-html utility, using Sugar-C in less than 190 lines of code
// 2021, by Antonio Prates <antonioprates at gmail dot com>
#include <sugar.h>
// Placeholder <title> element used until the first "# " heading of a file is
// seen; lineToHTML compares the global title against it to detect that case.
#define DEFAULT_TITLE "<title>#</title>\n"
// globals (hold some state during parsing of each file)
// <title> element emitted into <head>; reset to DEFAULT_TITLE by convert()
// and replaced by lineToHTML() on the first h1 of the file.
string title;
// true while between an opening and a closing ``` fence line.
bool preformattedMode;
// set to true by lineToHTML() when the line just parsed was a "* " list item;
// convert() clears it before parsing each line.
bool itemsMode;
// running number shown in front of each link; incremented by toLink() and
// reset to 0 by convert() for every file.
number linkCount;
// opening <html> tag; replaced by convert() when a "-lang:" argument is seen
// and not reset afterwards, so it applies to the files converted after it.
string htmlTag = "<html>\n";
// html escape code helpers
// Returns text with each double quote replaced by the &quot; entity.
string escapeQ(string text) { return replaceWord(text, "\"", "&quot;"); }
// Returns text with each ampersand replaced by the &amp; entity. Must run
// before the other escapes, otherwise the '&' they introduce gets re-escaped.
string escapeE(string text) { return replaceWord(text, "&", "&amp;"); }
// Returns text with each '<' replaced by the &lt; entity.
string escapeLt(string text) { return replaceWord(text, "<", "&lt;"); }
// Returns text with each '>' replaced by the &gt; entity.
string escapeGt(string text) { return replaceWord(text, ">", "&gt;"); }
// Prepares a raw gemtext buffer for line-by-line conversion:
//   1. drops every \r so LF and CRLF input look the same,
//   2. HTML-escapes '&' first, then '<', then '>',
//   3. puts a "<br />" marker on its own line after every newline,
//   4. splits the result on '\n' and returns the list of lines.
stringList preProcess(string text) {
  string unixText = replaceWord(text, "\r", "");

  // ampersand goes first so the entities added next are not escaped again
  string ampEscaped = escapeE(unixText);
  string ltEscaped = escapeLt(ampEscaped);
  string escaped = escapeGt(ltEscaped);

  string withBreaks = replaceWord(escaped, "\n", "\n<br />\n");
  return splitSep(withBreaks, '\n');
}
// Joins the converted lines back into one buffer, strips the redundant
// "<br />" markers that surround block-level elements, and wraps the body in
// the page skeleton (doctype, global htmlTag, <head> with the global title,
// root/content divs). The returned document has every \n turned into \r\n.
string postProcess(number linesCount, stringList lines) {
  // {pattern, replacement} pairs, applied strictly in this order: the longer
  // "double break" patterns must be tried before their shorter variants
  string spacingFixes[][2] = {
      {"\n\n", "\n"},
      {"</h1>\n<br />\n<br />", "</h1>"},
      {"</h1>\n<br />", "</h1>"},
      {"</h2>\n<br />\n<br />", "</h2>"},
      {"</h2>\n<br />", "</h2>"},
      {"</h3>\n<br />\n<br />", "</h3>"},
      {"</h3>\n<br />", "</h3>"},
      {"</pre>\n<br />\n<br />", "</pre>"},
      {"</pre>\n<br />", "</pre>"},
      {"</blockquote>\n<br />", "</blockquote>"},
      {"<hr />\n<br />", "<hr />"},
      {"<br />\n<li>", "<li>"},
      {"<br />\n<br />\n</ul>", "</ul>\n<br />"},
      {"<br />\n</ul>", "</ul>\n<br />"},
  };
  number fixCount = sizeof(spacingFixes) / sizeof(spacingFixes[0]);

  // body = all html lines, one per row, then cleaned up
  string body = joinSep(linesCount, lines, '\n');
  for (number k = 0; k < fixCount; k++)
    body = replaceWord(body, spacingFixes[k][0], spacingFixes[k][1]);

  // page skeleton around the body
  string headTags =
      "<head>\n<meta charset='utf-8'>\n"
      "<meta name='generator' content='GemPress' />\n"
      "<link rel='stylesheet' href='styles.css' type='text/css' />\n"
      "<link rel='icon' href='favicon.ico' type='image/x-icon' />\n";
  string bodyOpening = "</head>\n<body>\n<div id='root'>\n<div id='content'>\n";
  string pageTop =
      join5s("<!DOCTYPE HTML>\n", htmlTag, headTags, title, bodyOpening);
  string pageBottom = "\n<br />\n<br />\n</div>\n</div>\n</body>\n</html>\n";

  string page = join3s(pageTop, body, pageBottom);
  return replaceWord(page, "\n", "\r\n"); // output uses CRLF line endings
}
// Converts a gemtext link line ("=> url [description...]") into a numbered
// HTML anchor of the form "[n] <a href='url'>description</a>".
//   line    - the link line, already HTML-escaped by preProcess
//   returns - the anchor markup, or line unchanged when no url token follows
//             the marker
// Side effect: increments the global linkCount.
// Links that do not start with a known external prefix are treated as
// internal and have ".gmi" rewritten to ".html" so they point at the
// converted pages.
string toLink(string line) {
  stringList words = splitSep(line, ' '); // words[0] is the link marker
  number wordCount = listCount(words);
  if (wordCount < 2) // fail-safe for missing link
    return line;

  string url = words[1];

  // prefixes that mark a link as external (left untouched)
  string externalRef[] = {"http://",   "https://",  "ftp://",
                          "gemini://", "gopher://", "www."};
  number refCount = sizeof(externalRef) / sizeof(externalRef[0]);
  bool isInternal = true;
  for (number i = 0; i < refCount && isInternal; i++)
    if (startsWith(url, externalRef[i]))
      isInternal = false;
  if (isInternal)
    url = replaceWord(url, ".gmi", ".html");

  // everything after the url is the description; fall back to the url itself
  string description =
      wordCount > 2 ? joinSep(listCount(&words[2]), &words[2], ' ') : url;

  // the href attribute is single-quoted, so a raw ' in the url would end the
  // attribute early and let the rest of the url inject markup
  string href = replaceWord(url, "'", "&#39;");

  string linkNumber = join3s("[", ofNumber(++linkCount), "] <a href='");
  return join5s(linkNumber, href, "'>", description, "</a>");
}
// Converts one pre-processed gemtext line into its HTML equivalent.
//   line    - a single line produced by preProcess: '&', '<' and '>' are
//             already HTML-escaped and line breaks are separate "<br />" lines
//   returns - the HTML for that line (plain text lines are returned as-is)
// Reads and updates the globals preformattedMode, itemsMode and title, and
// (through toLink) linkCount. Because the input is already escaped, the
// blockquote and link markers are matched in their escaped form.
string lineToHTML(string line) {
  // <preformatted mode>
  if (startsWith(line, "```")) {          // ``` -> preformatted text
    preformattedMode = !preformattedMode; // toggle global <pre> mode
    if (!preformattedMode)
      return replaceWord(line, "```", "</pre>");
    // anything after the opening fence is alt text; tolerate optional spaces
    string altText = &line[3];
    while (*altText == ' ')
      altText++;
    if (*altText == '\0')
      return "<pre>";
    // the title attribute is single-quoted, so ' must be escaped as well
    altText = replaceWord(escapeQ(altText), "'", "&#39;");
    return join3s("<pre title='", altText, "'>");
  }
  if (preformattedMode)                     // while global <pre> mode
    return replaceWord(line, "<br />", ""); // -> revert preProcess

  // <simple blockquote>
  string quoteMarker = "&gt; ";             // "> " after html escaping
  if (startsWith(line, quoteMarker)) {
    line = replaceWord(line, "<br />", ""); // -> revert preProcess
    return join3s("<blockquote>", &line[strlen(quoteMarker)],
                  "</blockquote>\n");
  }

  // <links>
  if (startsWith(line, "=&gt;")) // "=>" after html escaping
    return toLink(line);         // -> <a href...

  // <headings>
  if (startsWith(line, "### ")) // ### -> h3
    return join3s("<h3>", &line[4], "</h3>");
  if (startsWith(line, "## "))  // ## -> h2
    return join3s("<h2>", &line[3], "</h2>");
  if (startsWith(line, "# ")) { // # -> h1 ...AND set global page title!
    if (areSame(title, DEFAULT_TITLE)) // (stick to first h1 found)
      title = join3s("<title>", &line[2], "</title>\n");
    return join3s("<h1>", &line[2], "</h1>");
  }

  // <horizontal rule>
  if (areSame(line, "---"))
    return "<hr />";

  // <inline code>
  // only attempted when backticks come in pairs, to limit unclosed <code>
  number ticksCount = countWord(line, "`");
  if (ticksCount > 1 && ticksCount % 2 == 0) { // simple check, no guarantees
    line = replaceWord(line, " `", " <code>");  // start inline code
    line = replaceWord(line, "` ", "</code> "); // end inline code
    if (startsWith(line, "`"))                  // if first thing in the line
      line = join2s("<code>", &line[1]);        // begin with inline code
    number length = strlen(line);
    if (length > 0 && line[length - 1] == '`')  // if last thing in the line
      line = replaceWord(line, "`", "</code>"); // end inline code
  }

  // <list items> (accepts inline code)
  if (startsWith(line, "* ")) { // * -> list item
    itemsMode = true;           // tell convert() this line belongs to a list
    return join3s("<li>", &line[2], "</li>");
  }

  // ...or else -> simple text
  return line;
}
// Handles one command line argument.
//   "-lang:xx"   - sets the lang attribute of the <html> tag used for the
//                  files converted afterwards; nothing is written.
//   "*.gmi" path - reads the file, converts it and writes the result next to
//                  it with ".gmi" replaced by ".html"; prints the new path on
//                  success or "<path> -> FAILED!" when reading or writing
//                  fails.
// Arguments that do not contain ".gmi" are ignored silently.
void convert(string path) {
  if (startsWith(path, "-lang:")) {
    htmlTag = join3s("<html lang=\"", &path[6], "\">\n"); // set langCode
    return;
  }
  if (!countWord(path, ".gmi")) // ultra-simple path validation
    return;

  string text = readFile(path); // read and hope content is text/gemini
  if (!text) {                  // read failure
    println(join2s(path, " -> FAILED!"));
    return;
  }

  // reset the per-file parser state
  title = DEFAULT_TITLE;
  preformattedMode = false;
  itemsMode = false;
  linkCount = 0;

  bool wasItemsMode = false;
  stringList lines = preProcess(text);
  number linesCount = listCount(lines);
  for (number i = 0; i < linesCount; i++) {
    // line breaks between list items are dropped and do not end the list
    if (itemsMode && areSame(lines[i], "<br />"))
      continue;
    itemsMode = false; // lineToHTML sets it again if this line is an item
    lines[i] = lineToHTML(lines[i]);
    if (itemsMode && !wasItemsMode) // first item -> open the list
      lines[i] = join2s("<ul>\n", lines[i]);
    if (wasItemsMode && !itemsMode) // first non-item -> close the list
      lines[i] = join2s("</ul>\n", lines[i]);
    wasItemsMode = itemsMode;
  }

  // close whatever is still open when the file ends inside a list or a
  // preformatted block, so the generated markup stays balanced
  if (linesCount > 0) {
    number last = linesCount - 1;
    if (wasItemsMode)
      lines[last] = join2s(lines[last], "\n</ul>");
    if (preformattedMode)
      lines[last] = join2s(lines[last], "\n</pre>");
  }

  string newPath = replaceWord(path, ".gmi", ".html");
  if (writeFile(postProcess(linesCount, lines), newPath)) { // and save...
    println(newPath);
    return;
  }
  println(join2s(path, " -> FAILED!")); // ...or complain!
}
// Program entry (Sugar-C app macro, which makes argc/argv available): every
// argument after the program name is handed to convert() in order; with no
// arguments a usage hint is printed instead.
app({
  if (argc >= 2)
    forEach(argc - 1, &argv[1], &convert);
  else
    println("please, provide '.gmi' file paths as arguments...");
})