// ~aprates/gempress
//
// ref: fc494358d9df2a561369b66182580ce0af35c54d gempress/src/gmi-to-html.c -rwxr-xr-x 7.8 KiB
// fc494358 Antonio Prates: Fix html lang bug (10 months ago)

// GemPress gmi-to-html utility, using Sugar-C in less than 190 lines of code
// 2021, by Antonio Prates <hello@aprates.dev>

#include <sugar.h>

#define DEFAULT_TITLE "<title>#</title>\n"

// globals (hold some state during parsing of each file)

// the "<title>…</title>\n" line placed in the page head; convert resets it to
// DEFAULT_TITLE for each file and lineToHTML replaces it on the first "# " line
string title;
// true while the parser is between an opening and a closing ``` line
bool preformattedMode;
// set by lineToHTML when the current line is a "* " list item; convert clears
// it before every line it processes and uses it to open/close <ul>
bool itemsMode;
// incremented by toLink for each link it converts (shown as "[n]" before the
// link); convert resets it to 0 for each file
number linkCount;
// opening html tag written by postProcess; a "lang:xx" argument replaces it
// with "<html lang='xx'>\n"
string htmlTag = "<html>\n";
// switched on by the "extraMarkup" argument; enables the "---" rule and the
// `inline code` handling in lineToHTML
bool extraMarkup = false;

// html escape code helpers
// escapes both quote characters (" -> &quot; and ' -> &#39;) so the text can be
// placed inside an html attribute value whichever quote delimits it; the
// attributes emitted by this file are single-quoted, so ' must be covered too
string escapeQ(string text) {
  return replaceWord(replaceWord(text, "\"", "&quot;"), "'", "&#39;");
}
// & -> &amp; (apply before the other escape helpers, otherwise the "&" they
// introduce would be escaped a second time)
string escapeE(string text) {
  string escaped = replaceWord(text, "&", "&amp;");
  return escaped;
}
// < -> &lt;
string escapeLt(string text) {
  string escaped = replaceWord(text, "<", "&lt;");
  return escaped;
}
// > -> &gt;
string escapeGt(string text) {
  string escaped = replaceWord(text, ">", "&gt;");
  return escaped;
}

// removes/escapes/replaces special chars and splits the buffer to lines
//
// text: raw contents of a .gmi file
// returns: the list of lines; every original line break becomes a stand-alone
//          "<br />" element sitting between the lines it separated
stringList preProcess(string text) {
  // drop \r so LF and CRLF inputs look the same from here on
  string unixText = replaceWord(text, "\r", "");

  // html escape codes: & goes first so the entities added next stay intact
  string ampEscaped = escapeE(unixText);
  string ltEscaped = escapeLt(ampEscaped);
  string escaped = escapeGt(ltEscaped);

  // put a break-line marker on a line of its own after each line…
  string withBreaks = replaceWord(escaped, "\n", "\n<br />\n");

  // …and split to lines
  return splitSep(withBreaks, '\n');
}

// join the converted lines back to a single buffer, tidy up the spacing and
// wrap the result into the html page boilerplate
//
// linesCount: number of entries in lines
// lines:      html lines produced by lineToHTML
// returns:    the complete html document, using CRLF line endings
// reads the globals htmlTag and title to build the page head
string postProcess(number linesCount, stringList lines) {

  // spacing hacks, applied strictly in this order: {what to find, replacement}
  string spacingFixes[][2] = {
      {"\n\n", "\n"},
      {"</h1>\n<br />\n<br />", "</h1>"},
      {"</h1>\n<br />", "</h1>"},
      {"</h2>\n<br />\n<br />", "</h2>"},
      {"</h2>\n<br />", "</h2>"},
      {"</h3>\n<br />\n<br />", "</h3>"},
      {"</h3>\n<br />", "</h3>"},
      {"</pre>\n<br />\n<br />", "</pre>"},
      {"</pre>\n<br />", "</pre>"},
      {"</blockquote>\n<br />", "</blockquote>"},
      {"<hr />\n<br />", "<hr />"},
      {"<br />\n<li>", "<li>"},
      {"<br />\n<br />\n</ul>", "</ul>"},
      {"<br />\n</ul>", "</ul>"},
  };
  number fixesCount = sizeof spacingFixes / sizeof spacingFixes[0];

  // one buffer holding every html line
  string body = joinSep(linesCount, lines, '\n');
  for (number fix = 0; fix < fixesCount; fix++)
    body = replaceWord(body, spacingFixes[fix][0], spacingFixes[fix][1]);

  // page boilerplate around the body (the title goes at the end of the head)
  string docType = "<!DOCTYPE HTML>\n";
  string headOpening =
      "<head>\n<meta charset='utf-8'>\n"
      "<meta name='generator' content='GemPress' />\n"
      "<link rel='stylesheet' href='styles.css' type='text/css' />\n"
      "<link rel='icon' href='favicon.ico' type='image/x-icon' />\n";
  string bodyOpening =
      "</head>\n<body>\n<div id='root'>\n<div id='content'>\n";
  string pageEnding = "\n<br />\n<br />\n</div>\n</div>\n</body>\n</html>\n";

  string pageStart = join5s(docType, htmlTag, headOpening, title, bodyOpening);
  string page = join3s(pageStart, body, pageEnding);

  // add back the \r that preProcess removed
  return replaceWord(page, "\n", "\r\n");
}

// convert a line starting with => (seen here as "=&gt;", already escaped by
// preProcess) to a numbered html link
//
// line: the link line, tokens separated by single spaces:
//       "=&gt; <url> [description…]"
// returns: "[n] <a href='url'>description</a>", or line unchanged when it has
//          no url token
// side effect: increments the global linkCount for every link it builds
string toLink(string line) {
  stringList words = splitSep(line, ' '); // tokenize the link
  number wordCount = listCount(words);    // count our tokens
  if (wordCount < 2)                      // fail-safe for missing link
    return line;
  string url = words[1];  // get url
  bool isInternal = true; // adapt internal/external links:
  string externalRef[6] = {"http://",   "https://",  "ftp://",
                           "gemini://", "gopher://", "www."};
  for (number i = 0; i < 6 && isInternal; i++) // stop at the first match
    if (startsWith(url, externalRef[i]))
      isInternal = false;
  if (isInternal) // internal pages are converted too, so point at the .html
    url = replaceWord(url, ".gmi", ".html");
  // the href attribute below is delimited by single quotes and preProcess only
  // escapes & < >, so a ' inside the url would end the attribute early and let
  // the rest of the line inject markup; escape it
  url = replaceWord(url, "'", "&#39;");
  string description = // join the rest as description OR use url as desc.
      wordCount > 2 ? joinSep(listCount(&words[2]), &words[2], ' ') : url;
  string linkNumber = join3s("[", ofNumber(++linkCount), "] <a href='");
  return join5s(linkNumber, url, "'>", description, "</a>");
}

// parse one line and substitute gemtext markup with the equivalent html tags
//
// line: a single element of the list built by preProcess: either an
//       html-escaped text line (so ">" arrives as "&gt;") or a stand-alone
//       "<br />" separator
// returns: the html for that line
// side effects on globals: toggles preformattedMode on ``` lines, sets
//       itemsMode when the line is a list item, sets title from the first
//       "# " heading and bumps linkCount through toLink
string lineToHTML(string line) {

  // <preformatted mode>
  if (startsWith(line, "```")) {          // ``` -> preformatted text
    preformattedMode = !preformattedMode; // toggle global <pre> mode
    if (!preformattedMode)
      return "</pre>"; // anything after a closing ``` is ignored
    // the alt text starts right after the three backticks (index 3), with or
    // without blanks in between: "```python" and "``` python" are equivalent
    number altStart = 3;
    while (line[altStart] == ' ' || line[altStart] == '\t')
      altStart++;
    if (line[altStart] == '\0') // no alt text at all
      return "<pre>";
    // the title attribute is single-quoted: make sure ' is escaped as well
    string altText = replaceWord(escapeQ(&line[altStart]), "'", "&#39;");
    return join3s("<pre title='", altText, "'>");
  }
  if (preformattedMode)                     // while global <pre> mode
    return replaceWord(line, "<br />", ""); // -> revert preProcess

  // <simple blockquote>
  if (startsWith(line, "&gt; ")) {          // > -> blockquote (escaped)
    line = replaceWord(line, "<br />", ""); // -> revert preProcess
    return join3s("<blockquote>", &line[5], "</blockquote>\n");
  }

  // <links>
  if (startsWith(line, "=&gt;")) // => link (escaped)
    return toLink(line);         // -> <a href…

  // <headings> (longest prefix first, "### " also starts with "#")
  if (startsWith(line, "### ")) // ### -> h3
    return join3s("<h3>", &line[4], "</h3>");
  if (startsWith(line, "## ")) // ## -> h2
    return join3s("<h2>", &line[3], "</h2>");
  if (startsWith(line, "# ")) {        // # -> h1 …AND set global page title!
    if (areSame(title, DEFAULT_TITLE)) // (stick to first h1 found)
      title = join3s("<title>", &line[2], "</title>\n");
    return join3s("<h1>", &line[2], "</h1>");
  }

  if (extraMarkup) {
    // <horizontal rule>
    if (areSame(line, "---"))
      return "<hr />";

    // <inline code>
    number ticksCount =
        countWord(line, "`"); // futile attempt to prevent bleeding
    if (ticksCount > 1 && ticksCount % 2 == 0) {  // simple check is even
      line = replaceWord(line, " `", " <code>");  // start inline code
      line = replaceWord(line, "` ", "</code> "); // end inline code
      if (startsWith(line, "`"))                  // if first thing in the line
        line = join2s("<code>", &line[1]);        // begin with inline code
      number length = strlen(line);
      if (length > 0 && line[length - 1] == '`')  // if last…
        line = replaceWord(line, "`", "</code>"); // end inline code
    }
  }

  // <list items> (accepts inline code)
  if (startsWith(line, "* ")) {               // * -> list item
    itemsMode = true;                         // flag global items mode
    return join3s("<li>", &line[2], "</li>"); // wrap the text after "* "
  }

  // …or else -> simple text
  return line;
}

// handle one command line argument: either an option or a .gmi file to
// convert, in which case the corresponding .html is generated in place
//
// path: "lang:xx"      -> following pages get <html lang='xx'>
//       "extraMarkup…" -> enables the extra markup in lineToHTML
//       anything containing ".gmi" -> read it, convert it and write the
//       result to the same path with ".gmi" replaced by ".html"
//       anything else is silently ignored
// prints "<path> -> FAILED!" when the file cannot be read
void convert(string path) {
  if (startsWith(path, "lang:")) {
    htmlTag = join3s("<html lang='", &path[5], "'>\n"); // set langCode
    return;
  }
  if (startsWith(path, "extraMarkup")) {
    extraMarkup = true; // set extraMarkup
    return;
  }
  if (!countWord(path, ".gmi")) // ultra-simple path validation
    return;

  string text = readFile(path); // read and hope content is text/gemini :D
  if (!text) {                  // safeguard / read failure
    println(join2s(path, " -> FAILED!")); // …complain!
    return;
  }

  title = DEFAULT_TITLE;    // reset global 'page title' for each file
  preformattedMode = false; // reset global '<pre> mode' for each file
  itemsMode = false;        // reset global 'items mode' for each file, so a
                            // list ending the previous file cannot leak in
  linkCount = 0;            // reset global 'link count' for each file
  bool wasItemsMode = false;
  stringList lines = preProcess(text);
  number linesCount = listCount(lines);
  for (number i = 0; i < linesCount; i++) {
    // break lines between list items are left untouched (postProcess strips
    // them) and do not end the list
    if (itemsMode && areSame(lines[i], "<br />"))
      continue;
    itemsMode = false; // reset for each processed line; lineToHTML sets it
    lines[i] = lineToHTML(lines[i]); // do the magic :D
    if (itemsMode && !wasItemsMode)  // first item -> open the list
      lines[i] = join2s("<ul>\n", lines[i]);
    if (wasItemsMode && !itemsMode) // first non-item -> close the list
      lines[i] = join2s("</ul>\n", lines[i]);
    wasItemsMode = itemsMode;
  }

  // close whatever is still open at end of file: no later line will do it
  if (linesCount > 0) {
    if (preformattedMode) // missing closing ```
      lines[linesCount - 1] = join2s(lines[linesCount - 1], "\n</pre>");
    if (wasItemsMode) // the file ends with a list item
      lines[linesCount - 1] = join2s(lines[linesCount - 1], "\n</ul>");
  }

  string newPath = replaceWord(path, ".gmi", ".html");
  writeFile(postProcess(linesCount, lines), newPath); // and save…
}

// program entry point: every argument after the program name is handed to
// convert, in order; with no arguments a usage hint is printed instead
app({
  if (argc >= 2) {
    forEach(argc - 1, &argv[1], &convert);
  } else {
    println("gmi-to-html: please, provide '.gmi' file paths as arguments…");
  }
})