~aprates/gempress

ref: 0cdd4d4eacaecdd25e5dced330decc77b9822b05 gempress/src/gmi-to-html.c -rwxr-xr-x 7.7 KiB
0cdd4d4eAntonio Prates Use fully qualified link to home on feed 8 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
// GemPress gmi-to-html utility, using Sugar-C in less than 190 lines of code
// 2021, by Antonio Prates <antonioprates at gmail dot com>

#include <sugar.h>

#define DEFAULT_TITLE "<title>#</title>\n"

// globals (hold some state during parsing of each file)

// complete "<title>...</title>\n" element placed in the page head; convert()
// resets it to DEFAULT_TITLE and lineToHTML() replaces it on the first h1
string title;
// true while between an opening and a closing ``` line; toggled by
// lineToHTML() and reset by convert() for each file
bool preformattedMode;
// set to true by lineToHTML() when the line it just parsed is a list item;
// convert() clears it before parsing each line
bool itemsMode;
// number of links emitted so far; incremented by toLink() and reset by
// convert() for each file
number linkCount;
// opening html tag used by postProcess(); convert() swaps it for a
// lang-qualified one when it receives a "-lang:" argument
string htmlTag = "<html>\n";

// html entity helpers: each one hands the text to replaceWord to swap a
// single special character for its entity and returns that result

// " -> &quot;
string escapeQ(string text) {
  string quoted = replaceWord(text, "\"", "&quot;");
  return quoted;
}

// & -> &amp;
string escapeE(string text) {
  string ampersands = replaceWord(text, "&", "&amp;");
  return ampersands;
}

// < -> &lt;
string escapeLt(string text) {
  string lessThan = replaceWord(text, "<", "&lt;");
  return lessThan;
}

// > -> &gt;
string escapeGt(string text) {
  string greaterThan = replaceWord(text, ">", "&gt;");
  return greaterThan;
}

// removes/escapes/replaces special chars and splits the buffer into lines;
// every line break is followed by a "<br />" marker sitting on its own line
stringList preProcess(string text) {
  // drop every \r so LF and CRLF inputs look the same
  string unixText = replaceWord(text, "\r", "");

  // html escape codes, ampersand first, then < and >
  string ampSafe = escapeE(unixText);
  string ltSafe = escapeLt(ampSafe);
  string escaped = escapeGt(ltSafe);

  // swap each line break for a break line code surrounded by line breaks
  string marked = replaceWord(escaped, "\n", "\n<br />\n");

  // ...and hand back the individual lines
  return splitSep(marked, '\n');
}

// glue the converted lines into one buffer, tidy up the spacing around block
// elements and wrap the result in the html boilerplate (title goes in head)
string postProcess(number linesCount, stringList lines) {

  // {pattern, replacement} pairs for the spacing hacks; they are applied
  // strictly top to bottom, each one on the output of the previous one
  string spacingFixes[14][2] = {
      {"\n\n", "\n"},
      {"</h1>\n<br />\n<br />", "</h1>"},
      {"</h1>\n<br />", "</h1>"},
      {"</h2>\n<br />\n<br />", "</h2>"},
      {"</h2>\n<br />", "</h2>"},
      {"</h3>\n<br />\n<br />", "</h3>"},
      {"</h3>\n<br />", "</h3>"},
      {"</pre>\n<br />\n<br />", "</pre>"},
      {"</pre>\n<br />", "</pre>"},
      {"</blockquote>\n<br />", "</blockquote>"},
      {"<hr />\n<br />", "<hr />"},
      {"<br />\n<li>", "<li>"},
      {"<br />\n<br />\n</ul>", "</ul>\n<br />"},
      {"<br />\n</ul>", "</ul>\n<br />"},
  };

  // single buffer with one html line per text line
  string html = joinSep(linesCount, lines, '\n');

  for (number fix = 0; fix < 14; fix++)
    html = replaceWord(html, spacingFixes[fix][0], spacingFixes[fix][1]);

  // everything that goes before the body contents
  string opening =
      join5s("<!DOCTYPE HTML>\n", htmlTag,
             "<head>\n<meta charset='utf-8'>\n"
             "<meta name='generator' content='GemPress' />\n"
             "<link rel='stylesheet' href='styles.css' type='text/css' />\n"
             "<link rel='icon' href='favicon.ico' type='image/x-icon' />\n",
             title, "</head>\n<body>\n<div id='root'>\n<div id='content'>\n");

  // ...and everything that goes after
  string closing = "\n<br />\n<br />\n</div>\n</div>\n</body>\n</html>\n";

  // the page is written with CRLF line endings, so add \r back
  string page = join3s(opening, html, closing);
  return replaceWord(page, "\n", "\r\n");
}

// convert a line starting with => (already escaped to =&gt;) to a numbered
// html link; the second space-separated token is the url and whatever follows
// is the description (the url doubles as description when there is none).
// Returns the line untouched when no url token is found.
string toLink(string line) {
  stringList words = splitSep(line, ' '); // tokenize the link
  number wordCount = listCount(words);    // count our tokens
  if (wordCount < 2)                      // fail-safe for missing link
    return line;
  string url = words[1];  // get url
  bool isInternal = true; // adapt internal/external links:
  string externalRef[6] = {"http://",   "https://",  "ftp://",
                           "gemini://", "gopher://", "www."};
  for (number i = 0; i < 6; i++)
    if (startsWith(url, externalRef[i]))
      isInternal = false;
  // internal links point at the generated .html sibling of the .gmi file
  url = isInternal ? replaceWord(url, ".gmi", ".html") : url;
  string description = // join the rest as description OR use url as desc.
      wordCount > 2 ? joinSep(listCount(&words[2]), &words[2], ' ') : url;
  // the href attribute is delimited by single quotes and preProcess only
  // escapes & < >, so a ' inside the url would end the attribute early and
  // let the rest of the url be read as markup -> escape it
  string href = replaceWord(url, "'", "&#39;");
  string linkNumber = join3s("[", ofNumber(++linkCount), "] <a href='");
  return join5s(linkNumber, href, "'>", description, "</a>");
}

// parse one pre-processed line (html special chars already escaped) and do
// the gemtext substitutions for the equivalent html tags.
// Side effects on globals: toggles preformattedMode on ``` lines, sets title
// on the first h1 and raises itemsMode when the line is a list item.
// Returns the html for the line (the line itself when nothing matches).
string lineToHTML(string line) {

  // <preformatted mode>
  if (startsWith(line, "```")) {          // ``` -> preformatted text
    preformattedMode = !preformattedMode; // toggle global <pre> mode
    if (!preformattedMode)
      return "</pre>"; // anything after a closing ``` is ignored
    string altText = &line[3]; // alt text starts right after the fence...
    while (*altText == ' ' || *altText == '\t')
      altText++; // ...once the optional whitespace is skipped
    if (*altText == '\0')
      return "<pre>";
    // the title attribute is delimited by single quotes, so ' must be
    // escaped as well or the alt text could end the attribute early
    altText = replaceWord(escapeQ(altText), "'", "&#39;");
    return join3s("<pre title='", altText, "'>");
  }
  if (preformattedMode)                     // while global <pre> mode
    return replaceWord(line, "<br />", ""); // -> revert preProcess

  // <simple blockquote>
  if (startsWith(line, "&gt; ")) {          // > -> blockquote (escaped)
    line = replaceWord(line, "<br />", ""); // -> revert preProcess
    return join3s("<blockquote>", &line[5], "</blockquote>\n");
  }

  // <links>
  if (startsWith(line, "=&gt;")) // => link (escaped)
    return toLink(line);         // -> <a href...

  // <headings>
  if (startsWith(line, "### ")) // ### -> h3
    return join3s("<h3>", &line[4], "</h3>");
  if (startsWith(line, "## ")) // ## -> h2
    return join3s("<h2>", &line[3], "</h2>");
  if (startsWith(line, "# ")) {        // # -> h1 ...AND set global page title!
    if (areSame(title, DEFAULT_TITLE)) // (stick to first h1 found)
      title = join3s("<title>", &line[2], "</title>\n");
    return join3s("<h1>", &line[2], "</h1>");
  }

  // <horizontal rule>
  if (areSame(line, "---"))
    return "<hr />";

  // <inline code>
  number ticksCount =
      countWord(line, "`"); // futile attempt to prevent bleeding
  if (ticksCount > 1 && ticksCount % 2 == 0) {  // simple check, no guarantees
    line = replaceWord(line, " `", " <code>");  // start inline code
    line = replaceWord(line, "` ", "</code> "); // end inline code
    if (startsWith(line, "`"))                  // if first thing in the line
      line = join2s("<code>", &line[1]);        // begin with inline code
    number length = strlen(line);
    // NOTE(review): this closes on every backtick still left in the line,
    // not only on the trailing one
    if (length > 0 && line[length - 1] == '`')  // if last...
      line = replaceWord(line, "`", "</code>"); // end inline code
  }

  // <list items> (accepts inline code)
  if (startsWith(line, "* ")) {               // * -> list item
    itemsMode = true;                         // raise global items mode
    return join3s("<li>", &line[2], "</li>"); // wrap the text after "* "
  }

  // ...or else -> simple text
  return line;
}

// handle one command line argument:
//  - "-lang:xx" sets the lang attribute of the html tag for the files that
//    follow and returns
//  - a path containing ".gmi" is read, converted and written next to the
//    source with ".gmi" replaced by ".html"; the new path is printed on
//    success and "<path> -> FAILED!" when the read or the write fails
//  - anything else is silently ignored
void convert(string path) {
  if (startsWith(path, "-lang:")) {
    htmlTag = join3s("<html lang=\"", &path[6], "\">\n"); // set langCode
    return;
  }
  if (countWord(path, ".gmi")) {  // ultra-simple path validation
    string text = readFile(path); // read and hope content is text/gemini :D
    if (text) {                   // safe-guard / read failure
      title = DEFAULT_TITLE;      // reset global 'page title' for each file
      preformattedMode = false;   // reset global '<pre> mode' for each file
      itemsMode = false;          // reset global 'items mode' for each file
      linkCount = 0;              // reset global 'link count' for each file
      bool wasItemsMode = false;
      stringList lines = preProcess(text);
      number linesCount = listCount(lines);
      for (number i = 0; i < linesCount; i++) {
        // the break that follows a list item is left alone and does not end
        // the list
        if (itemsMode && areSame(lines[i], "<br />"))
          continue;
        itemsMode = false; // lineToHTML raises it again on a list item
        lines[i] = lineToHTML(lines[i]); // do the magic :D
        if (itemsMode && !wasItemsMode) // first item -> open the list
          lines[i] = join2s("<ul>\n", lines[i]);
        if (wasItemsMode && !itemsMode) // first non-item -> close the list
          lines[i] = join2s("</ul>\n", lines[i]);
        wasItemsMode = itemsMode;
      }
      // a list still open here never met a non-item line (e.g. the file
      // ends on a list item without a trailing newline) -> close it
      if (wasItemsMode && linesCount > 0)
        lines[linesCount - 1] = join2s(lines[linesCount - 1], "\n</ul>");
      string newPath = replaceWord(path, ".gmi", ".html");
      if (writeFile(postProcess(linesCount, lines), newPath)) { // and save...
        println(newPath);
        return;
      }
    }
    println(join2s(path, " -> FAILED!")); // ...or complain!
  }
}

// entry point: feed every command line argument after the program name to
// convert, or print a usage hint when there is none
app({
  if (argc >= 2) {
    forEach(argc - 1, &argv[1], &convert);
  } else {
    println("please, provide '.gmi' file paths as arguments...");
  }
})