~sotirisp/qute-gemini

ref: 67d8f98fde5b9398c41988984391558d0b9a640b qute-gemini/qute-gemini -rwxr-xr-x 13.2 KiB
67d8f98f — Sotiris Papatheodorou Bump version to 1.0.0 8 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
#!/usr/bin/env python3
# qute-gemini - Open Gemini links in qutebrowser and render them as HTML
#
# SPDX-FileCopyrightText: 2019-2020 solderpunk
# SPDX-FileCopyrightText: 2020 Aaron Janse
# SPDX-FileCopyrightText: 2020 petedussin
# SPDX-FileCopyrightText: 2020-2021 Sotiris Papatheodorou
# SPDX-License-Identifier: GPL-3.0-or-later

import cgi
import html
import os
import socket
import ssl
import sys
import tempfile
import urllib.parse

from typing import Tuple


# Version of qute-gemini, shown in the details section of rendered pages.
_version = "1.0.0"

# Number of consecutive redirects that are followed before giving up.
_max_redirects = 5

# Skeleton of the error page. The upper-case placeholders URL, URL_TEXT,
# DESCRIPTION and CSS are substituted by qute_error_page().
_error_page_template = '''<?xml version="1.0" encoding="UTF-8"?>
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<title>Error opening page: URL</title>
<style>
CSS
</style>
</head>
<body>
<h1>qute-gemini error</h1>
<p>Error while opening:<br/><a href="URL">URL_TEXT</a></p>
<p>DESCRIPTION</p>
</body>
</html>
'''

# Error descriptions keyed by Gemini status code. A 2-digit key is looked up
# first and the 1-digit key of the same category is used as a fallback. The
# literal text META is replaced with the meta field of the response.
_status_code_desc = {
    "1":  "Gemini status code 1 Input. This is not implemented in qute-gemini.",
    "10": "Gemini status code 10 Input. This is not implemented in qute-gemini.",
    "11": "Gemini status code 11 Sensitive Input. This is not implemented in qute-gemini.",
    "3":  "Gemini status code 3 Redirect. Stopped after " + str(_max_redirects) + " redirects.",
    "30": "Gemini status code 30 Temporary Redirect. Stopped after " + str(_max_redirects) + " redirects.",
    "31": "Gemini status code 31 Permanent Redirect. Stopped after " + str(_max_redirects) + " redirects.",
    "4":  "Gemini status code 4 Temporary Failure. Server message: META",
    "40": "Gemini status code 40 Temporary Failure. Server message: META",
    "41": "Gemini status code 41 Server Unavailable. The server is unavailable due to overload or maintenance. Server message: META",
    "42": "Gemini status code 42 CGI Error. A CGI process, or similar system for generating dynamic content, died unexpectedly or timed out. Server message: META",
    "43": "Gemini status code 43 Proxy Error. A proxy request failed because the server was unable to successfully complete a transaction with the remote host. Server message: META",
    "44": "Gemini status code 44 Slow Down. Rate limiting is in effect. Please wait META seconds before making another request to this server.",
    "5":  "Gemini status code 5 Permanent Failure. Server message: META",
    "50": "Gemini status code 50 Permanent Failure. Server message: META",
    "51": "Gemini status code 51 Not Found. The requested resource could not be found but may be available in the future. Server message: META",
    "52": "Gemini status code 52 Gone. The resource requested is no longer available and will not be available again. Server message: META",
    "53": "Gemini status code 53 Proxy Request Refused. The request was for a resource at a domain not served by the server and the server does not accept proxy requests. Server message: META",
    "59": "Gemini status code 59 Bad Request. The server was unable to parse the client's request, presumably due to a malformed request. Server message: META",
    "6":  "Gemini status code 6 Client Certificate Required. This is not implemented in qute-gemini.",
}


def qute_url() -> str:
    """Return the URL qutebrowser handed over in the QUTE_URL variable.

    Raises KeyError if QUTE_URL is not set in the environment.
    """
    environment = os.environ
    return environment["QUTE_URL"]


def qute_fifo() -> str:
    """Return the path qutebrowser commands are written to (QUTE_FIFO).

    Raises KeyError if QUTE_FIFO is not set in the environment.
    """
    environment = os.environ
    return environment["QUTE_FIFO"]


def html_href(url: str, description: str) -> str:
    """Return an HTML anchor element pointing to url.

    url:         The link target. It is HTML-escaped here (quotes included)
                 so that it can't break out of the href attribute.
    description: The link text. It is inserted as-is, so the caller must have
                 HTML-escaped it already.
    Returns the <a> element as a string.
    """
    return '<a href="' + html.escape(url, quote=True) + '">' + description + "</a>"


def qute_gemini_css_path() -> str:
    """Return the location where the user's custom CSS file is looked for.

    The file lives below XDG_DATA_HOME when that variable is set and below
    $HOME/.local/share otherwise.
    """
    data_dir = os.environ.get("XDG_DATA_HOME")
    if data_dir is None:
        # XDG_DATA_HOME is unset, fall back to its conventional default
        data_dir = os.path.join(os.environ["HOME"], ".local/share")
    return os.path.join(data_dir, "qutebrowser/userscripts/qute-gemini.css")


def gemini_absolutise_url(base_url: str, relative_url: str) -> str:
    """Absolutise relative gemini URLs.

    base_url:     The absolute gemini:// URL relative_url was found at.
    relative_url: The URL to resolve. It is returned unchanged if it already
                  has a scheme followed by "://".
    Returns the absolute URL as a string.

    Adapted from gcat: https://github.com/aaronjanse/gcat
    """
    gemini_prefix = "gemini://"
    http_prefix = "http://"
    # Only look for "://" before the query and the fragment, a relative URL
    # like "search?u=http://example.org" must still be resolved
    location = relative_url.partition("?")[0].partition("#")[0]
    if "://" in location:
        return relative_url
    # urljoin() only resolves relative references for schemes it knows about,
    # so pretend the base is an http:// URL. Only the leading scheme is
    # swapped, any other "gemini://" or "http://" in the URL (e.g. in the
    # query) must be left alone.
    if base_url.startswith(gemini_prefix):
        base_url = http_prefix + base_url[len(gemini_prefix):]
    absolute_url = urllib.parse.urljoin(base_url, relative_url)
    if absolute_url.startswith(http_prefix):
        absolute_url = gemini_prefix + absolute_url[len(http_prefix):]
    return absolute_url


def _gemini_parse_meta(meta: str) -> Tuple[str, dict]:
    """Split a MIME type meta field into the media type and its parameters.

    Parsed by hand rather than with the deprecated cgi module. Parameter
    names are lower-cased and surrounding double quotes are removed from the
    parameter values.
    """
    media_type, _, rest = meta.partition(";")
    params = {}
    for param in rest.split(";"):
        name, separator, value = param.partition("=")
        if separator:
            params[name.strip().lower()] = value.strip().strip('"')
    return media_type.strip(), params


def _gemini_request(context: ssl.SSLContext, url: str,
                    parsed_url: urllib.parse.ParseResult
                    ) -> Tuple[str, str, bytes]:
    """Do a single Gemini transaction and return (status, meta, body).

    The body is only read for successful text/gemini responses, it is empty
    otherwise. The connection is always closed before returning.
    """
    # The port has to be computed for each request since a redirect may lead
    # to a different host and port
    port = parsed_url.port if parsed_url.port is not None else 1965
    with socket.create_connection((parsed_url.hostname, port)) as sock:
        # SNI expects a bare hostname, the netloc may contain the port
        with context.wrap_socket(
                sock, server_hostname=parsed_url.hostname) as tls_sock:
            tls_sock.sendall((url + "\r\n").encode("UTF-8"))
            with tls_sock.makefile("rb") as fp:
                header = fp.readline().decode("UTF-8", errors="replace")
                # Split only once, the meta field may contain spaces
                fields = header.strip().split(None, 1)
                status = fields[0] if fields else ""
                meta = fields[1] if len(fields) > 1 else ""
                body = b""
                if status.startswith("2") and meta.startswith("text/gemini"):
                    body = fp.read()
    return status, meta, body


def gemini_fetch_url(url: str) -> Tuple[str, str, str, str, str]:
    """Fetch a Gemini URL and return the content as a string.

    url: URL with gemini:// or no scheme.
    Returns 5 strings: the content, the URL the content was fetched from, the
      Gemini status code, the value of the meta field and an error message.
      The error message is empty on success. If no response was received
      (non-gemini URL, connection failure) the content, status code and meta
      are empty.

    Adapted from gcat: https://github.com/aaronjanse/gcat
    """
    # Parse the URL to get the hostname and port
    parsed_url = urllib.parse.urlparse(url)
    if not parsed_url.scheme:
        url = "gemini://" + url
        parsed_url = urllib.parse.urlparse(url)
    if parsed_url.scheme != "gemini":
        return "", url, "", "", "Received non-gemini:// URL: " + url
    # NOTE: certificate verification is disabled, the identity of the server
    # is not authenticated
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.check_hostname = False
    context.verify_mode = ssl.CERT_NONE
    # Do the Gemini transaction, looping for redirects
    redirects = 0
    while True:
        try:
            status, meta, body = _gemini_request(context, url, parsed_url)
        except OSError as e:
            # Covers DNS, connection and TLS errors
            return "", url, "", "", "Could not fetch the page: " + str(e)
        # Anything but a redirect means we're done
        if not status.startswith("3"):
            break
        url = gemini_absolutise_url(url, meta)
        parsed_url = urllib.parse.urlparse(url)
        if parsed_url.scheme != "gemini":
            # Don't speak Gemini to a server of some other protocol
            return "", url, status, meta, \
                "Redirected to non-gemini:// URL: " + url
        redirects += 1
        if redirects > _max_redirects:
            # Too many redirects
            break
    # Process the response
    content = ""
    error_msg = ""
    # 2x Success
    if status.startswith("2"):
        media_type, media_type_opts = _gemini_parse_meta(meta)
        # Decode according to declared charset defaulting to UTF-8
        if meta.startswith("text/gemini"):
            charset = media_type_opts.get("charset", "UTF-8")
            try:
                content = body.decode(charset)
            except (LookupError, UnicodeDecodeError):
                error_msg = "Could not decode the response using charset " \
                    + charset
        else:
            error_msg = "Expected media type text/gemini but received " \
                + media_type
    # Handle errors
    else:
        # Try matching a 2-digit and then a 1-digit status code. Slices are
        # used so that an empty status doesn't raise.
        error_msg = (_status_code_desc.get(status[0:2])
                     or _status_code_desc.get(status[0:1])
                     or "The server sent back something weird.")
        # Substitute the contents of meta into the error message if needed
        error_msg = error_msg.replace("META", meta)
    return content, url, status, meta, error_msg


def gemtext_to_html(gemtext: str, url: str, original_url: str,
                    status: str, meta: str) -> str:
    """Convert gemtext to HTML.

    All text taken from the gemtext, the status and the meta is HTML-escaped
    because it was sent by the server and can't be trusted.

    gemtext:      The gemtext document to convert.
    url:          The URL the gemtext was received from. Used as the document
                  title and to resolve relative URLs in the gemtext content.
    original_url: The URL the original request was made at.
    status:       The Gemini status code returned by the server.
    meta:         The meta returned by the server.
    Returns the HTML representation as a string.
    """
    # Accumulate converted gemtext lines
    lines = ['<?xml version="1.0" encoding="UTF-8"?>',
             '<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">',
             "\t<head>",
             "\t\t<title>" + html.escape(url) + "</title>",
             "\t\t<style>",
             get_css(),
             "\t\t</style>",
             "\t</head>",
             "\t<body>",
             "\t<article>"]
    in_pre = False
    in_list = False
    for line in gemtext.splitlines():
        # Preformatted toggle. Checked before anything else so that the
        # other line types are not interpreted inside preformatted text.
        if line.startswith("```"):
            if in_list:
                lines.append("\t\t</ul>")
                in_list = False
            lines.append("\t\t</pre>" if in_pre else "\t\t<pre>")
            in_pre = not in_pre
            continue
        # Preformatted, keep the line as it is (blank lines included)
        if in_pre:
            lines.append(html.escape(line))
            continue
        # Add the list closing tag
        is_list_item = line.startswith("* ")
        if in_list and not is_list_item:
            lines.append("\t\t</ul>")
            in_list = False
        # Blank line, ignore
        if not line:
            continue
        # Link
        if line.startswith("=>"):
            link = line[2:].split(None, 1)
            if link:
                # Use the URL itself as the description if there is none
                description = html.escape(link[1] if len(link) > 1 else link[0])
                # Resolve relative URLs
                target = gemini_absolutise_url(url, link[0])
                lines.append("\t\t<p>" + html_href(target, description) + "</p>")
            else:
                # A link line without a URL, show it as normal text
                lines.append("\t\t<p>" + html.escape(line.strip()) + "</p>")
        # Header
        elif line.startswith("###"):
            lines.append("\t\t<h3>" + html.escape(line[3:].strip()) + "</h3>")
        elif line.startswith("##"):
            lines.append("\t\t<h2>" + html.escape(line[2:].strip()) + "</h2>")
        elif line.startswith("#"):
            lines.append("\t\t<h1>" + html.escape(line[1:].strip()) + "</h1>")
        # List
        elif is_list_item:
            if not in_list:
                lines.append("\t\t<ul>")
            in_list = True
            lines.append("\t\t\t<li>" + html.escape(line[2:].strip()) + "</li>")
        # Quote
        elif line.startswith(">"):
            lines.extend(["\t\t<blockquote>",
                          "\t\t\t<p>" + html.escape(line[1:].strip()) + "</p>",
                          "\t\t</blockquote>"])
        # Normal text
        else:
            lines.append("\t\t<p>" + html.escape(line.strip()) + "</p>")
    # Close whatever is still open at the end of the document
    if in_list:
        lines.append("\t\t</ul>")
    if in_pre:
        lines.append("\t\t</pre>")
    url_html = html_href(url, html.escape(url))
    original_url_html = html_href(original_url, html.escape(original_url))
    lines.extend(["",
                  "\t</article>",
                  "\t<details>",
                  "\t\t<summary>",
                  "\t\t\tContent from " + url_html,
                  "\t\t</summary>",
                  "\t\t<dl>",
                  "\t\t\t<dt>Original URL</dt>",
                  "\t\t\t<dd>" + original_url_html + "</dd>",
                  "\t\t\t<dt>Status</dt>",
                  "\t\t\t<dd>" + html.escape(status) + "</dd>",
                  "\t\t\t<dt>Meta</dt>",
                  "\t\t\t<dd>" + html.escape(meta) + "</dd>",
                  "\t\t\t<dt>Fetched by</dt>",
                  '\t\t\t<dd><a href="https://git.sr.ht/~sotirisp/qute-gemini">qute-gemini ' + str(_version) + "</a></dd>",
                  "\t\t</dl>",
                  "\t</details>",
                  "\t</body>",
                  "</html>"])
    return "\n".join(lines)


def get_css() -> str:
    """Return the contents of the custom CSS file, or "" if there is none.

    The file is looked for at the path given by qute_gemini_css_path() and
    its contents are returned with surrounding whitespace removed.
    """
    stylesheet = qute_gemini_css_path()
    if not os.path.isfile(stylesheet):
        # No custom stylesheet, use no CSS
        return ""
    with open(stylesheet, "r") as css:
        return css.read().strip()


def qute_error_page(url: str, description: str) -> str:
    """Return a data URI error page like qutebrowser does.

    url:         The URL of the page that failed to load.
    description: A description of the error.
    Returns a data URI containing the error page.
    """
    # Imported locally because the module does not import re at the top
    import re

    escaped_url = html.escape(url)
    substitutions = {
        "URL_TEXT": escaped_url,
        "URL": escaped_url,
        "DESCRIPTION": html.escape(description),
        "CSS": get_css(),
    }
    # Substitute all placeholders in a single pass so that text inserted for
    # one placeholder is never mistaken for another placeholder. URL_TEXT
    # comes before URL in the pattern, otherwise URL would match its prefix.
    html_page = re.sub("URL_TEXT|URL|DESCRIPTION|CSS",
                       lambda match: substitutions[match.group(0)],
                       _error_page_template)
    # URL encode and return as a data URI
    return "data:text/html;charset=UTF-8," + urllib.parse.quote(html_page)


def open_gemini(url: str, open_args: str) -> None:
    """Open Gemini URL in qutebrowser.

    url:       The gemini:// URL to fetch and show.
    open_args: Arguments for qutebrowser's open command, may be empty.

    On success the page is converted to HTML in a temporary file which is
    then opened. On failure an error page is opened from a data URI.
    """
    # Get the Gemini content
    content, content_url, status, meta, error_msg = gemini_fetch_url(url)
    if error_msg:
        # Generate an error page in a data URI
        open_url = qute_error_page(url, error_msg)
    else:
        # Success, convert to HTML in a temporary file. The file is kept
        # (delete=False) because qutebrowser has to read it afterwards. It is
        # written as UTF-8 since that is the encoding the page declares.
        page = gemtext_to_html(content, content_url, url, status, meta)
        with tempfile.NamedTemporaryFile("w", suffix=".html", delete=False,
                                         encoding="utf-8") as tmpf:
            tmpf.write(page)
        open_url = "file://" + tmpf.name
    # Separate the parts with spaces in both cases, leaving out empty
    # arguments, so that the URL is never glued to the arguments
    command = " ".join(part for part in ("open", open_args, open_url) if part)
    # Open the page in qutebrowser
    with open(qute_fifo(), "w") as qfifo:
        qfifo.write(command)


def open_other(url: str, open_args: str) -> None:
    """Hand a non-Gemini URL back to qutebrowser to open it normally.

    url:       The URL to open.
    open_args: Arguments for qutebrowser's open command, may be empty.
    """
    command = " ".join(["open", open_args, url])
    with open(qute_fifo(), "w") as fifo:
        fifo.write(command)


if __name__ == "__main__":
    # A script name ending in "-tab" means the page goes to a new tab,
    # otherwise it replaces the current one
    open_args = "-t" if sys.argv[0].endswith("-tab") else ""
    url = qute_url()
    # Gemini URLs are fetched and rendered by this script, everything else
    # is passed back to qutebrowser untouched
    if urllib.parse.urlparse(url).scheme != "gemini":
        open_other(url, open_args)
    else:
        open_gemini(url, open_args)