~ols/cramp

de4c5ab2643ddf2dab3dc690c85dfb82d91931db — Oliver Leaver-Smith 2 years ago 4dd390c + 933cb02 master
Use shiroi readability and add page title
2 files changed, 31 insertions(+), 16 deletions(-)

M README.md
M cramp.go
M README.md => README.md +7 -5
@@ 2,22 2,24 @@

## A web=>gemini proxy that does a half-decent job of converting web pages to Gemtext

### Running
You can see a demo of this running by pointing your Gemini browser at gemini://ols.wtf:5555

1. Generate some TLS certificates in any manner you like, e.g.
Alternatively, you can view this page through the proxy by clicking [here](gemini://ols.wtf:5555?https%3A%2F%2Fgit.sr.ht%2F~ols%2Fcramp)... how meta!

### Generate some TLS certificates in any manner you like, e.g.

```
openssl ecparam -genkey -name prime256v1 -out server.key
openssl req -new -x509 -key server.key -out server.pem -days 3650
```

2. Set the following environment variables
### Set the following environment variables

* `PORT`: port on which the server will run (defaults to 1965)
* `CRAMP_ROOT`: the root at which you will be running the server, e.g. **gemini://cramp.demo/** (defaults to gemini://localhost/)

3. You can then run with `go run cramp.go`
You can then run with `go run cramp.go`

### //TODO
## //TODO

[] Add better^H^H^H^H^H^H logging

M cramp.go => cramp.go +24 -11
@@ 2,6 2,7 @@ package main

import (
	"bufio"
	"bytes"
	"crypto/tls"
	"fmt"
	"io/ioutil"


@@ 14,7 15,7 @@ import (
	"strings"

	"github.com/LukeEmmet/html2gemini"
	"github.com/cixtor/readability"
	readability "github.com/go-shiori/go-readability"
)

func main() {


@@ 65,14 66,14 @@ func main() {
					return
				} else {
					query := strings.Join(split[1:], "?")
					url, err := url.QueryUnescape(query)
					urlQuery, err := url.QueryUnescape(query)
					if err != nil {
						returned = "20 text/gemini; charset=utf-8\r\n# Uh oh\nUnable to read that URL, sorry\n=> / Try another"
						c.Write([]byte(returned))
						c.Close()
						return
					}
					response, err := http.Get(url)
					response, err := http.Get(urlQuery)
					if err != nil {
						returned = "20 text/gemini; charset=utf-8\r\n# Uh oh\nUnable to visit that URL, sorry\n=> / Try another"
						c.Write([]byte(returned))


@@ 80,7 81,19 @@ func main() {
						return
					}
					defer response.Body.Close()
					text, err := readability.New().Parse(response.Body, url)
					bodyBytes, _ := ioutil.ReadAll(response.Body)
					mimeType := http.DetectContentType(bodyBytes)
					mimere := regexp.MustCompile(`text/.*`)
					if !mimere.MatchString(mimeType) {
						returned = "20 " + mimeType + "\r\n" + string(bodyBytes)
						c.Write([]byte(returned))
						c.Close()
						return
					}
					fmt.Printf("Trying to read %s\n", urlQuery)
					body := bytes.NewReader(bodyBytes)
					urlAsURL, _ := url.Parse(urlQuery)
					text, err := readability.FromReader(body, urlAsURL)
					if err != nil {
						returned = "20 text/gemini; charset=utf-8\r\n# Uh oh\nUnable to parse that page, sorry\n=> / Try another"
						c.Write([]byte(returned))


@@ 94,26 107,26 @@ func main() {
						CitationStart:       1,
						CitationMarkers:     true,
						NumberedLinks:       true,
						LinkEmitFrequency:   0,
						EmitImagesAsLinks:   false,
						LinkEmitFrequency:   9999,
						EmitImagesAsLinks:   true,
						ImageMarkerPrefix:   "‡",
						EmptyLinkPrefix:     ">>",
					}
					ctx := html2gemini.NewTraverseContext(options)
					fmt.Printf("Trying to gemtextify %s\n", urlQuery)
					gemtext, err := html2gemini.FromString(text.Content, *ctx)
					re := regexp.MustCompile(`=> http`)
					httpReplace := "=> " + root + "/?http"
					gemtext = re.ReplaceAllString(gemtext, httpReplace)
					if err != nil {
						returned = "20 text/gemini; charset=utf-8\r\n# Uh oh\nUnable to convert that site to Gemtext, sorry"
						c.Write([]byte(returned))
						c.Close()
						return
					}
					returned = "20 text/gemini; charset=utf-8\r\n" + gemtext
					re := regexp.MustCompile(`=> http`)
					httpReplace := "=> " + root + "/?http"
					gemtext = re.ReplaceAllString(gemtext, httpReplace)
					returned = "20 text/gemini; charset=utf-8\r\n# " + text.Title + "\n\n" + gemtext
					c.Write([]byte(returned))
					c.Close()

				}
			}
		}(conn)