~duncan-bayne/gemlint

8bdd42bbd3cd0e9149cf04c043577279ea575359 — Duncan Bayne 2 years ago d454129
Handle (somewhat) all possible Gemini response codes
1 file changed, 85 insertions(+), 23 deletions(-)

M main.go
M main.go => main.go +85 -23
@@ 5,7 5,9 @@ import (
	"io"
	"net/url"
	"os"
	"strconv"
	"strings"
	"time"

	"github.com/makeworld-the-better-one/go-gemini"
	"golang.org/x/exp/slices"


@@ 23,45 25,105 @@ func crawl(link string) {
	crawledLinks = append(crawledLinks, link)

	client := &gemini.Client{Insecure: true}
	// FIXME: ignoring errors
	response, _ := client.Fetch(link)
	buf := new(strings.Builder)
	io.Copy(buf, response.Body)

	if response.Status == 30 || response.Status == 31 {
	switch response.Status {
	case 10:
		// INPUT
		fallthrough
	case 11:
		// SENSITIVE INPUT
		fmt.Fprintf(os.Stdout, "%s\t%d\n", link, response.Status)
		return
	case 30:
		fallthrough
	case 31:
		fmt.Fprintf(os.Stdout, "%s\t%d\n", link, response.Status)
		crawl(response.Meta)
		return
	}

	if response.Status != 20 {
	case 40:
		// TEMPORARY FAILURE
		fallthrough
	case 41:
		// SERVER UNAVAILABLE
		fallthrough
	case 42:
		// CGI ERROR
		fallthrough
	case 43:
		// PROXY ERROR
		fmt.Fprintf(os.Stderr, "%s\t%d\n", link, response.Status)
		failed = true
		return
	}

	if response.Meta != "text/gemini" {
		fmt.Fprintf(os.Stdout, "%s\t%d\n", link, response.Status)
	case 44:
		// SLOW DOWN
		// BUG: infinite slowdown?
		// FIXME: ignoring errors
		seconds, _ := strconv.ParseFloat(response.Meta, 64)
		duration := time.Duration(seconds * float64(time.Second))
		time.Sleep(duration)
		crawl(link)
	case 50:
		// PERMANENT FAILURE
		fallthrough
	case 51:
		// NOT FOUND
		fallthrough
	case 52:
		// GONE
		fallthrough
	case 53:
		// PROXY REQUEST REFUSED
		fallthrough
	case 59:
		// BAD REQUEST
		fallthrough
	case 60:
		// CLIENT CERTIFICATE REQUIRED
		fallthrough
	case 61:
		// CERTIFICATE NOT AUTHORISED
		fallthrough
	case 62:
		// CERTIFICATE NOT VALID
		fmt.Fprintf(os.Stderr, "%s\t%d\n", link, response.Status)
		failed = true
		return
	}
	case 20:
		if response.Meta != "text/gemini" {
			fmt.Fprintf(os.Stdout, "%s\t%d\n", link, response.Status)
			return
		}

	page, _ := gemtext.ParsePage(buf.String())
	for _, component := range page {
		if component.Path != "" {
			path, _ := url.Parse(component.Path)
			hostname := path.Hostname()
		// FIXME: ignoring errors
		page, _ := gemtext.ParsePage(buf.String())

			if slices.Contains(crawledLinks, path.String()) {
				continue
			}
			crawledLinks = append(crawledLinks, path.String())
		for _, component := range page {
			if component.Path != "" {
				// FIXME: ignoring errors
				path, _ := url.Parse(component.Path)
				hostname := path.Hostname()

				if slices.Contains(crawledLinks, path.String()) {
					continue
				}
				crawledLinks = append(crawledLinks, path.String())

			if hostname == "" {
				absolutePath := rootHostname + path.String()
				crawl(absolutePath)
			} else {
				fmt.Fprintf(os.Stdout, "%s\t \n", path)
				if hostname == "" {
					absolutePath := rootHostname + path.String()
					crawl(absolutePath)
				} else {
					fmt.Fprintf(os.Stdout, "%s\t \n", path)
				}
			}
		}
	default:
		// should never get here
		panic(fmt.Sprintf("unknown Gemini response code: %s\n", response.Status))

	}
}