@@ 5,7 5,9 @@ import (
"io"
"net/url"
"os"
+ "strconv"
"strings"
+ "time"
"github.com/makeworld-the-better-one/go-gemini"
"golang.org/x/exp/slices"
@@ 23,45 25,105 @@ func crawl(link string) {
crawledLinks = append(crawledLinks, link)
client := &gemini.Client{Insecure: true}
+ // FIXME: ignoring errors
response, _ := client.Fetch(link)
buf := new(strings.Builder)
io.Copy(buf, response.Body)
- if response.Status == 30 || response.Status == 31 {
+ switch response.Status {
+ case 10:
+ // INPUT
+ fallthrough
+ case 11:
+ // SENSITIVE INPUT
+ fmt.Fprintf(os.Stdout, "%s\t%d\n", link, response.Status)
+ return
+ case 30:
+ fallthrough
+ case 31:
fmt.Fprintf(os.Stdout, "%s\t%d\n", link, response.Status)
crawl(response.Meta)
return
- }
-
- if response.Status != 20 {
+ case 40:
+ // TEMPORARY FAILURE
+ fallthrough
+ case 41:
+ // SERVER UNAVAILABLE
+ fallthrough
+ case 42:
+ // CGI ERROR
+ fallthrough
+ case 43:
+ // PROXY ERROR
fmt.Fprintf(os.Stderr, "%s\t%d\n", link, response.Status)
failed = true
return
- }
-
- if response.Meta != "text/gemini" {
- fmt.Fprintf(os.Stdout, "%s\t%d\n", link, response.Status)
+ case 44:
+ // SLOW DOWN
+ // BUG: infinite slowdown?
+ // FIXME: ignoring errors
+ seconds, _ := strconv.ParseFloat(response.Meta, 64)
+ duration := time.Duration(seconds * float64(time.Second))
+ time.Sleep(duration)
+ crawl(link)
+ case 50:
+ // PERMANENT FAILURE
+ fallthrough
+ case 51:
+ // NOT FOUND
+ fallthrough
+ case 52:
+ // GONE
+ fallthrough
+ case 53:
+ // PROXY REQUEST REFUSED
+ fallthrough
+ case 59:
+ // BAD REQUEST
+ fallthrough
+ case 60:
+ // CLIENT CERTIFICATE REQUIRED
+ fallthrough
+ case 61:
+ // CERTIFICATE NOT AUTHORISED
+ fallthrough
+ case 62:
+ // CERTIFICATE NOT VALID
+ fmt.Fprintf(os.Stderr, "%s\t%d\n", link, response.Status)
+ failed = true
return
- }
+ case 20:
+ if response.Meta != "text/gemini" {
+ fmt.Fprintf(os.Stdout, "%s\t%d\n", link, response.Status)
+ return
+ }
- page, _ := gemtext.ParsePage(buf.String())
- for _, component := range page {
- if component.Path != "" {
- path, _ := url.Parse(component.Path)
- hostname := path.Hostname()
+ // FIXME: ignoring errors
+ page, _ := gemtext.ParsePage(buf.String())
- if slices.Contains(crawledLinks, path.String()) {
- continue
- }
- crawledLinks = append(crawledLinks, path.String())
+ for _, component := range page {
+ if component.Path != "" {
+ // FIXME: ignoring errors
+ path, _ := url.Parse(component.Path)
+ hostname := path.Hostname()
+
+ if slices.Contains(crawledLinks, path.String()) {
+ continue
+ }
+ crawledLinks = append(crawledLinks, path.String())
- if hostname == "" {
- absolutePath := rootHostname + path.String()
- crawl(absolutePath)
- } else {
- fmt.Fprintf(os.Stdout, "%s\t \n", path)
+ if hostname == "" {
+ absolutePath := rootHostname + path.String()
+ crawl(absolutePath)
+ } else {
+ fmt.Fprintf(os.Stdout, "%s\t \n", path)
+ }
}
}
+ default:
+ // should never get here; Status is an integer (see the cases above), so format it with %d
+ panic(fmt.Sprintf("unknown Gemini response code: %d\n", response.Status))
+
}
}