~elektito/gemplex

065e7419e5d0a8346d50c0f7f3fac33f2f531813 — Mostafa Razavi 7 months ago d1df491
Add random ascii art page and permalink
5 files changed, 258 insertions(+), 17 deletions(-)

M cmd/gemplex/crawl.go
M cmd/gemplex/main.go
M cmd/gemplex/search.go
M cmd/gpcgi/main.go
M pkg/gsearch/gsearch.go
M cmd/gemplex/crawl.go => cmd/gemplex/crawl.go +0 -8
@@ 69,7 69,6 @@ func errGeminiSlowdown(meta string) *GeminiSlowdownError {
var _ error = (*GeminiSlowdownError)(nil)

var ErrRobotsBackoff = fmt.Errorf("Backing off from fetching robots.txt")
var Db *sql.DB

func readGemini(ctx context.Context, client *gemini.Client, u *url.URL, visitorId string) (body []byte, code int, meta string, finalUrl *url.URL, err error) {
	redirs := 0


@@ 986,13 985,6 @@ func dumpCrawlerState(filename string, nprocs int, urls [][]string) {
func crawl(done chan bool, wg *sync.WaitGroup) {
	defer wg.Done()

	// open (and check) database for all workers to use
	var err error
	Db, err = sql.Open("postgres", Config.GetDbConnStr())
	utils.PanicOnErr(err)
	err = Db.Ping()
	utils.PanicOnErr(err)

	nprocs := 500

	// create an array of channel, which will each serve as the input to each

M cmd/gemplex/main.go => cmd/gemplex/main.go +11 -0
@@ 1,6 1,7 @@
package main

import (
	"database/sql"
	"flag"
	"fmt"
	"log"


@@ 14,10 15,12 @@ import (

	"git.sr.ht/~elektito/gemplex/pkg/config"
	"git.sr.ht/~elektito/gemplex/pkg/gcrawler"
	"git.sr.ht/~elektito/gemplex/pkg/utils"
)

// Config is the process-wide configuration; main assigns it from
// config.LoadConfig using the path given by the -config flag.
var Config *config.Config

// CrawlerStateFile is a string pointer.
// NOTE(review): presumably a flag holding the crawler state file path —
// confirm where it is assigned.
var CrawlerStateFile *string

// Db is the shared database handle. main opens it with the connection string
// from Config and pings it once before any command runs.
var Db *sql.DB

func main() {
	configFile := flag.String("config", "", "config file")


@@ 30,6 33,14 @@ func main() {
	flag.Parse()

	Config = config.LoadConfig(*configFile)

	// open (and check) database for all workers to use
	var err error
	Db, err = sql.Open("postgres", Config.GetDbConnStr())
	utils.PanicOnErr(err)
	err = Db.Ping()
	utils.PanicOnErr(err)

	updateBlacklist()

	var cmds []string

M cmd/gemplex/search.go => cmd/gemplex/search.go +99 -9
@@ 8,11 8,16 @@ import (
	"net"
	"os"
	"sync"
	"time"

	"git.sr.ht/~elektito/gemplex/pkg/gsearch"
	"git.sr.ht/~elektito/gemplex/pkg/utils"
)

// TypedRequest is the minimal envelope decoded from a request line before
// dispatch: it carries only the request type, which handleConn switches on to
// choose the handler ("search", "randimg" or "getimg").
type TypedRequest struct {
	// Type is read from the JSON key "t".
	Type string `json:"t"`
}

func search(done chan bool, wg *sync.WaitGroup) {
	defer wg.Done()



@@ 59,31 64,116 @@ func handleConn(conn net.Conn) {
		return
	}

	reqLine := scanner.Bytes()
	log.Println("Request:", scanner.Text())

	var req TypedRequest
	req.Type = "search"
	err := json.Unmarshal(reqLine, &req)
	if err != nil {
		conn.Write([]byte("bad request"))
		return
	}

	var resp []byte
	switch req.Type {
	case "search":
		resp = handleSearchRequest(reqLine)
	case "randimg":
		resp = handleRandImgRequest(reqLine)
	case "getimg":
		resp = handleGetImgRequest(reqLine)
	default:
		resp = errorResponse("unknown request type")
		return
	}

	resp = append(resp, byte('\n'))
	conn.Write(resp)
}

// handleSearchRequest decodes reqLine as a gsearch.SearchRequest (Page
// defaults to 1), runs it through gsearch.Search against idx and returns the
// JSON-encoded result. A request that cannot be decoded, has an empty query,
// or fails during search or marshalling yields the bytes produced by
// errorResponse instead.
//
// NOTE(review): this listing appears to interleave removed pre-refactor lines
// (the ones using scanner/conn and the bare returns) with their replacements;
// confirm against the real file before relying on the exact statements.
func handleSearchRequest(reqLine []byte) []byte {
	var req gsearch.SearchRequest
	req.Page = 1
	err := json.Unmarshal(scanner.Bytes(), &req)
	err := json.Unmarshal(reqLine, &req)
	if err != nil {
		conn.Write(errorResponse("bad request"))
		return
		return errorResponse("bad request")
	}

	if req.Query == "" {
		conn.Write(errorResponse("no query"))
		return
		return errorResponse("no query")
	}

	resp, err := gsearch.Search(req, idx)
	if err != nil {
		conn.Write(errorResponse(err.Error()))
		return
		return errorResponse(err.Error())
	}

	jsonResp, err := json.Marshal(resp)
	if err != nil {
		return errorResponse(fmt.Sprintf("Error marshalling results: %s", err))
	}

	return jsonResp
}

// handleRandImgRequest answers a "randimg" request with one randomly chosen
// row of the images table, JSON-encoded. reqLine is not inspected. A failed
// query/scan or a marshalling failure is reported through errorResponse.
func handleRandImgRequest(reqLine []byte) []byte {
	// Sample the table first, then shuffle the sample and keep a single row.
	const query = `
select * from
	(select url, alt, image_hash, image, fetch_time from images tablesample bernoulli(1)) s
order by random() limit 1;
`

	var img struct {
		Url       string    `json:"url"`
		Alt       string    `json:"alt"`
		Image     string    `json:"image"`
		FetchTime time.Time `json:"fetch_time"`
		ImageId   string    `json:"image_id"`
	}

	// Column order in the query is url, alt, image_hash, image, fetch_time.
	if err := Db.QueryRow(query).Scan(&img.Url, &img.Alt, &img.ImageId, &img.Image, &img.FetchTime); err != nil {
		return errorResponse(fmt.Sprintf("Database error: %s", err))
	}

	encoded, err := json.Marshal(img)
	if err != nil {
		return errorResponse(fmt.Sprintf("Error marshalling results: %s", err))
	}

	return encoded
}

// handleGetImgRequest looks up a single image: it reads the "id" field from
// reqLine, selects the row of the images table whose image_hash equals it and
// returns that row JSON-encoded. Decode, database and marshalling failures are
// returned as the bytes produced by errorResponse.
//
// NOTE(review): the two statements that reference conn look like removed diff
// lines that this listing interleaves with the new code; confirm against the
// real file before relying on the exact statements.
func handleGetImgRequest(reqLine []byte) []byte {
	var req struct {
		Id string `json:"id"`
	}

	var resp struct {
		Url       string    `json:"url"`
		Alt       string    `json:"alt"`
		Image     string    `json:"image"`
		FetchTime time.Time `json:"fetch_time"`
		ImageId   string    `json:"image_id"`
	}

	err := json.Unmarshal(reqLine, &req)
	if err != nil {
		return errorResponse("bad request")
	}

	err = json.NewEncoder(conn).Encode(resp)
	row := Db.QueryRow(`select url, alt, image_hash, image, fetch_time from images where image_hash = $1`, req.Id)
	err = row.Scan(&resp.Url, &resp.Alt, &resp.ImageId, &resp.Image, &resp.FetchTime)
	if err != nil {
		conn.Write(errorResponse(fmt.Sprintf("Error marshalling results: %s", err)))
		return errorResponse(fmt.Sprintf("Database error: %s", err))
	}

	jsonResp, err := json.Marshal(resp)
	if err != nil {
		return errorResponse(fmt.Sprintf("Error marshalling results: %s", err))
	}

	return jsonResp
}

func errorResponse(msg string) (resp []byte) {

M cmd/gpcgi/main.go => cmd/gpcgi/main.go +145 -0
@@ 14,6 14,7 @@ import (
	"os"
	"regexp"
	"strconv"
	"strings"
	"text/template"
	"time"



@@ 82,6 83,147 @@ func cgi(r io.Reader, w io.Writer, params Params) {
		return
	}

	switch {
	case strings.HasPrefix(u.Path, "/search"):
		fallthrough
	case strings.HasPrefix(u.Path, "/v/search"):
		handleSearch(u, r, w, params)
	case strings.HasPrefix(u.Path, "/image/random"):
		handleRandomImage(u, r, w, params)
	case strings.HasPrefix(u.Path, "/image/perm/"):
		handleImagePermalink(u, r, w, params)
	default:
		geminiHeader(w, 51, "Not found")
	}
}

// handleRandomImage serves the random-image page: it sends a "randimg" request
// to the search daemon over its unix socket, decodes the reply and renders it
// as a text/gemini document written to w.
//
// u and r are not used by this handler. Failures to reach the backend, to
// encode the request or to decode the reply are logged and reported to the
// client through cgiErr.
func handleRandomImage(u *url.URL, r io.Reader, w io.Writer, params Params) {
	var req struct {
		Type string `json:"t"`
	}

	var resp struct {
		Url       string    `json:"url"`
		Alt       string    `json:"alt"`
		Image     string    `json:"image"`
		FetchTime time.Time `json:"fetch_time"`
		ImageId   string    `json:"image_id"`
	}

	conn, err := net.Dial("unix", params.SearchDaemonSocket)
	if err != nil {
		log.Println("Cannot connect to search backend:", err)
		cgiErr(w, "Cannot connect to search backend")
		return
	}
	// The socket is only needed for this one request/response exchange.
	defer conn.Close()

	req.Type = "randimg"
	err = json.NewEncoder(conn).Encode(req)
	if err != nil {
		log.Println("Error encoding search request:", err)
		cgiErr(w, "Internal error")
		return
	}

	err = json.NewDecoder(conn).Decode(&resp)
	if err != nil {
		log.Println("Internal error:", err)
		cgiErr(w, "Internal error")
		return
	}

	// A reply without an image id carries no image (presumably an error
	// payload from the backend, e.g. when the sampled query matched no row);
	// report it instead of rendering an empty page with a zero fetch time.
	if resp.ImageId == "" {
		log.Println("Search backend returned no image")
		cgiErr(w, "Internal error")
		return
	}

	// XXX stands in for the preformatted-text fence, which cannot be written
	// literally inside a raw string literal.
	t := `# 🖼️ Gemplex - Random Gemini Image

XXX {{ .Alt }}
{{ .Image }}
XXX

{{ if .Alt }}Alt: {{ .Alt }}{{ else }}No alt text.{{ end }}

Fetched at {{ .FetchTime }} from:
=> {{ .Url }} Source

=> /image/perm/{{ .ImageId }} ♾️ Permalink
=> / 🏠 Gemplex Home
`
	t = strings.Replace(t, "XXX", "```", 2)
	tmpl := template.Must(template.New("root").Parse(t))

	// Render into a buffer first so the success header is only sent once the
	// whole page has been produced.
	var out bytes.Buffer
	err = tmpl.Execute(&out, resp)
	utils.PanicOnErr(err)

	geminiHeader(w, 20, "text/gemini")
	w.Write(out.Bytes())
}

// handleImagePermalink serves the permalink page of a single image. The image
// id is whatever follows "/image/perm/" in u.Path; it is sent to the search
// daemon in a "getimg" request over the daemon's unix socket, and the reply is
// rendered as a text/gemini document written to w.
//
// r is not used by this handler. Backend and encoding/decoding failures are
// logged and reported through cgiErr; a reply without an image id is answered
// with status 51 ("Not found").
func handleImagePermalink(u *url.URL, r io.Reader, w io.Writer, params Params) {
	var req struct {
		Type string `json:"t"`
		Id   string `json:"id"`
	}

	var resp struct {
		Url       string    `json:"url"`
		Alt       string    `json:"alt"`
		Image     string    `json:"image"`
		FetchTime time.Time `json:"fetch_time"`
		ImageId   string    `json:"image_id"`
	}

	conn, err := net.Dial("unix", params.SearchDaemonSocket)
	if err != nil {
		log.Println("Cannot connect to search backend:", err)
		cgiErr(w, "Cannot connect to search backend")
		return
	}
	// The socket is only needed for this one request/response exchange.
	defer conn.Close()

	req.Type = "getimg"
	// TrimPrefix instead of slicing by length: same result when the prefix is
	// present, and no out-of-range panic if the path is ever shorter.
	req.Id = strings.TrimPrefix(u.Path, "/image/perm/")
	err = json.NewEncoder(conn).Encode(req)
	if err != nil {
		log.Println("Error encoding search request:", err)
		cgiErr(w, "Internal error")
		return
	}

	err = json.NewDecoder(conn).Decode(&resp)
	if err != nil {
		log.Println("Internal error:", err)
		cgiErr(w, "Internal error")
		return
	}

	// No image id in the reply means the backend did not return an image.
	if resp.ImageId == "" {
		geminiHeader(w, 51, "Not found")
		return
	}

	// XXX stands in for the preformatted-text fence, which cannot be written
	// literally inside a raw string literal.
	t := `# 🖼️ Gemplex - Random Gemini Image

XXX {{ .Alt }}
{{ .Image }}
XXX

{{ if .Alt }}Alt: {{ .Alt }}{{ else }}No alt text.{{ end }}

Fetched at {{ .FetchTime }} from:
=> {{ .Url }} Source

=> /image/random 🔀 Random Image
=> / 🏠 Gemplex Home
`
	t = strings.Replace(t, "XXX", "```", 2)
	tmpl := template.Must(template.New("root").Parse(t))

	// Render into a buffer first so the success header is only sent once the
	// whole page has been produced.
	var out bytes.Buffer
	err = tmpl.Execute(&out, resp)
	utils.PanicOnErr(err)

	geminiHeader(w, 20, "text/gemini")
	w.Write(out.Bytes())
}

func handleSearch(u *url.URL, r io.Reader, w io.Writer, params Params) {
	if u.RawQuery == "" {
		geminiHeader(w, 10, "Search query")
		return


@@ 95,6 237,7 @@ func cgi(r io.Reader, w io.Writer, params Params) {
		geminiHeader(w, 59, "Bad URL")
		return
	} else if err != nil {
		log.Println("Internal error:", err)
		cgiErr(w, "Internal error")
		return
	}


@@ 116,6 259,7 @@ func cgi(r io.Reader, w io.Writer, params Params) {
	var resp gsearch.SearchResponse
	err = json.NewDecoder(conn).Decode(&resp)
	if err != nil {
		log.Println("Internal error:", err)
		cgiErr(w, "Internal error")
		return
	}


@@ 250,6 394,7 @@ func parseSearchRequest(u *url.URL) (req gsearch.SearchRequest, err error) {
	}

	// default value
	req.Type = "search"
	req.Page = 1

	for i, name := range re.SubexpNames() {

M pkg/gsearch/gsearch.go => pkg/gsearch/gsearch.go +3 -0
@@ 41,6 41,9 @@ type RankedSort struct {
}

type SearchRequest struct {
	// for search requests, this should be "search"
	Type string `json:"t"`

	Query          string `json:"q"`
	Page           int    `json:"page,omitempty"`
	HighlightStyle string `json:"-"`