~mendelmaleh/tg_xkcdbot

bc3a95d34a6d646f428a82d57a725f490c3a163b — Mendel E 4 years ago 031ce03
Proper full text search

Also:
- Proper formatting
- Decent error handling
- Download xkcds at start
4 files changed, 166 insertions(+), 56 deletions(-)

M bleve.go
M go.mod
M go.sum
M main.go
M bleve.go => bleve.go +73 -34
@@ 1,9 1,10 @@
package main

import (
	"encoding/json"
	"io/ioutil"
	"path/filepath"
	"encoding/binary"
	"errors"
	"fmt"
	"html"
	"strconv"

	"github.com/blevesearch/bleve"


@@ 11,50 12,85 @@ import (
	"github.com/blevesearch/bleve/analysis/lang/en"

	"git.sr.ht/~mendelmaleh/tgbotapi"
	"git.sr.ht/~mendelmaleh/xkcd"
)

// ErrNoHits is the error FTS returns when a search produces zero hits.
var ErrNoHits = errors.New("no search results")

// FTS queries the bleve index and returns a []interface{} whose
// elements are tgbotapi.InlineQueryResultPhoto values.
func (bot *Bot) FTS(query string, n int) ([]interface{}, error) {
func (bot *Bot) FTS(query string, max int) ([]interface{}, error) {
	search := bleve.NewSearchRequest(bleve.NewQueryStringQuery(query))
	search.Fields = []string{"num", "alt", "img"}
	results, err := bot.Bleve.Search(search)
	if err != nil {
		return make([]interface{}, 0), err
	}

	hits := results.Hits
	if len(hits) > n {
		hits = hits[:n]
	if len(hits) == 0 {
		return make([]interface{}, 0), ErrNoHits
	} else if max > 0 && len(hits) > max {
		hits = hits[:max]
	}

	res := make([]interface{}, len(hits))
	for i, h := range hits {
		r := &tgbotapi.InlineQueryResultPhoto{
			Type: "photo", // must be
		}

		if v, ok := h.Fields["num"].(float64); ok {
			r.ID = strconv.Itoa(int(v))
		d, err := bot.Bleve.Document(h.ID)
		if err != nil {
			return make([]interface{}, 0), err
		}

		if v, ok := h.Fields["alt"].(string); ok {
			r.Caption = v
		var c xkcd.Comic
		for _, f := range d.Fields {
			switch f.Name() {
			case "title":
				c.Title = string(f.Value())
			case "alt":
				c.Alt = string(f.Value())
			case "img":
				c.Img = string(f.Value())
			}
		}

		if v, ok := h.Fields["img"].(string); ok {
			r.ThumbURL = v
			r.URL = v
		res[i] = tgbotapi.InlineQueryResultPhoto{
			Type:      "photo", // must be
			ID:        h.ID,
			URL:       c.Img,
			ThumbURL:  c.Img,
			Title:     c.Title,
			ParseMode: "html",
			Caption: fmt.Sprintf("<a href=\"%s\">#%s:</a> <i>%s</i>",
				xkcd.DefaultBaseURL+h.ID, h.ID, html.EscapeString(c.Alt)),
		}

		res[i] = r
	}

	return res, nil
}

func (bot *Bot) IndexData() error {
	files, err := ioutil.ReadDir(bot.Config.Bleve.Data)
func (bot *Bot) Update() error {
	lastByte, err := bot.Bleve.GetInternal(lastKey)
	if err != nil {
		return err
	}

	var lastIndex int
	switch cap(lastByte) {
	case 0:
		lastIndex = 0
	case 4:
		lastIndex = int(binary.LittleEndian.Uint32(lastByte))
	default:
		return fmt.Errorf(
			"can't get int from lastByte %q, type %T, len %d, cap %d\n",
			lastByte, lastByte, len(lastByte), cap(lastByte),
		)
	}

	if lastIndex == 0 {
		lastIndex = 1
	}

	lastComic, err := bot.XKCD.GetNum(0)
	if err != nil {
		return err
	}


@@ 62,24 98,19 @@ func (bot *Bot) IndexData() error {
	batch := bot.Bleve.NewBatch()
	count := 0

	for _, f := range files {
		n := f.Name()

		// bytes
		jsonBytes, err := ioutil.ReadFile(bot.Config.Bleve.Data + "/" + n)
		if err != nil {
			return err
	for i := int(lastIndex); i <= lastComic.Num; i++ {
		if i == 404 {
			continue
		}

		// json
		var jsonDoc interface{}
		if err := json.Unmarshal(jsonBytes, &jsonDoc); err != nil {
		// get json, marshal
		c, err := bot.XKCD.GetNum(i)
		if err != nil {
			return err
		}

		// index
		id := n[:len(n)-len(filepath.Ext(n))]
		batch.Index(id, jsonDoc)
		batch.Index(strconv.Itoa(c.Num), c)
		count++

		// exec batch


@@ 100,6 131,14 @@ func (bot *Bot) IndexData() error {
		}
	}

	if lastIndex < 0 || lastIndex > 0xffffffff {
		return fmt.Errorf("cannot store %d as uint32\n", lastIndex)
	}

	bs := make([]byte, 4)
	binary.LittleEndian.PutUint32(bs, uint32(lastComic.Num))
	bot.Bleve.SetInternal(lastKey, bs)

	return nil
}


M go.mod => go.mod +8 -1
@@ 2,9 2,12 @@ module git.sr.ht/~mendelmaleh/tg_xkcdbot

go 1.14

replace git.sr.ht/~mendelmaleh/tgbotapi => /home/mendel/go/src/git.sr.ht/~mendelmaleh/tgbotapi

require (
	git.sr.ht/~mendelmaleh/tgbotapi v0.0.0-20200428075811-843aa9fd9bdd
	git.sr.ht/~mendelmaleh/xkcd v0.0.0-20200428044053-ed73e79a6523
	git.sr.ht/~mendelmaleh/xkcd v0.0.0-20200428131247-24696ead7e33
	github.com/RoaringBitmap/roaring v0.4.23 // indirect
	github.com/blevesearch/bleve v1.0.7
	github.com/cznic/b v0.0.0-20181122101859-a26611c4d92d // indirect
	github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548 // indirect


@@ 12,8 15,12 @@ require (
	github.com/facebookgo/ensure v0.0.0-20200202191622-63f1cf65ac4c // indirect
	github.com/facebookgo/stack v0.0.0-20160209184415-751773369052 // indirect
	github.com/facebookgo/subset v0.0.0-20200203212716-c811ad88dec4 // indirect
	github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a // indirect
	github.com/golang/protobuf v1.4.0 // indirect
	github.com/jmhodges/levigo v1.0.0 // indirect
	github.com/pelletier/go-toml v1.7.0
	github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0 // indirect
	github.com/tecbot/gorocksdb v0.0.0-20191217155057-f0fad39f321c // indirect
	github.com/tinylib/msgp v1.1.2 // indirect
	golang.org/x/sys v0.0.0-20200427175716-29b57079015a // indirect
)

M go.sum => go.sum +28 -4
@@ 1,11 1,11 @@
git.sr.ht/~mendelmaleh/tgbotapi v0.0.0-20200428075811-843aa9fd9bdd h1:AbPfssabO/ifZeWJqLYv973PjZ+XGWfgTzArnDEo3Mc=
git.sr.ht/~mendelmaleh/tgbotapi v0.0.0-20200428075811-843aa9fd9bdd/go.mod h1:egXMJiK4m38HNQ01Axpu9FcHZb3bcw0N+ksl1EfA5y8=
git.sr.ht/~mendelmaleh/xkcd v0.0.0-20200428044053-ed73e79a6523 h1:x9CE/GhDn2T8dKzMa27BPMYrJywo8N8X1TRuj8zXsg8=
git.sr.ht/~mendelmaleh/xkcd v0.0.0-20200428044053-ed73e79a6523/go.mod h1:qTClixocAw87fOtZ1s7kBlA7QS1YpUBieq1xNPXAdmk=
git.sr.ht/~mendelmaleh/xkcd v0.0.0-20200428131247-24696ead7e33 h1:jn2EpPbp8A+tuhXOFiMS5B0lK5T+9w4EDOKND0aU8kI=
git.sr.ht/~mendelmaleh/xkcd v0.0.0-20200428131247-24696ead7e33/go.mod h1:qTClixocAw87fOtZ1s7kBlA7QS1YpUBieq1xNPXAdmk=
github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/RoaringBitmap/roaring v0.4.21 h1:WJ/zIlNX4wQZ9x8Ey33O1UaD9TCTakYsdLFSBcTwH+8=
github.com/RoaringBitmap/roaring v0.4.21/go.mod h1:D0gp8kJQgE1A4LQ5wFLggQEyvDi06Mq5mKs52e1TwOo=
github.com/RoaringBitmap/roaring v0.4.23 h1:gpyfd12QohbqhFO4NVDUdoPOCXsyahYRQhINmlHxKeo=
github.com/RoaringBitmap/roaring v0.4.23/go.mod h1:D0gp8kJQgE1A4LQ5wFLggQEyvDi06Mq5mKs52e1TwOo=
github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8=
github.com/blevesearch/bleve v1.0.7 h1:4PspZE7XABMSKcVpzAKp0E05Yer1PIYmTWk+1ngNr/c=
github.com/blevesearch/bleve v1.0.7/go.mod h1:3xvmBtaw12Y4C9iA1RTzwWCof5j5HjydjCTiDE2TeE0=


@@ 50,14 50,26 @@ github.com/fsnotify/fsnotify v1.4.7 h1:IXs+QLmnXW2CcXuY+8Mzv/fWEsPGWxqefPtCP5CnV
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2 h1:Ujru1hufTHVb++eG6OuNDKMxZnGIvF6o/u8q/8h2+I4=
github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE=
github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a h1:FQqoVvjbiUioBBFUL5up+h+GdCa/AnJsL/1bIs/veSI=
github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE=
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 h1:gclg6gY70GLy3PbkQ1AERPfmLMMagS60DKF78eWwLn8=
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs=
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
github.com/golang/protobuf v1.4.0 h1:oOuy+ugB+P/kBdUnG5QaMXSIyJ1q38wWSojYCb3z5VQ=
github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99 h1:twflg0XRTjwKpxb/jFExr4HGq6on2dEOmnL6FV+fgPw=
github.com/gopherjs/gopherjs v0.0.0-20190910122728-9d188e94fb99/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=


@@ 111,6 123,8 @@ github.com/technoweenie/multipartstreamer v1.0.1 h1:XRztA5MXiR1TIRHxH2uNxXxaIkKQ
github.com/technoweenie/multipartstreamer v1.0.1/go.mod h1:jNVxdtShOxzAsukZwTSw6MDx5eUJoiEBsSvzDU9uzog=
github.com/tinylib/msgp v1.1.0 h1:9fQd+ICuRIu/ue4vxJZu6/LzxN0HwMds2nq/0cFvxHU=
github.com/tinylib/msgp v1.1.0/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE=
github.com/tinylib/msgp v1.1.2 h1:gWmO7n0Ys2RBEb7GPYB9Ujq8Mk5p2U08lRnmMcGy6BQ=
github.com/tinylib/msgp v1.1.2/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE=
github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0=
github.com/willf/bitset v1.1.10 h1:NotGKqX0KwQ72NUzqrjZq5ipPNDQex9lo3WpaS8L2sc=
github.com/willf/bitset v1.1.10/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4=


@@ 127,8 141,18 @@ golang.org/x/sys v0.0.0-20181221143128-b4a75ba826a6/go.mod h1:STP8DvDyc/dI5b8T5h
golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5 h1:LfCXLvNmTYH9kEmVgqbnsWfruoXZIrh4YBgqVHtDvw0=
golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200427175716-29b57079015a h1:08u6b1caTT9MQY4wSbmsd4Ulm6DmgNYnbImBuZjGJow=
golang.org/x/sys v0.0.0-20200427175716-29b57079015a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
google.golang.org/protobuf v1.21.0 h1:qdOKuR/EIArgaWNjetjgTzgVTAZ+S/WXVrq9HW9zimw=
google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/fsnotify.v1 v1.4.7 h1:xOHLXZwVvI9hhs+cLKq5+I5onOuwQLhQwiu63xxlHs4=

M main.go => main.go +57 -17
@@ 1,6 1,9 @@
package main

import (
	"encoding/json"
	"errors"
	"fmt"
	"io/ioutil"
	"log"
	"net/http"


@@ 19,7 22,7 @@ type Config struct {
	}

	Bleve struct {
		Index, Data string
		Index string
	}
}



@@ 33,6 36,8 @@ type Bot struct {
	client *http.Client
}

// lastKey is the key used with the bleve index's GetInternal/SetInternal
// calls; Update stores the latest comic number under it as a 4-byte
// little-endian uint32.
var lastKey = []byte("last")

func main() {
	bot := &Bot{}



@@ 60,7 65,6 @@ func main() {
		log.Fatal(err)
	}

	// bot.Debug = true
	log.Printf("Authorized on account %s", bot.Self.UserName)

	// get xkcd


@@ 80,36 84,68 @@ func main() {
			log.Fatal(err)
		}

		// index data
		go func(bot *Bot) {
			if err := bot.IndexData(); err != nil {
				log.Fatal(err)
			}
			log.Println("Done indexing data")
		}(bot)
		// init internal last key
		if err := bot.Bleve.SetInternal(lastKey, []byte{}); err != nil {
			log.Fatal(err)
		}

	} else if err != nil {
		log.Fatal(err)
	}

	// index data
	go func(bot *Bot) {
		log.Println("Starting indexing data")
		if err := bot.Update(); err != nil {
			log.Fatal(err)
		}
		log.Println("Done indexing data")
	}(bot)

	// parse updates
	updates := bot.GetUpdatesChan(tgbotapi.UpdateConfig{Timeout: 60})
	for u := range updates {
		if u.InlineQuery != nil {
			q := u.InlineQuery
			query := q.Query

			if q.Query == "" {
			if query == "" {
				continue
			}

			results, err := bot.FTS(q.Query, 5)
			if err != nil {
				continue
			if len(query) > 1 && query[0] == '#' && isDigit(query[1]) {
				query = "num:" + query[1:]
			}

			if len(results) < 1 {
				log.Println("Less than one result")
				continue
			results, err := bot.FTS(query, 0)
			if err != nil {
				switch {
				case err == ErrNoHits:
					desc := fmt.Sprintf("No search results found for %q", query)
					results = make([]interface{}, 1)

					results[0] = tgbotapi.InlineQueryResultArticle{
						Type:                "article", // must be
						ID:                  "ErrNoHits",
						Title:               "No results",
						Description:         desc,
						InputMessageContent: tgbotapi.InputTextMessageContent{Text: desc},
					}
				case err.Error() == "syntax error":
					desc := fmt.Sprintf("Invalid query syntax %q", query)
					results = make([]interface{}, 1)

					results[0] = tgbotapi.InlineQueryResultArticle{
						Type:                "article", // must be
						ID:                  "ErrInvalidSyntax",
						Title:               "Invalid Syntax",
						Description:         desc,
						InputMessageContent: tgbotapi.InputTextMessageContent{Text: desc},
					}
				default:
					log.Printf("%T: %s\n", err, err)
					continue
				}
			}

			api, err := bot.Send(tgbotapi.InlineConfig{


@@ 117,9 153,13 @@ func main() {
				Results:       results,
			})

			if err != nil {
			if err != nil && errors.Is(err, &json.UnmarshalTypeError{}) {
				log.Println(api, err)
			}
		}
	}
}

// isDigit reports whether b is an ASCII decimal digit ('0' through '9').
func isDigit(b byte) bool {
	switch {
	case b < '0', b > '9':
		return false
	default:
		return true
	}
}