~jboverfelt/idx2rss

18599b72e5873c976b51401449af4c7cdadcce7e — Justin Overfelt 2 years ago
initial commit
4 files changed, 159 insertions(+), 0 deletions(-)

A .gitignore
A go.mod
A go.sum
A main.go
A  => .gitignore +2 -0
@@ 1,2 @@
idx2rss
*.swp

A  => go.mod +10 -0
@@ 1,10 @@
module git.sr.ht/~jboverfelt/idx2rss

go 1.13

require (
	github.com/gorilla/feeds v1.1.1
	github.com/russross/blackfriday/v2 v2.0.1
	github.com/shurcooL/sanitized_anchor_name v1.0.0 // indirect
	golang.org/x/net v0.0.0-20200625001655-4c5254603344
)

A  => go.sum +16 -0
@@ 1,16 @@
github.com/gorilla/feeds v1.1.1 h1:HwKXxqzcRNg9to+BbvJog4+f3s/xzvtZXICcQGutYfY=
github.com/gorilla/feeds v1.1.1/go.mod h1:Nk0jZrvPFZX1OBe5NPiddPw7CfwF6Q9eqzaBbaightA=
github.com/russross/blackfriday v2.0.0+incompatible h1:cBXrhZNUf9C+La9/YpS+UHpUT8YD6Td9ZMSU9APFcsk=
github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q=
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo=
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20200625001655-4c5254603344 h1:vGXIOMxbNfDTk/aXCmfdLgkrSV+Z2tcbze+pEc3v5W4=
golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=

A  => main.go +131 -0
@@ 1,131 @@
package main

import (
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"path/filepath"
	"strings"
	"time"

	"github.com/gorilla/feeds"
	"github.com/russross/blackfriday/v2"
	"golang.org/x/net/html"
)

// check panics with err when err is non-nil and does nothing otherwise.
// It lets main bail out on the first failure without per-call handling.
func check(err error) {
	if err == nil {
		return
	}
	panic(err)
}

// main builds an RSS feed from the posts linked in [src dir]/index.html and
// writes it to standard output.
//
// It expects exactly four arguments: the source directory, the URL prefix
// used for the feed link and prepended to every post href, the feed title,
// and the feed description. With any other argument count it prints a usage
// line and exits with status 1. Failures while opening the index, filling
// the feed, or writing the RSS panic via check.
func main() {
	if len(os.Args) != 5 {
		// The feed itself is written to stdout, so the usage text goes to
		// stderr to keep it out of a redirected feed file.
		fmt.Fprintln(os.Stderr, "usage: idx2rss [src dir] [url prefix] [feed title] [feed description]")
		os.Exit(1)
	}

	srcPath := os.Args[1]
	urlPrefix := os.Args[2]
	feedTitle := os.Args[3]
	feedDesc := os.Args[4]

	feed := &feeds.Feed{
		Title:       feedTitle,
		Link:        &feeds.Link{Href: urlPrefix},
		Description: feedDesc,
		Created:     time.Now(),
	}

	// read srcdir/index.html and turn its post links into feed items
	f, err := os.Open(filepath.Join(srcPath, "index.html"))
	check(err)
	defer f.Close()

	err = fillFeed(f, feed, srcPath, urlPrefix)
	check(err)

	err = feed.WriteRss(os.Stdout)
	check(err)
}

// fillFeed tokenizes the HTML read from r and adds one item to feed for each
// post link found inside a <main> element.
//
// A post link is an <a> start tag that carries both an href and a data-date
// attribute; data-date must be formatted as time.RFC1123Z. For each such
// anchor:
//   - the item title is the first non-blank text token inside the anchor,
//   - the item link and id are urlPrefix + href,
//   - the item description is the result of getPostHTML for the file at
//     srcPath joined with the href whose extension is replaced by ".md".
//
// Anchors lacking either attribute are skipped. An error is returned when the
// tokenizer fails for a reason other than io.EOF, when a post's markdown
// cannot be rendered, or when a date cannot be parsed; items added before the
// failure remain in feed.
func fillFeed(r io.Reader, feed *feeds.Feed, srcPath, urlPrefix string) error {
	hr := html.NewTokenizer(r)
	var (
		inMain  bool   // inside <main>
		inPost  bool   // inside a post anchor that has not yielded an item yet
		curDate string // data-date attribute of the current anchor
		curHref string // href attribute of the current anchor
	)
	for {
		if hr.Next() == html.ErrorToken {
			// io.EOF is how the tokenizer reports a clean end of input.
			if err := hr.Err(); err != io.EOF {
				return fmt.Errorf("tokenizing index: %w", err)
			}
			return nil
		}

		curTok := hr.Token()
		switch curTok.Type {
		case html.StartTagToken:
			switch {
			case curTok.Data == "main":
				inMain = true
			case inMain && curTok.Data == "a":
				inPost = true
				for _, attr := range curTok.Attr {
					switch attr.Key {
					case "data-date":
						curDate = attr.Val
					case "href":
						curHref = attr.Val
					}
				}
			}

		case html.TextToken:
			// a post must have a link and a date to be part of the feed;
			// blank text (e.g. indentation around nested markup) is not a
			// usable title, so keep looking for real text
			if !inPost || curDate == "" || curHref == "" || strings.TrimSpace(curTok.Data) == "" {
				continue
			}

			mdHref := strings.TrimSuffix(curHref, filepath.Ext(curHref)) + ".md"
			mdPath := filepath.Join(srcPath, mdHref)
			postHTML, err := getPostHTML(mdPath)
			if err != nil {
				return fmt.Errorf("rendering post %q: %w", mdPath, err)
			}

			curTs, err := time.Parse(time.RFC1123Z, curDate)
			if err != nil {
				return fmt.Errorf("parsing date of post %q: %w", curHref, err)
			}

			feed.Add(&feeds.Item{
				Title:       curTok.Data,
				Link:        &feeds.Link{Href: urlPrefix + curHref},
				Id:          urlPrefix + curHref,
				Description: postHTML,
				Created:     curTs,
			})

			// One item per anchor: further text tokens in the same <a>
			// (e.g. after nested inline markup) would otherwise add
			// duplicate items sharing the same Id.
			inPost = false

		case html.EndTagToken:
			if !inMain {
				continue
			}
			switch curTok.Data {
			case "a":
				inPost = false
				curDate = ""
				curHref = ""
			case "main":
				// Drop any anchor state left by an unclosed <a> so text
				// outside <main> can never become a feed item.
				inMain = false
				inPost = false
				curDate = ""
				curHref = ""
			}
		}
	}
}

func getPostHTML(postPath string) (string, error) {
	f, err := os.Open(postPath)
	if err != nil {
		return "", err
	}
	defer f.Close()

	md, err := ioutil.ReadAll(f)
	if err != nil {
		return "", err
	}

	return string(blackfriday.Run(md)), nil
}