package seculardb
import (
"bytes"
"fmt"
"strings"
"sync"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
"git.sr.ht/~hokiegeek/htmlscrape"
)
// GuideURL is the URL of the WordPress page that hosts the Secular Homeschool
// Guide. Build fetches this page and scrapes the guide table from it.
const GuideURL = "https://www.secularhomeschooler.com/secular-homeschool-guide/"
// Build scrapes the guide page at GuideURL and turns every row of the table
// whose id is "tablepress-1" into an Entry.
//
// Rows are delivered by htmlscrape.TableRows on a channel and parsed by a
// fixed pool of worker goroutines, so the order of db.Entries follows worker
// scheduling rather than the order of the rows on the page.
//
// A non-nil error is returned when htmlscrape.TableRows fails or when a row
// carries rating text that is not recognised. In both cases db.Entries is
// left empty (non-nil) rather than partially filled.
func Build() (db DB, err error) {
	db.Entries = make([]Entry, 0)

	const workers = 20

	// Workers collect into locals instead of the named result so that the
	// early return below never touches memory a worker may still be writing.
	var (
		mu      sync.Mutex
		wg      sync.WaitGroup
		entries = make([]Entry, 0)
		rowErr  error
	)

	rows := make(chan *html.Node, 5)
	for w := 0; w < workers; w++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			// Keep draining even after a bad row so the producer is never
			// left blocked on a full channel.
			for tr := range rows {
				entry, parseErr := rowEntry(tr)

				mu.Lock()
				switch {
				case parseErr == nil:
					entries = append(entries, entry)
				case rowErr == nil:
					// Only the first failure is reported.
					rowErr = parseErr
				}
				mu.Unlock()
			}
		}()
	}

	if err = htmlscrape.TableRows(GuideURL, rows, htmlscrape.NewNodeMatcher().Attr("id", "tablepress-1")); err != nil {
		// NOTE(review): as before, the workers are not awaited on this path
		// because it is not visible here whether TableRows closes rows when
		// it fails; waiting could block forever. Confirm and wait if it does.
		return db, err
	}

	wg.Wait()
	if rowErr != nil {
		return db, rowErr
	}
	db.Entries = entries
	return db, nil
}

// rowEntry converts one table row into an Entry, failing when the rating
// column holds text that ratingFromText does not recognise.
func rowEntry(tr *html.Node) (Entry, error) {
	name := rowCellText(tr, "column-1")
	ratingText := strings.ToLower(rowCellText(tr, "column-2"))

	rating, ok := ratingFromText(ratingText)
	if !ok {
		return Entry{}, fmt.Errorf("entry %q: unrecognised rating %q", name, ratingText)
	}

	return Entry{
		Name:        name,
		Rating:      Rating(rating),
		GradeLevels: splitCommaList(rowCellText(tr, "column-3")),
		Subjects:    splitCommaList(rowCellText(tr, "column-4")),
		Description: rowCellText(tr, "column-5"),
		URL:         rowCellLink(tr, "column-5"),
	}, nil
}

// rowCell returns the <td> below tr whose class attribute is class, or
// whatever htmlscrape.FindNode yields when nothing matches (treated as nil by
// the callers).
func rowCell(tr *html.Node, class string) *html.Node {
	return htmlscrape.FindNode(tr, htmlscrape.NewNodeMatcher().Type(html.ElementNode).Atom(atom.Td).Attr("class", class))
}

// rowCellText concatenates the text of the direct children of the cell with
// the given class. A missing cell yields the empty string.
func rowCellText(tr *html.Node, class string) string {
	td := rowCell(tr, class)
	if td == nil {
		return ""
	}

	var buf bytes.Buffer
	for c := td.FirstChild; c != nil; c = c.NextSibling {
		switch {
		case c.Type == html.TextNode && strings.Contains(c.Data, "wp-content"),
			c.Type == html.ElementNode && c.DataAtom == atom.Center:
			// Prefer the first child of a <b> found anywhere in the cell;
			// otherwise fall back to the first child of the current node.
			src := htmlscrape.FindNode(td, htmlscrape.NewNodeMatcher().Type(html.ElementNode).Atom(atom.B))
			if src == nil {
				src = c
			}
			// Text nodes and empty elements have no child to read from.
			if src.FirstChild != nil {
				buf.WriteString(src.FirstChild.Data)
			}
		case c.Type == html.TextNode:
			buf.WriteString(c.Data)
		}
	}
	return buf.String()
}

// rowCellLink returns the href of the first <a> found in the cell with the
// given class, prefixed with "http://" when it does not already start with
// "http". It returns "" when the cell or the anchor is missing.
func rowCellLink(tr *html.Node, class string) string {
	td := rowCell(tr, class)
	if td == nil {
		return ""
	}
	anchor := htmlscrape.FindNode(td, htmlscrape.NewNodeMatcher().Atom(atom.A))
	if anchor == nil {
		return ""
	}

	var link string
	for _, attr := range anchor.Attr {
		if attr.Key != "href" {
			continue
		}
		link = attr.Val
		if !strings.HasPrefix(link, "http") {
			link = "http://" + link
		}
	}
	return link
}

// ratingFromText maps the lower-cased text of the rating column to a numeric
// rating between 0 and 5. The second result is false when the text matches
// none of the known phrasings.
func ratingFromText(s string) (int, bool) {
	has := func(sub string) bool { return strings.Contains(s, sub) }

	switch {
	case s == "", s == "n/a", has("unconfirmed"), has("not confirmed"):
		return 0, true
	case has("the \"bible\" version is obviously not secular."):
		// Must be tested before the generic "not secular" case below, which
		// would otherwise swallow this phrase and rate it 1 instead of 2.
		return 2, true
	case has("not secular"):
		return 1, true
	case has("can be secular."),
		has("neutral"),
		has("questionable"),
		has("okay, so the book itself is technically secular"):
		return 2, true
	case has("mostly secular"), has("sorta secular?"):
		return 3, true
	case s == "secular":
		return 4, true
	case has("super secular!"):
		return 5, true
	}
	return 0, false
}

// splitCommaList removes every space from s and splits the result on commas.
func splitCommaList(s string) []string {
	return strings.Split(strings.Replace(s, " ", "", -1), ",")
}