package seculardb import ( "bytes" "fmt" "strings" "sync" "golang.org/x/net/html" "golang.org/x/net/html/atom" "git.sr.ht/~hokiegeek/htmlscrape" ) // GuideURL is the URL for the WP page that has the Secular Homeschool Guide const GuideURL = "https://www.secularhomeschooler.com/secular-homeschool-guide/" // Build creates a DB object out of the page func Build() (db DB, err error) { db.Entries = make([]Entry, 0) var mu sync.Mutex var wg sync.WaitGroup rows := make(chan *html.Node, 5) for w := 1; w <= 20; w++ { wg.Add(1) go func() { defer wg.Done() colMatcher := func(n *html.Node, name string) *html.Node { return htmlscrape.FindNode(n, htmlscrape.NewNodeMatcher().Elem().Atom(atom.Td).Attr("class", name)) } col := func(n *html.Node, name string) string { td := colMatcher(n, name) var buf bytes.Buffer for c := td.FirstChild; c != nil; c = c.NextSibling { switch { case c.Type == html.TextNode && strings.Contains(c.Data, "wp-content"): fallthrough case c.Type == html.ElementNode && c.DataAtom == atom.Center: b := htmlscrape.FindNode(td, htmlscrape.NewNodeMatcher().Elem().Atom(atom.B)) switch { case b == nil: buf.WriteString(c.FirstChild.Data) default: buf.WriteString(b.FirstChild.Data) } case c.Type == html.TextNode: buf.WriteString(c.Data) } } return buf.String() } for tr := range rows { name := col(tr, "column-1") gradeLevels := col(tr, "column-3") // []string? subjects := col(tr, "column-4") // []string desc := col(tr, "column-5") linkNode := htmlscrape.FindNode(colMatcher(tr, "column-5"), htmlscrape.NewNodeMatcher().Atom(atom.A)) var link string if linkNode != nil { for _, attr := range linkNode.Attr { if attr.Key == "href" { link = attr.Val if !strings.HasPrefix(link, "http") { link = "http://" + link } } } } ratingStr := strings.ToLower(col(tr, "column-2")) var rating int switch { case ratingStr == "": fallthrough case strings.Contains(ratingStr, "unconfirmed") || strings.Contains(ratingStr, "not confirmed") || ratingStr == "n/a": rating = 0 case strings.Contains(ratingStr, "not secular"): rating = 1 case strings.Contains(ratingStr, "can be secular."): fallthrough case strings.Contains(ratingStr, "neutral") || strings.Contains(ratingStr, "questionable"): fallthrough case strings.Contains(ratingStr, "the \"bible\" version is obviously not secular."): fallthrough case strings.Contains(ratingStr, "okay, so the book itself is technically secular"): rating = 2 case strings.Contains(ratingStr, "mostly secular") || strings.Contains(ratingStr, "sorta secular?"): rating = 3 case ratingStr == "secular": rating = 4 case strings.Contains(ratingStr, "super secular!"): rating = 5 default: fmt.Printf("[%s] rating: %s\n", name, ratingStr) panic("shit") } entry := Entry{ Name: name, Rating: Rating(rating), GradeLevels: strings.Split(strings.Replace(gradeLevels, " ", "", -1), ","), Subjects: strings.Split(strings.Replace(subjects, " ", "", -1), ","), Description: strings.Replace(desc, " |", "", -1), URL: link, } mu.Lock() db.Entries = append(db.Entries, entry) mu.Unlock() } }() } if err = htmlscrape.TableRows(GuideURL, rows, htmlscrape.NewNodeMatcher().Attr("id", "tablepress-1")); err == nil { wg.Wait() } return }