~hokiegeek/htmlscrape

092eb1e45f5df4b502efcd514ba22e89fa48dc55 — HokieGeek 6 months ago f87372e v0.3.3
Fixed some issues with generating a matcher
2 files changed, 40 insertions(+), 7 deletions(-)

M scraper.go
A scraper_test.go
M scraper.go => scraper.go +14 -7
@@ 12,17 12,20 @@ import (

// NodeMatcher allows you to build a set of criteria to match against a node.
//
// checkType and checkAtom record whether nodeType and dataAtom were set
// explicitly through Type and Atom; matches only compares a criterion whose
// flag is set, so an unset criterion does not reject nodes by its zero value.
type NodeMatcher struct {
	checkType, checkAtom bool
	nodeType             html.NodeType
	dataAtom             atom.Atom
	attr                 map[string]string
}

// Type sets the node type criterion to v and marks it as active so that
// matches compares it. It returns the receiver to allow call chaining.
func (m *NodeMatcher) Type(v html.NodeType) *NodeMatcher {
	m.nodeType, m.checkType = v, true
	return m
}

// Atom sets the data atom criterion to v and marks it as active so that
// matches compares it. It returns the receiver to allow call chaining.
func (m *NodeMatcher) Atom(v atom.Atom) *NodeMatcher {
	m.dataAtom, m.checkAtom = v, true
	return m
}


@@ 38,11 41,11 @@ func (m *NodeMatcher) Attrs(v map[string]string) *NodeMatcher {
}

func (m *NodeMatcher) matches(n *html.Node) (_ bool) {
	if n.Type != m.nodeType {
	if m.checkType && n.Type != m.nodeType {
		return
	}

	if n.DataAtom == m.dataAtom {
	if m.checkAtom && n.DataAtom != m.dataAtom {
		return
	}



@@ 69,13 72,17 @@ func NewNodeMatcher() *NodeMatcher {
}

func FindNode(n *html.Node, matcher *NodeMatcher) *html.Node {
	if n == nil || matcher == nil {
		return nil
	}

	if matcher.matches(n) {
		return n
	}

	for c := n.FirstChild; c != nil; c = c.NextSibling {
		if n := FindNode(c, matcher); n != nil {
			return n
		if found := FindNode(c, matcher); found != nil {
			return found
		}
	}


A scraper_test.go => scraper_test.go +26 -0
@@ 0,0 1,26 @@
package htmlscrape

import (
	"testing"

	"golang.org/x/net/html"
)

// testURL is the page that TestTableRows passes to TableRows.
const testURL = "https://www.secularhomeschooler.com/secular-homeschool-guide/"

// TestTableRows runs TableRows against testURL, drains every row it sends and
// reports the error it returns, if any. The test is currently skipped.
func TestTableRows(t *testing.T) {
	t.Skip("this is not a good test")
	rows := make(chan *html.Node, 5)

	// Hand the result back over a channel rather than a shared variable: the
	// receive below synchronizes with the goroutine, so the error is never
	// read before it has been written. The buffer of one lets the goroutine
	// finish even if nobody receives.
	errc := make(chan error, 1)
	go func() {
		errc <- TableRows(testURL, rows, NewNodeMatcher().Attr("id", "tablepress-1"))
	}()

	// Drain the rows; the loop only ends once rows is closed, which is
	// presumably done by TableRows — verify against its implementation.
	for range rows {
	}

	if err := <-errc; err != nil {
		t.Error(err)
	}
}