~hokiegeek/htmlscrape

6f54e8b31b3d4f6cffde68f4ba77af21b2e7b711 — HokieGeek 6 months ago 092eb1e v0.4.0
Added better commenting and added convenience methods to NodeMatcher
1 files changed, 19 insertions(+), 2 deletions(-)

M scraper.go
M scraper.go => scraper.go +19 -2
@@ 18,23 18,37 @@ type NodeMatcher struct {
	attr                 map[string]string
}

// Elem restricts the matcher to element nodes. It is shorthand for
// Type(html.ElementNode) and returns the matcher so calls can be chained.
func (m *NodeMatcher) Elem() *NodeMatcher {
	const kind = html.ElementNode
	return m.Type(kind)
}

// Text restricts the matcher to text nodes. It is shorthand for
// Type(html.TextNode) and returns the matcher so calls can be chained.
func (m *NodeMatcher) Text() *NodeMatcher {
	const kind = html.TextNode
	return m.Type(kind)
}

// Type records v as the node type to match and enables the node type check.
// It returns the matcher so calls can be chained.
func (m *NodeMatcher) Type(v html.NodeType) *NodeMatcher {
	m.nodeType, m.checkType = v, true
	return m
}

// Atom records v as the data atom to match and enables the atom check.
// It returns the matcher so calls can be chained.
func (m *NodeMatcher) Atom(v atom.Atom) *NodeMatcher {
	m.dataAtom, m.checkAtom = v, true
	return m
}

// Attr adds the attribute key k with value v to the matching criteria,
// overwriting any value previously stored for k. It returns the matcher so
// calls can be chained.
//
// The attribute map is allocated on demand, so Attr is safe to call on a
// NodeMatcher that was not built with NewNodeMatcher or whose map was set to
// nil through Attrs.
func (m *NodeMatcher) Attr(k, v string) *NodeMatcher {
	// Writing to a nil map panics; make sure one exists before storing.
	if m.attr == nil {
		m.attr = make(map[string]string)
	}
	m.attr[k] = v
	return m
}

// Attrs replaces the attribute matching criteria with the given map of attribute names to values
func (m *NodeMatcher) Attrs(v map[string]string) *NodeMatcher {
	m.attr = v
	return m


@@ 65,12 79,14 @@ func (m *NodeMatcher) matches(n *html.Node) (_ bool) {
	return true
}

// NewNodeMatcher returns a pointer to a NodeMatcher whose attribute map is
// already allocated and empty; all other fields are left at their zero values.
func NewNodeMatcher() *NodeMatcher {
	return &NodeMatcher{
		attr: make(map[string]string),
	}
}

// FindNode recursively searches for a node which matches the given NodeMatcher
func FindNode(n *html.Node, matcher *NodeMatcher) *html.Node {
	if n == nil || matcher == nil {
		return nil


@@ 89,6 105,7 @@ func FindNode(n *html.Node, matcher *NodeMatcher) *html.Node {
	return nil
}

// Table returns an HTML table which matches the given NodeMatcher
func Table(url string, matcher *NodeMatcher) (*html.Node, error) {
	resp, err := nethttp.Get(url)
	defer resp.Body.Close()


@@ 117,11 134,11 @@ func Table(url string, matcher *NodeMatcher) (*html.Node, error) {

	if tbd == nil {
		return nil, errors.New("did not find the table")
	} else {
		return tbd, nil
	}
	return tbd, nil
}

// TableRows sends to a channel the rows of the HTML table which matches the NodeMatcher
func TableRows(url string, rows chan<- *html.Node, tableMatcher *NodeMatcher) error {
	table, err := Table(url, tableMatcher)
	if err != nil {