~gjabell/go-yt

d004dd119c0c6f952a686f584d6b79f9943ae5e3 — Galen Abell 3 years ago 0eb0390
Split main file into subfiles
3 files changed, 182 insertions(+), 174 deletions(-)

A extract.go
A parse.go
M yt.go
A extract.go => extract.go +54 -0
@@ 0,0 1,54 @@
package yt

import (
	"errors"
	"io"
	"strings"

	"golang.org/x/net/html"
)

// parseDataFromHtml parses r as an HTML document, locates the script text
// selected by findNode, and returns the brace-delimited object embedded in it.
//
// Everything before the first '{' and after the last '}' of the script text is
// discarded, which strips the surrounding JavaScript statement and leaves only
// the object literal.
//
// An error is returned when the HTML cannot be parsed, when findNode finds no
// matching node, or when the script text holds no brace-delimited object.
func parseDataFromHtml(r io.Reader) (string, error) {
	n, err := html.Parse(r)
	if err != nil {
		return "", err
	}

	n = findNode(n)
	if n == nil {
		return "", errors.New("failed to parse YouTube data from HTML")
	}

	data := n.Data
	data = strings.TrimLeftFunc(data, func(r rune) bool {
		return r != '{'
	})
	data = strings.TrimRightFunc(data, func(r rune) bool {
		return r != '}'
	})

	// Without a '{' or a '}' the trims above consume the whole string. Report
	// that here rather than handing back an empty payload as a success.
	if data == "" {
		return "", errors.New("failed to parse YouTube data from HTML: no JSON object in script")
	}

	return data, nil
}

// nodeMatches reports whether n is a non-nil "script" element node that has a
// first child whose Data begins with "var ytInitialData".
func nodeMatches(n *html.Node) bool {
	switch {
	case n == nil:
		return false
	case n.Type != html.ElementNode, n.Data != "script":
		return false
	case n.FirstChild == nil:
		return false
	}

	scriptText := n.FirstChild.Data
	return strings.HasPrefix(scriptText, "var ytInitialData")
}

// findNode searches the tree rooted at n, depth first and in sibling order,
// for the first node accepted by nodeMatches and returns that node's first
// child. It returns nil when n is nil or when no node in the tree matches.
func findNode(n *html.Node) *html.Node {
	// nodeMatches tolerates a nil node, but the child walk below dereferences
	// n, so a nil root has to be rejected before going any further.
	if n == nil {
		return nil
	}

	if nodeMatches(n) {
		return n.FirstChild
	}

	for c := n.FirstChild; c != nil; c = c.NextSibling {
		if result := findNode(c); result != nil {
			return result
		}
	}

	return nil
}

A parse.go => parse.go +128 -0
@@ 0,0 1,128 @@
package yt

import (
	"encoding/json"
	"errors"
)

// wrapper mirrors the nesting of the JSON document that parseDataFromJson
// decodes with json.Unmarshal, declaring only the path that leads down to the
// individual video renderers. Keys of the document that have no matching field
// here are ignored by the decoder, and since no struct tags are given the
// field names are matched against the JSON keys case-insensitively.
type wrapper struct {
	Contents struct {
		TwoColumnSearchResultsRenderer struct {
			PrimaryContents struct {
				SectionListRenderer struct {
					// One element per section of the list.
					Contents []struct {
						ItemSectionRenderer struct {
							// One element per item of the section. Items
							// without a video renderer key leave
							// VideoRenderer at its zero value.
							Contents []struct {
								VideoRenderer videoRenderer
							}
						}
					}
				}
			}
		}
	}
}

// videoRenderer holds the fields of a single video entry that
// convertYoutubeData turns into a SearchResult. It is populated by
// json.Unmarshal as part of wrapper; field names are matched against the JSON
// keys case-insensitively because no struct tags are declared.
type videoRenderer struct {
	// DescriptionSnippet carries the description text as a list of runs.
	DescriptionSnippet struct {
		Runs []struct {
			Text string
		}
	}
	// LengthText.SimpleText is copied to SearchResult.Length as-is.
	LengthText struct {
		SimpleText string
	}
	// OwnerText describes the owning channel: each run carries a display
	// text plus the browse endpoint from which the channel ID and canonical
	// URL are read.
	OwnerText struct {
		Runs []struct {
			NavigationEndpoint struct {
				BrowseEndpoint struct {
					BrowseID         string
					CanonicalBaseURL string
				}
			}
			Text string
		}
	}
	// Title carries the title text as a list of runs.
	Title struct {
		Runs []struct {
			Text string
		}
	}
	// Thumbnail.Thumbnails is copied to SearchResult.Thumbnails as-is; the
	// Thumbnail element type is declared elsewhere in the package.
	Thumbnail struct {
		Thumbnails []Thumbnail
	}
	// VideoID identifies the video; convertYoutubeData rejects entries where
	// it is empty.
	VideoID       string
	ViewCountText struct { // SimpleText is copied to SearchResult.ViewCount as-is.
		SimpleText string
	}
}

// parseDataFromJson decodes data, a JSON document shaped like wrapper, and
// returns one SearchResult for every video renderer that convertYoutubeData
// accepts.
//
// Every section of the section list is scanned, in order, so videos that are
// not placed in the first section are still returned. Items for which
// convertYoutubeData returns nil are skipped.
//
// An error is returned when data is not valid JSON or when the section list
// is empty. The returned slice is nil when no item yields a result.
func parseDataFromJson(data string) ([]SearchResult, error) {
	var w wrapper

	if err := json.Unmarshal([]byte(data), &w); err != nil {
		return nil, err
	}

	sections := w.Contents.TwoColumnSearchResultsRenderer.
		PrimaryContents.SectionListRenderer.Contents
	if len(sections) < 1 {
		return nil, errors.New("parse error: SectionListRenderer contents too short")
	}

	var videoData []SearchResult
	for _, section := range sections {
		// Sections without an item section renderer decode to an empty
		// Contents list and contribute nothing.
		for _, item := range section.ItemSectionRenderer.Contents {
			if result := convertYoutubeData(item.VideoRenderer); result != nil {
				videoData = append(videoData, *result)
			}
		}
	}

	return videoData, nil
}

// convertYoutubeData converts a decoded videoRenderer into a SearchResult.
//
// It returns nil when renderer.VideoID is empty, which is treated as invalid
// data. Otherwise:
//   - Description and Title are the concatenation of the Text of all their
//     runs, so text that is split across several runs is not cut off after
//     the first one; with no runs they are empty.
//   - Channel is built from the first owner run, or left at its zero value
//     when there is none.
//   - VideoURL is built with makeURL from watchURL and the video ID, and is
//     left empty if makeURL reports an error.
//   - Length, Thumbnails and ViewCount are copied through unchanged.
func convertYoutubeData(renderer videoRenderer) *SearchResult {
	// check for invalid data (video ID should always be non-empty)
	if renderer.VideoID == "" {
		return nil
	}

	// Accumulate into byte slices: converting a nil slice yields "", which
	// keeps the no-runs case identical to an empty string.
	var description []byte
	for _, run := range renderer.DescriptionSnippet.Runs {
		description = append(description, run.Text...)
	}

	var title []byte
	for _, run := range renderer.Title.Runs {
		title = append(title, run.Text...)
	}

	var channel Channel
	if len(renderer.OwnerText.Runs) > 0 {
		owner := renderer.OwnerText.Runs[0]
		channel = Channel{
			ID:           owner.NavigationEndpoint.BrowseEndpoint.BrowseID,
			Name:         owner.Text,
			CanonicalURL: owner.NavigationEndpoint.BrowseEndpoint.CanonicalBaseURL,
		}
	}

	// Best effort: a result without a URL is still returned.
	videoURL, err := makeURL(watchURL, map[string]string{
		"v": renderer.VideoID,
	})
	if err != nil {
		videoURL = ""
	}

	return &SearchResult{
		Description: string(description),
		Channel:     channel,
		ID:          renderer.VideoID,
		Length:      renderer.LengthText.SimpleText,
		Title:       string(title),
		Thumbnails:  renderer.Thumbnail.Thumbnails,
		ViewCount:   renderer.ViewCountText.SimpleText,
		VideoURL:    videoURL,
	}
}

M yt.go => yt.go +0 -174
@@ 1,15 1,8 @@
package yt

import (
	"encoding/json"
	"errors"
	"io"

	"net/http"
	"net/url"
	"strings"

	"golang.org/x/net/html"
)

// SearchResult contains metadata about a Youtube video.


@@ 69,51 62,6 @@ func Search(query string, page int) ([]SearchResult, error) {
	return parseDataFromJson(data)
}

// parseDataFromHtml parses r as an HTML document, looks up the script text
// via findNode, and returns that text with everything before the first '{'
// and after the last '}' stripped.
//
// An error is returned when the HTML cannot be parsed or when findNode finds
// no matching node.
//
// NOTE(review): if the text contains no '{' or no '}', the trims consume the
// whole string and "" is returned together with a nil error.
func parseDataFromHtml(r io.Reader) (string, error) {
	n, err := html.Parse(r)
	if err != nil {
		return "", err
	}

	n = findNode(n)
	if n == nil {
		return "", errors.New("failed to parse YouTube data from HTML")
	}

	data := n.Data
	// Drop leading runes up to (not including) the first '{'.
	data = strings.TrimLeftFunc(data, func(r rune) bool {
		return r != '{'
	})
	// Drop trailing runes back to (not including) the last '}'.
	data = strings.TrimRightFunc(data, func(r rune) bool {
		return r != '}'
	})

	return data, nil
}

// nodeMatches reports whether n is a non-nil "script" element node that has a
// first child whose Data begins with "var ytInitialData".
func nodeMatches(n *html.Node) bool {
	// Reject anything that is not a script element with at least one child.
	if n == nil || n.Type != html.ElementNode || n.Data != "script" || n.FirstChild == nil {
		return false
	}

	return strings.HasPrefix(n.FirstChild.Data, "var ytInitialData")
}

// findNode searches the tree rooted at n, depth first and in sibling order,
// for the first node accepted by nodeMatches and returns that node's first
// child. It returns nil when no node in the tree matches.
//
// NOTE(review): n itself must be non-nil; nodeMatches tolerates nil, but the
// loop below dereferences n.
func findNode(n *html.Node) *html.Node {
	if nodeMatches(n) {
		return n.FirstChild
	}

	// Recurse into each child and stop at the first hit.
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		result := findNode(c)
		if result != nil {
			return result
		}
	}

	return nil
}

func makeURL(path string, params map[string]string) (string, error) {
	parsedURL, err := url.Parse(path)
	if err != nil {


@@ 128,125 76,3 @@ func makeURL(path string, params map[string]string) (string, error) {
	parsedURL.RawQuery = query.Encode()
	return parsedURL.String(), nil
}

// wrapper mirrors the nesting of the JSON document that parseDataFromJson
// decodes with json.Unmarshal, declaring only the path that leads down to the
// individual video renderers. Keys of the document that have no matching field
// here are ignored by the decoder, and since no struct tags are given the
// field names are matched against the JSON keys case-insensitively.
type wrapper struct {
	Contents struct {
		TwoColumnSearchResultsRenderer struct {
			PrimaryContents struct {
				SectionListRenderer struct {
					// One element per section of the list.
					Contents []struct {
						ItemSectionRenderer struct {
							// One element per item of the section.
							Contents []struct {
								VideoRenderer videoRenderer
							}
						}
					}
				}
			}
		}
	}
}

// videoRenderer holds the fields of a single video entry that
// convertYoutubeData turns into a SearchResult. It is populated by
// json.Unmarshal as part of wrapper; field names are matched against the JSON
// keys case-insensitively because no struct tags are declared.
type videoRenderer struct {
	// DescriptionSnippet carries the description text as a list of runs.
	DescriptionSnippet struct {
		Runs []struct {
			Text string
		}
	}
	// LengthText.SimpleText is copied to SearchResult.Length as-is.
	LengthText struct {
		SimpleText string
	}
	// OwnerText describes the owning channel: each run carries a display
	// text plus the browse endpoint from which the channel ID and canonical
	// URL are read.
	OwnerText struct {
		Runs []struct {
			NavigationEndpoint struct {
				BrowseEndpoint struct {
					BrowseID         string
					CanonicalBaseURL string
				}
			}
			Text string
		}
	}
	// Title carries the title text as a list of runs.
	Title struct {
		Runs []struct {
			Text string
		}
	}
	// Thumbnail.Thumbnails is copied to SearchResult.Thumbnails as-is; the
	// Thumbnail element type is declared elsewhere in the package.
	Thumbnail struct {
		Thumbnails []Thumbnail
	}
	// VideoID identifies the video; convertYoutubeData rejects entries where
	// it is empty.
	VideoID       string
	ViewCountText struct { // SimpleText is copied to SearchResult.ViewCount as-is.
		SimpleText string
	}
}

// parseDataFromJson decodes data, a JSON document shaped like wrapper, and
// returns one SearchResult for every video renderer in the first section that
// convertYoutubeData accepts.
//
// An error is returned when data is not valid JSON or when the section list
// is empty. The returned slice is nil when no item yields a result.
//
// NOTE(review): only element 0 of the section list is inspected; items in any
// later section are not looked at.
func parseDataFromJson(data string) ([]SearchResult, error) {
	var w wrapper

	if err := json.Unmarshal([]byte(data), &w); err != nil {
		return nil, err
	}

	sectionListRendererContents := w.Contents.TwoColumnSearchResultsRenderer.
		PrimaryContents.SectionListRenderer.Contents
	// Guard the [0] index below.
	if len(sectionListRendererContents) < 1 {
		return nil, errors.New("parse error: SectionListRenderer contents too short")
	}
	videoRenderers := sectionListRendererContents[0].ItemSectionRenderer.Contents

	var videoData []SearchResult
	for _, renderer := range videoRenderers {
		// A nil result marks an item that convertYoutubeData rejected.
		result := convertYoutubeData(renderer.VideoRenderer)
		if result != nil {
			videoData = append(videoData, *result)
		}
	}

	return videoData, nil
}

// convertYoutubeData converts a decoded videoRenderer into a SearchResult.
//
// It returns nil when renderer.VideoID is empty, which is treated as invalid
// data. Otherwise Description and Title take the Text of their first run (or
// stay empty without runs), Channel is built from the first owner run (or
// left at its zero value), and Length, Thumbnails and ViewCount are copied
// through unchanged. VideoURL is built with makeURL from watchURL and the
// video ID.
//
// NOTE(review): runs after the first are ignored for Description and Title.
func convertYoutubeData(renderer videoRenderer) *SearchResult {
	// check for invalid data (video ID should always be non-empty)
	if renderer.VideoID == "" {
		return nil
	}

	description := ""
	if len(renderer.DescriptionSnippet.Runs) > 0 {
		description = renderer.DescriptionSnippet.Runs[0].Text
	}

	var channel Channel
	if len(renderer.OwnerText.Runs) > 0 {
		owner := renderer.OwnerText.Runs[0]
		channel = Channel{
			ID:           owner.NavigationEndpoint.BrowseEndpoint.BrowseID,
			Name:         owner.Text,
			CanonicalURL: owner.NavigationEndpoint.BrowseEndpoint.CanonicalBaseURL,
		}
	}

	title := ""
	if len(renderer.Title.Runs) > 0 {
		title = renderer.Title.Runs[0].Text
	}

	videoURL, err := makeURL(watchURL, map[string]string{
		"v": renderer.VideoID,
	})
	// A makeURL failure is not propagated; the result simply carries an
	// empty VideoURL.
	if err != nil {
		videoURL = ""
	}

	return &SearchResult{
		Description: description,
		Channel:     channel,
		ID:          renderer.VideoID,
		Length:      renderer.LengthText.SimpleText,
		Title:       title,
		Thumbnails:  renderer.Thumbnail.Thumbnails,
		ViewCount:   renderer.ViewCountText.SimpleText,
		VideoURL:    videoURL,
	}
}