~hrodude/mvw-dl

ea0771d685eb74325526d1f1d88cb40bccea1239 — Georg Kaemmert 3 months ago
initial, working
4 files changed, 282 insertions(+), 0 deletions(-)

A README.md
A go.mod
A go.sum
A main.go
A  => README.md +45 -0
@@ 1,45 @@
# mediathekviewloader

makes use of https://mediathekviewweb.de to download videos

## usage

for usage with RSS-Feeds
```
mvwloader -f <feed>
```

for usage with a search term
```
mvwloader -s <search term>
```
available filters

| symbol | filter |
| ------------- |:--------:|
| !      | channel         |
| #      | topic           |
| +      | title           |
| *      | description     |

use the _-d_ flag to store downloads under a custom path template

```
mvwloader -s "!ARD #tatort" -d "./%C/%c/%t%e"
```

available replacements

| symbol  | meaning |
| ------------- |:-------------:|
| %t      | title               |
| %c      | category (e.g. "Alpha-Centauri") |
| %C      | creator (e.g. "BR") |
| %D      | publication Date    |
| %G      | GUID                |
| %T      | Text                |
| %e      | Extension (e.g. ".mp4") |

## idempotent
Once a video has been downloaded the GUID of that video is stored inside
```./<directory>/.guid``` so it will only be downloaded once.

A  => go.mod +12 -0
@@ 1,12 @@
module mediathekviewloader

go 1.19

require (
	github.com/mattn/go-runewidth v0.0.14 // indirect
	github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
	github.com/rivo/uniseg v0.4.2 // indirect
	github.com/schollz/progressbar/v3 v3.11.0 // indirect
	golang.org/x/sys v0.1.0 // indirect
	golang.org/x/term v0.1.0 // indirect
)

A  => go.sum +25 -0
@@ 1,25 @@
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw=
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ=
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.4.2 h1:YwD0ulJSJytLpiaWua0sBDusfsCZohxjxzVTYjwxfV8=
github.com/rivo/uniseg v0.4.2/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/schollz/progressbar/v3 v3.11.0 h1:3nIBUF1Zw/pGUaRHP7PZWmARP7ZQbWQ6vL6hwoQiIvU=
github.com/schollz/progressbar/v3 v3.11.0/go.mod h1:R2djRgv58sn00AGysc4fN0ip4piOGd3z88K+zVBjczs=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220829200755-d48e67d00261/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U=
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20220722155259-a9ba230a4035/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.1.0 h1:g6Z6vPFA9dYBAF7DWcH6sCcOntplXsDKcliusYijMlw=
golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=

A  => main.go +200 -0
@@ 1,200 @@
package main

import (
	"log"
	"fmt"
	"flag"
	"encoding/xml"
	"io/ioutil"
	"io"
	"net/http"
	"net/url"
	"path/filepath"
	"strings"
	"os"
	"bufio"
	"github.com/schollz/progressbar/v3"
)

// Item is one <item> element of the feed's <channel>. The struct tags tell
// encoding/xml which child element or attribute each field is filled from.
type Item struct {
	// Text is the character data found directly inside <item>; getItemDir
	// substitutes it for the %T placeholder.
	Text        string `xml:",chardata"`
	// Title is substituted for %t and shown in the progress output.
	Title       string `xml:"title"`
	Description string `xml:"description"`
	// Link is the URL processItem downloads; its extension becomes %e.
	Link        string `xml:"link"`
	// Guid.Text is the identifier written to and looked up in the ".guid"
	// file to decide whether an item was already downloaded.
	Guid        struct {
		Text        string `xml:",chardata"`
		IsPermaLink string `xml:"isPermaLink,attr"`
	} `xml:"guid"`
	// Category is substituted for %c, Creator for %C and PubDate for %D.
	Category  string `xml:"category"`
	Creator   string `xml:"creator"`
	PubDate   string `xml:"pubDate"`
	// Enclosure mirrors the <enclosure> element's attributes. It is decoded
	// but not read by any function visible in this file.
	Enclosure struct {
		Text   string `xml:",chardata"`
		URL    string `xml:"url,attr"`
		Length string `xml:"length,attr"`
		Type   string `xml:"type,attr"`
	} `xml:"enclosure"`
	Duration   string `xml:"duration"`
	WebsiteUrl string `xml:"websiteUrl"`
}

// Rss is the decoded form of a feed document whose root element is <rss>.
// openXMLFromURL fills it with encoding/xml; processFeed then walks
// Channel.Item.
type Rss struct {
	// XMLName pins the expected root element name to "rss".
	XMLName xml.Name `xml:"rss"`
	Text    string   `xml:",chardata"`
	// Dc, Content and Atom are attributes of the root element.
	// NOTE(review): presumably the xmlns:dc / xmlns:content / xmlns:atom
	// namespace declarations - confirm against a real feed.
	Dc      string   `xml:"dc,attr"`
	Content string   `xml:"content,attr"`
	Atom    string   `xml:"atom,attr"`
	Version string   `xml:"version,attr"`
	// Channel is the single <channel> element holding the feed metadata and
	// the list of items.
	Channel struct {
		Text        string `xml:",chardata"`
		Title       string `xml:"title"`
		Description string `xml:"description"`
		Link        struct {
			Text string `xml:",chardata"`
			Href string `xml:"href,attr"`
			Rel  string `xml:"rel,attr"`
			Type string `xml:"type,attr"`
		} `xml:"link"`
		Generator     string `xml:"generator"`
		LastBuildDate string `xml:"lastBuildDate"`
		Ttl           string `xml:"ttl"`
		// Item collects every <item> child of the channel, in document order.
		Item          []*Item `xml:"item"`
	} `xml:"channel"`
}

// openXMLFromURL fetches the RSS document at URL and decodes it into an Rss.
//
// A non-nil error is returned when the request fails, when the server answers
// with anything other than 200 OK, when the body cannot be read, or when the
// body is not a valid <rss> document. In every error case the returned *Rss is
// an empty, non-nil value, so callers that merely log the error and carry on
// iterate over zero items instead of dereferencing nil.
func openXMLFromURL(URL string) (*Rss, error) {
	resp, err := http.Get(URL)
	if err != nil {
		return &Rss{}, fmt.Errorf("GET error: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// err is nil on this path; the status line is the useful information.
		return &Rss{}, fmt.Errorf("HTTP-Status error: %s", resp.Status)
	}

	// ioutil.ReadAll is kept on purpose: it is the only use of the file's
	// io/ioutil import, and dropping it would leave that import unused.
	xmlBytes, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return &Rss{}, fmt.Errorf("Read body: %w", err)
	}

	// Decode into an allocated value so a failed decode can never hand a nil
	// pointer back to the caller.
	result := &Rss{}
	if err := xml.Unmarshal(xmlBytes, result); err != nil {
		return &Rss{}, fmt.Errorf("parse XML: %w", err)
	}
	return result, nil
}

func dirExists(dir string) bool {
        _, err := os.Stat(dir)
        return !os.IsNotExist(err)
}

// mkdir makes sure dir exists as a directory, creating missing parents with
// mode 0750. It returns nil when the directory is already there and the
// underlying error otherwise (for example when part of the path is a regular
// file), leaving the decision how to react to the caller.
func mkdir(dir string) error {
	// os.MkdirAll is a no-op for an existing directory, so no separate
	// existence check is needed.
	return os.MkdirAll(dir, 0750)
}

// getItemDir expands the placeholders in the path template dir with values
// taken from item and returns the resulting path:
//
//	%t title      %c category   %C creator   %D publication date
//	%G GUID       %T item text  %e extension of item.Link (e.g. ".mp4")
//
// All placeholders are replaced in a single pass over the template, so text
// that was substituted is never scanned again and the result does not depend
// on any iteration order. Because the values come from a downloaded feed,
// each one is passed through sanitizePathPart first so it cannot add
// directory levels or climb out of the target directory.
func getItemDir(dir string, item *Item) string {
	replacer := strings.NewReplacer(
		"%t", sanitizePathPart(item.Title),
		"%c", sanitizePathPart(item.Category),
		"%C", sanitizePathPart(item.Creator),
		"%D", sanitizePathPart(item.PubDate),
		"%G", sanitizePathPart(item.Guid.Text),
		"%T", sanitizePathPart(item.Text),
		"%e", sanitizePathPart(filepath.Ext(item.Link)),
	)
	return replacer.Replace(dir)
}

// sanitizePathPart makes a feed-supplied value safe to use inside one path
// component: path separators become "_" and a bare ".." becomes "_".
func sanitizePathPart(value string) string {
	cleaned := strings.Map(func(r rune) rune {
		if r == '/' || r == filepath.Separator {
			return '_'
		}
		return r
	}, value)
	if cleaned == ".." {
		return "_"
	}
	return cleaned
}

// itemExists reports whether item's GUID is already listed in the ".guid"
// file inside directory, which holds one GUID per line.
//
// A missing ".guid" file simply means nothing has been recorded yet. Any
// other problem opening or reading the file is logged and treated as "not
// recorded", so the caller goes ahead with the download.
func itemExists(item *Item, directory string) bool {
	guidFile, err := os.Open(filepath.Join(directory, ".guid"))
	if err != nil {
		if !os.IsNotExist(err) {
			log.Println(err)
		}
		return false
	}
	// Deferred so the handle is released on the early "found" return as well.
	defer guidFile.Close()

	lines := bufio.NewScanner(guidFile)
	for lines.Scan() {
		if lines.Text() == item.Guid.Text {
			return true
		}
	}
	if err := lines.Err(); err != nil {
		log.Println(err)
	}
	return false
}

// processItem downloads the video behind item.Link into the path produced by
// expanding the template directory for item, unless the item's GUID is
// already recorded in the target directory's ".guid" file.
//
// The GUID is recorded only after the download completed successfully. Any
// failure (directory creation, HTTP request, unexpected status, file or
// network I/O) is logged and ends the processing of this item, so the caller
// can continue with the next one and a later run can retry.
func processItem(item *Item, directory string) {
	itemDirFull := getItemDir(directory, item)
	itemDirectory := filepath.Dir(itemDirFull)
	itemFile := filepath.Base(itemDirFull)

	if err := mkdir(itemDirectory); err != nil {
		log.Printf("create directory %q: %v", itemDirectory, err)
		return
	}
	if itemExists(item, itemDirectory) {
		fmt.Printf("Vorhanden, überspringen: %s, %s, %s\n", item.Creator, item.Category, item.Title)
		return
	}

	fmt.Printf("Downloading %s", item.Title)
	label := fmt.Sprintf("%s, %s, %s", item.Creator, item.Category, item.Title)
	target := filepath.Join(itemDirectory, itemFile)
	if err := downloadFile(item.Link, target, label); err != nil {
		log.Printf("download %q: %v", item.Link, err)
		return
	}

	if err := rememberGUID(itemDirectory, item.Guid.Text); err != nil {
		log.Printf("remember GUID %q: %v", item.Guid.Text, err)
	}
}

// downloadFile streams the response body of a GET on link into the file
// target while showing a progress bar titled label. A partially written
// target is removed when the transfer fails.
func downloadFile(link, target, label string) error {
	resp, err := http.Get(link)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("unexpected HTTP status %s", resp.Status)
	}

	// O_TRUNC: a leftover file from an earlier attempt must not keep stale
	// bytes beyond the end of the new content.
	out, err := os.OpenFile(target, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
	if err != nil {
		return err
	}

	bar := progressbar.DefaultBytes(resp.ContentLength, label)
	_, err = io.Copy(io.MultiWriter(out, bar), resp.Body)
	// Close errors matter for written files, so surface them if the copy
	// itself succeeded.
	if closeErr := out.Close(); err == nil {
		err = closeErr
	}
	if err != nil {
		// Best-effort cleanup; the copy/close error is the one worth reporting.
		_ = os.Remove(target)
		return err
	}
	return nil
}

// rememberGUID appends guid as one line to the ".guid" file in directory,
// creating the file with mode 0600 if needed.
func rememberGUID(directory, guid string) error {
	guidFile, err := os.OpenFile(filepath.Join(directory, ".guid"), os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0600)
	if err != nil {
		return err
	}
	if _, err := guidFile.WriteString(guid + "\n"); err != nil {
		guidFile.Close()
		return err
	}
	return guidFile.Close()
}

// processFeed loads the RSS feed at adress and hands every item in it to
// processItem together with the path template outputDirectory.
//
// If the feed cannot be loaded the error is printed and nothing is processed.
func processFeed(adress, outputDirectory string) {
	feed, err := openXMLFromURL(adress)
	if err != nil {
		fmt.Println(err)
		return
	}
	// Guard against a loader that reports success without a document, which
	// would otherwise panic on feed.Channel below.
	if feed == nil {
		return
	}
	for _, item := range feed.Channel.Item {
		processItem(item, outputDirectory)
	}
}

// getSearchQuery builds the mediathekviewweb.de feed URL for the search term
// that searchFlag points to, query-escaping the term.
func getSearchQuery(searchFlag *string) string {
	const feedEndpoint = "https://mediathekviewweb.de/feed?query="
	return feedEndpoint + url.QueryEscape(*searchFlag)
}

// main reads the command-line flags and processes exactly one feed: the feed
// URL given with -f if present, otherwise the search feed built from -s. With
// neither flag set it only prints a notice. -d supplies the output path
// template passed on to processFeed.
func main() {
	query := flag.String("s", "", "!Sender #Thema +Titel *Beschreibung")
	feedURL := flag.String("f", "", "URI to feed")
	target := flag.String("d", "./%C/%c/%t%e", "directory to safe content")
	flag.Parse()

	switch {
	case *feedURL != "":
		// An explicit feed takes precedence over a search term.
		processFeed(*feedURL, *target)
	case *query != "":
		processFeed(getSearchQuery(query), *target)
	default:
		fmt.Println("kein Feed übergeben")
	}
}