~mendelmaleh/download

d367236e0766f5511a5845f6fd923c6742c0c8e3 — Mendel E 1 year, 10 days ago
Initial commit
3 files changed, 180 insertions(+), 0 deletions(-)

A download.go
A tar.go
A utils.go
A  => download.go +49 -0
@@ 1,49 @@
package download

import (
	"net/http"
	"sync"
)

// A type that satisfies download.Interface can be downloaded with download.Tar etc.
type Interface interface {
	URLs(sep string) []File
}

// File to download. URL is where to get the data, path is where to store it.
type File struct {
	Path, URL string
}

// DL is the downloader.
type DL struct {
	Client *http.Client
	Write  sync.Mutex
}

// Options for the downloader.
type Options struct {
	// Separator to use between files, eg you can use "_" for a flat
	// file structure, or leave the default "/" to have dirs.
	Separator string

	// Max attempts trying to download a file
	MaxAttempts int

	// Concurrent mode
	Concurrent bool
	// Max number of gorountines to spawn
	MaxGoroutines int
}

// Default Options
var DefaultOptions = &Options{
	Separator:     "/",
	MaxAttempts:   3,
	MaxGoroutines: 64,
}

// New DL instance.
func New() *DL {
	return &DL{Client: http.DefaultClient}
}

A  => tar.go +88 -0
@@ 1,88 @@
package download

import (
	"archive/tar"
	"errors"
	"io"
	"os"
	"sync"
)

var (
	ErrNoData       = errors.New("nothing to download")
	ErrNoGoroutines = errors.New("MaxGoroutines should be positive")
)

// Tar downloads data to a tarball at file.
func (dl *DL) Tar(data Interface, file *os.File, opt *Options) error {
	// set default options
	if opt == nil {
		opt = DefaultOptions
	}

	// ensure opt.MaxGoroutines is positive, otherwise it will hang
	// forever in concurrent mode.
	if opt.Concurrent && opt.MaxGoroutines <= 0 {
		return ErrNoGoroutines
	}

	// get files
	files := data.URLs(opt.Separator)
	if len(files) == 0 {
		return ErrNoData
	}

	// prep tar
	archive := tar.NewWriter(file)
	defer archive.Close()

	// download not concurrent
	if !opt.Concurrent {
		for _, f := range files {
			dl.tar(f, archive, opt)
		}

		return nil
	}

	// download concurrent
	var wg sync.WaitGroup
	defer wg.Wait()

	mg := make(chan bool, opt.MaxGoroutines)

	for _, f := range files {
		mg <- true
		wg.Add(1)

		go func(f File) {
			defer wg.Done()
			defer func() { <-mg }()

			dl.tar(f, archive, opt)
		}(f)
	}

	return nil
}

func (dl *DL) tar(f File, a *tar.Writer, opt *Options) {
	buf, err := dl.try(f.URL, opt.MaxAttempts)
	if err != nil {
		panic(err)
	}

	dl.Write.Lock()
	defer dl.Write.Unlock()

	a.WriteHeader(&tar.Header{
		Name: f.Path,
		Mode: 0600,
		Size: int64(buf.Len()),
	})

	_, err = io.Copy(a, buf)
	if err != nil {
		panic(err)
	}
}

A  => utils.go +43 -0
@@ 1,43 @@
package download

import (
	"bytes"
	"errors"
	"io"
	"net"
	"time"
)

func (dl *DL) try(url string, attempts int) (buf *bytes.Buffer, err error) {
	// if attempts <= 0, return an empty buffer
	if attempts <= 0 {
		buf = &bytes.Buffer{}
		return
	}

	for i := 0; i < attempts; i++ {
		buf, err = dl.dl(url)
		if err == nil {
			return
		}

		var e *net.DNSError
		if errors.As(err, &e) && e.Temporary() {
			time.Sleep(10 * time.Millisecond)
		}
	}

	return
}

func (dl *DL) dl(url string) (buf *bytes.Buffer, err error) {
	resp, err := dl.Client.Get(url)
	if err != nil {
		return
	}
	defer resp.Body.Close()

	buf = &bytes.Buffer{}
	_, err = io.Copy(buf, resp.Body)
	return
}