~nilium/mtar

3730d69cb28122d69f73b00fec18df6f7b46ad38 — Noel Cower 6 years ago ffbe98b
Add per-file options, allow adding non-regular files

- Allow reading non-regular files so they can be added to an archive.
  Non-regular files include, primarily, named pipes and devices, as
  well as stdin as a special case (via '-' -- /dev/stdin doesn't get
  special treatment).

  This is supported mainly by buffering those files into memory, since
  the actual size of those files when stat'd is not necessarily useful
  (especially for pipes). I'd like to try to avoid buffering things
  into memory in the future, but my particular use case right now isn't
  harmed by this. It might be worse for people using it in ways I'm not
  expecting.

- File options are supported as a means of changing the properties of
  files as they're added to the archive. This allows you to synthesize
  some files, like directories and links, by doing, for example,
  '-:/etc:dir,owner=root,uid=0' or something along those lines.

  Currently, mtime, atime, and ctime can be modified using options, but
  if they're passed as integers, there's currently nothing to prevent
  integer overflow (and this isn't an error in Go, so it'll just
  silently overflow). This needs to be fixed, but also doesn't affect
  me right now.

Change-Id: I9aca08609c1df5c57c18db5d13894f8c9510fb9e
1 files changed, 349 insertions(+), 30 deletions(-)

M mtar.go
M mtar.go => mtar.go +349 -30
@@ 35,9 35,9 @@
//
//    Writes a tar file to standard output.
//
//    FILE may be a regular filepath for a file, symlink, or directory. If
//    FILE contains a ':', the text after the colon is the path to write to
//    the tar file. For example, the following paths behave differently:
//    FILE may be a filepath for a file, symlink, or directory. If FILE
//    contains a ':', the text after the colon is the path to write to the tar
//    file. For example, the following paths behave differently:
//
//      SRC
//          Add file SRC to the tar file as-is.


@@ 46,8 46,40 @@
//      SRC:DEST
//          Add file SRC as DEST to the tar file.
//
//    There is currently no SRC to accept standard input as a file (other
//    than, for example, using /dev/stdin).
//    To read a file from standard input, you can set '-' as the SRC. If no
//    DEST is given for this, it will default to dev/stdin (relative). File
//    permissions and ownership are taken from fd 1, so overriding them may be
//    necessary. If the link or dir option is set, - can be used to synthesize
//    a file entry.
//
//    In the case of SRC: and SRC:DEST, you can also pass an additional :OPTS
//    with either (i.e., SRC::OPTS or SRC:DEST:OPTS), where FLAGS are
//    comma-separated strings that set options. The following options are
//    available (all option names are case-sensitive):
//
//      norec
//        For directory entries, do not recursively add files from the
//        directory. This will cause only the directory itself to appear as
//        an entry.
//      dir
//        Force file to become a dir entry. Implies norec.
//      link=LINK
//        Force file to become a symlink pointing to LINK.
//      uid=UID | owner=USERNAME
//        Set the owner's uid and/or username for the file entry.
//      gid=GID | group=GROUPNAME
//        Set the gid and/or the group name for the file entry.
//      mode=MODE
//        Set the file mode to MODE (may be hex, octal, or an integer -- octal
//        must begin with a 0, hex with 0x).
//      mtime=TIME | atime=TIME | ctime=TIME
//        Sets the mod time, access time, or changed time to TIME. May be an
//        RFC3339 timestamp or an integer timestamp (since the Unix epoch) in
//        seconds, milliseconds (>=12 digits), or microseconds (>=15 digits).
//
//    Any whitespace preceding an option is trimmed. Whitespace is not trimmed
//    before or after the '=' symbol for options that take values. Commas are
//    not currently permitted inside options.
//
//    In addition, options may be passed in the middle of file arguments to
//    control archive creation:


@@ 73,6 105,9 @@ package main // import "go.spiff.io/mtar"

import (
	"archive/tar"
	"bytes"
	"errors"
	"fmt"
	"io"
	"log"
	"os"


@@ 83,6 118,7 @@ import (
	"strconv"
	"strings"
	"syscall"
	"time"
)

type Args struct{ args []string }


@@ 96,6 132,7 @@ func (m Matcher) matches(s string) bool {
	return m.rx.MatchString(s) == m.want
}

var startupTime = time.Now()
var startupDir string
var skipSrcGlobs []Matcher
var skipDestGlobs []Matcher


@@ 113,9 150,9 @@ func usage() {

Writes a tar file to standard output.

FILE may be a regular filepath for a file, symlink, or directory. If
FILE contains a ':', the text after the colon is the path to write to
the tar file. For example, the following paths behave differently:
FILE may be a filepath for a file, symlink, or directory. If FILE
contains a ':', the text after the colon is the path to write to the tar
file. For example, the following paths behave differently:

  SRC
      Add file SRC to the tar file as-is.


@@ 124,8 161,40 @@ the tar file. For example, the following paths behave differently:
  SRC:DEST
      Add file SRC as DEST to the tar file.

There is currently no SRC to accept standard input as a file (other
than, for example, using /dev/stdin).
To read a file from standard input, you can set '-' as the SRC. If no
DEST is given for this, it will default to dev/stdin (relative). File
permissions and ownership are taken from fd 1, so overriding them may be
necessary. If the link or dir option is set, - can be used to synthesize
a file entry.

In the case of SRC: and SRC:DEST, you can also pass an additional :OPTS
with either (i.e., SRC::OPTS or SRC:DEST:OPTS), where FLAGS are
comma-separated strings that set options. The following options are
available (all option names are case-sensitive):

  norec
    For directory entries, do not recursively add files from the
    directory. This will cause only the directory itself to appear as
    an entry.
  dir
    Force file to become a dir entry. Implies norec.
  link=LINK
    Force file to become a symlink pointing to LINK.
  uid=UID | owner=USERNAME
    Set the owner's uid and/or username for the file entry.
  gid=GID | group=GROUPNAME
    Set the gid and/or the group name for the file entry.
  mode=MODE
    Set the file mode to MODE (may be hex, octal, or an integer -- octal
    must begin with a 0, hex with 0x).
  mtime=TIME | atime=TIME | ctime=TIME
    Sets the mod time, access time, or changed time to TIME. May be an
    RFC3339 timestamp or an integer timestamp (since the Unix epoch) in
    seconds, milliseconds (>=12 digits), or microseconds (>=15 digits).

Any whitespace preceding an option is trimmed. Whitespace is not trimmed
before or after the '=' symbol for options that take values. Commas are
not currently permitted inside options.

In addition, options may be passed in the middle of file arguments to
control archive creation:


@@ 165,6 234,11 @@ func main() {
	w := tar.NewWriter(os.Stdout)
	defer func() { failOnError("error writing output", w.Close()) }()
	argv := Args{args: os.Args[1:]}

	if argv.args[0] == "--" {
		argv.Shift()
	}

	for s, ok := argv.Shift(); ok; s, ok = argv.Shift() {
		switch {
		// Filter flags


@@ 217,28 291,46 @@ func main() {
				src, dest = s[:idx], s[idx+1:]
			}

			addFile(w, src, dest, true)
			var opts *FileOpts
			if idx := strings.IndexByte(dest, ':'); idx > -1 {
				opts, err = parseFileOptions(dest[idx+1:])
				failOnError("cannot parse options for "+src, err)
				dest = dest[:idx]
			}

			addFile(w, src, dest, opts, true)
		}
	}
}

func addFile(w *tar.Writer, src, dest string, allowRecursive bool) {
func addFile(w *tar.Writer, src, dest string, opts *FileOpts, allowRecursive bool) {
	if shouldSkip(skipSrcGlobs, src) {
		return
	}

	st, err := os.Lstat(src)
	var needBuffer bool
	var st os.FileInfo
	var err error

	if src == "-" {
		if dest == "" {
			dest = "dev/stdin"
		}
		st, err = os.Stdin.Stat()
		needBuffer = true
	} else {
		st, err = os.Lstat(src)
	}

	failOnError("add file: stat error", err)
	if dest == "" {
		dest = src
		dest = filepath.ToSlash(src)
		if strings.HasPrefix(dest, "/") {
			dest = path.Clean("." + dest)
		}
	}

	dest = path.Clean(filepath.ToSlash(dest))
	if strings.HasPrefix(dest, "/") {
		dest = "." + dest
	} else if !strings.HasPrefix("./", dest) {
		dest = "./" + dest
	}

	if dest == ".." || strings.HasPrefix(dest, "../") {
		log.Fatal("add file: destination may not contain .. (", dest, ")")
	}


@@ 267,6 359,8 @@ func addFile(w *tar.Writer, src, dest string, allowRecursive bool) {
	switch {
	case st.Mode().IsRegular():
		hdr.Size = st.Size()
	case st.Mode()&(os.ModeCharDevice|os.ModeDevice|os.ModeNamedPipe) != 0:
		needBuffer = true
	case st.IsDir():
		hdr.Typeflag = tar.TypeDir
		hdr.Name = dest + "/"


@@ 277,7 371,7 @@ func addFile(w *tar.Writer, src, dest string, allowRecursive bool) {
		failOnError("cannot resolve symlink", err)
		hdr.Linkname = link
	default:
		log.Print("skipping file: ", src, ": cannot add file with mode ", st.Mode().Perm())
		log.Print("skipping file: ", src, ": cannot add file")
		return
	}



@@ 285,37 379,66 @@ func addFile(w *tar.Writer, src, dest string, allowRecursive bool) {
		return
	}

	opts.setHeaderFields(hdr)

	// Buffer input file if it's not a regular file
	var r io.Reader
	if needBuffer && hdr.Typeflag == tar.TypeReg {
		var file *os.File
		if src == "-" {
			file = os.Stdin
		} else {
			file, err = os.Open(src)
			failOnError("open error: "+src, err)
		}

		var buf bytes.Buffer
		_, err := io.Copy(&buf, file)
		failOnError("unable to buffer "+src, err)
		hdr.Size = int64(buf.Len())
		r = &buf

		if src != "-" {
			failOnError("unable to close "+src, file.Close())
		}
	}

	failOnError("write header: "+hdr.Name, w.WriteHeader(hdr))

	if st.Mode().IsDir() {
		if allowRecursive {
			addRecursive(w, src, dest)
		if allowRecursive && opts.allowRecursive() {
			addRecursive(w, src, dest, opts)
		}
		return
	}

	if !st.Mode().IsRegular() {
	if hdr.Typeflag != tar.TypeReg {
		return
	}

	file, err := os.Open(src)
	failOnError("read error: "+src, err)
	defer file.Close()
	n, err := io.Copy(w, file)
	if r == nil {
		file, err := os.Open(src)
		failOnError("read error: "+src, err)
		defer file.Close()
		r = file
	}
	n, err := io.Copy(w, r)
	failOnError("copy error: "+src, err)
	if n != hdr.Size {
		log.Fatalf("copy error: size mismatch for %s: wrote %d, want %d", src, n, hdr.Size)
	}

	failOnError("flush error: "+src, w.Flush())
}

func addRecursive(w *tar.Writer, src, prefix string) {
func addRecursive(w *tar.Writer, src, prefix string, opts *FileOpts) {
	src = strings.TrimRight(src, "/")
	filepath.Walk(src, func(p string, info os.FileInfo, err error) error {
		if filepath.Clean(p) == filepath.Clean(src) || shouldSkip(skipSrcGlobs, p) {
			return nil
		}
		dest := path.Join(prefix, strings.TrimPrefix(p, src))
		addFile(w, p, dest, false)
		addFile(w, p, dest, opts, false)
		return nil
	})
}


@@ 359,3 482,199 @@ func getUidGid(f os.FileInfo) (*user.User, *user.Group, bool) {
	}
	return u, g, true
}

type FileOpts struct {
	noRecursive bool

	// exclusive:
	dir  bool
	link string

	uid      *int
	username string

	gid   *int
	group string

	mode int64

	mtime time.Time
	atime time.Time
	ctime time.Time
}

func newFileOpts() *FileOpts {
	return &FileOpts{}
}

var timeLayouts = []string{
	time.RFC3339Nano,
	// TODO: add additional layouts if needed
}

func parseFileOptions(opts string) (*FileOpts, error) {
	fields := strings.FieldsFunc(opts, isComma)
	if len(fields) == 0 {
		return nil, nil
	}

	var err error
	fo := newFileOpts()
	for _, f := range fields {
		f = strings.TrimSpace(f)
		if f == "" {
			continue
		}
		switch {
		case f == "norec":
			fo.noRecursive = true
		case f == "dir":
			if fo.link != "" {
				return nil, fmt.Errorf("may not set dir with link=%s", fo.link)
			}
			fo.dir = true
			fo.noRecursive = true
		case strings.HasPrefix(f, "link="):
			if fo.dir {
				return nil, errors.New("may not set link with dir")
			}
			if fo.link = f[len("link="):]; fo.link == "" {
				return nil, errors.New("may not set an empty link name")
			}
		case strings.HasPrefix(f, "uid="):
			if uid, err := strconv.Atoi(f[len("uid="):]); err != nil {
				return nil, fmt.Errorf("invalid uid: %v", err)
			} else {
				fo.uid = &uid
			}
		case strings.HasPrefix(f, "gid="):
			if gid, err := strconv.Atoi(f[len("gid="):]); err != nil {
				return nil, fmt.Errorf("invalid gid: %v", err)
			} else {
				fo.gid = &gid
			}
		case strings.HasPrefix(f, "owner="):
			if fo.username = f[len("owner="):]; fo.username == "" {
				return nil, errors.New("may not set an empty username for an owner")
			}
		case strings.HasPrefix(f, "group="):
			if fo.group = f[len("group="):]; fo.group == "" {
				return nil, errors.New("may not set an empty group name")
			}
		case strings.HasPrefix(f, "mode="):
			if fo.mode, err = strconv.ParseInt(f[len("mode="):], 0, 64); err != nil {
				return nil, fmt.Errorf("invalid mode: %v", err)
			} else if fo.mode == 0 {
				return nil, errors.New("invalid mode: may not be 0")
			}
		case strings.HasPrefix(f, "mtime=") || strings.HasPrefix(f, "atime=") || strings.HasPrefix(f, "ctime="):
			var tp *time.Time
			switch f[0] {
			case 'm':
				tp = &fo.mtime
			case 'a':
				tp = &fo.atime
			case 'c':
				tp = &fo.ctime
			}
			*tp = time.Time{}

			ts := f[len("mtime="):]
			if ts == "now" {
				*tp = startupTime
				continue
			}

			for _, layout := range timeLayouts {
				var t time.Time
				if t, err = time.Parse(layout, ts); err == nil {
					*tp = t
					break
				}
			}

			// Integer timestamp
			if err != nil {
				var ti int64
				ti, err = strconv.ParseInt(ts, 10, 64)
				if err != nil {
					goto timeFailure
				}
				// TODO: handle integer overflow
				if len(ts) >= 15 { // microseconds
					dur := time.Duration(ti) * time.Microsecond
					*tp = time.Unix(int64(dur/time.Second), int64(dur%time.Second))
				} else if len(ts) >= 12 { // milliseconds
					dur := time.Duration(ti) * time.Millisecond
					*tp = time.Unix(int64(dur/time.Second), int64(dur%time.Second))
				} else { // seconds
					*tp = time.Unix(ti, 0)
				}
			}
		timeFailure:
			if tp.IsZero() {
				return nil, fmt.Errorf("invalid %s: %q", f[:len("mtime")], ts)
			}
		default:
			return nil, fmt.Errorf("unexpected option: %q", f)
		}
	}

	return fo, nil
}

func (f *FileOpts) allowRecursive() bool {
	return f == nil || !f.noRecursive
}

func (f *FileOpts) setHeaderFields(hdr *tar.Header) {
	if f == nil {
		return
	}

	if f.uid != nil {
		hdr.Uid = *f.uid
	}
	if f.gid != nil {
		hdr.Gid = *f.gid
	}

	if f.username != "" {
		hdr.Uname = f.username
	}
	if f.group != "" {
		hdr.Gname = f.group
	}

	if f.mode != 0 {
		hdr.Mode = f.mode
	}

	if f.dir {
		hdr.Typeflag = tar.TypeDir
		hdr.Linkname = ""
		hdr.Size = 0
		if !strings.HasSuffix(hdr.Name, "/") {
			hdr.Name += "/"
		}
	} else if f.link != "" {
		hdr.Linkname = f.link
		hdr.Typeflag = tar.TypeSymlink
	}

	if !f.mtime.IsZero() {
		hdr.ModTime = f.mtime
	}

	if !f.atime.IsZero() {
		hdr.AccessTime = f.atime
	}

	if !f.ctime.IsZero() {
		hdr.ChangeTime = f.ctime
	}
}

func isComma(r rune) bool {
	return r == ','
}