~rbn/mhtml

53c6114cd783e30e893d4f05a306028e87f2bfca — Ruben Schuller 6 years ago master
initial commit
1 files changed, 118 insertions(+), 0 deletions(-)

A mhtml.go
A  => mhtml.go +118 -0
@@ 1,118 @@
/*Command mhtml lists and extracts the contents of mhtml files.*/
package main

import (
	"encoding/base64"
	"flag"
	"fmt"
	"io"
	"log"
	"mime"
	"mime/multipart"
	"net/mail"
	"net/url"
	"os"
	"path"
	"regexp"
	"strings"
)

// Command-line flags. Each variable is a pointer that holds the parsed value
// once flag.Parse has run.
var (
	// extractFilter is the regular expression a part's location must match.
	extractFilter = flag.String("e", ".*", "Only extract filenames matching this regular expression. See: https://github.com/google/re2/wiki/Syntax for the syntax used.")

	// extractSubmatch selects which submatch of extractFilter becomes the output path.
	extractSubmatch = flag.Int("s", 0, "Use this submatch as path for output files.")

	// extract enables writing the parts to disk.
	extract = flag.Bool("x", false, "Extract the contents.")

	// list enables printing the output filenames.
	list = flag.Bool("l", false, "List the output filenames (after submatch processing).")

	// verbose enables diagnostic logging.
	verbose = flag.Bool("v", false, "Verbose output, handy for debugging.")
)

// open returns the input to read from: os.Stdin when filename is empty,
// otherwise the result of opening the named file for reading.
func open(filename string) (*os.File, error) {
	if filename == "" {
		return os.Stdin, nil
	}
	return os.Open(filename)
}

// main parses the command line, reads a MIME message from the file named by
// the first positional argument (standard input when none is given) and, if
// the message is multipart, walks its parts. Each part is named after the host
// and path of its Content-Location header; parts whose name matches the -e
// filter are listed (-l) or written to disk (-x), using the -s submatch as the
// output path. When both -l and -x are given, only listing is performed.
func main() {
	flag.Parse()
	filename := flag.Arg(0)

	extractRe, err := regexp.Compile(*extractFilter)
	if err != nil {
		log.Fatal(err)
	}

	inFile, err := open(filename)
	if err != nil {
		log.Fatal(err)
	}
	defer inFile.Close()

	m, err := mail.ReadMessage(inFile)
	if err != nil {
		log.Fatal(err)
	}

	mediaType, params, err := mime.ParseMediaType(m.Header.Get("Content-Type"))
	if err != nil {
		log.Fatal(err)
	}

	// Only multipart messages carry parts to list or extract.
	if !strings.HasPrefix(mediaType, "multipart/") {
		return
	}

	mr := multipart.NewReader(m.Body, params["boundary"])
	for {
		p, err := mr.NextPart()
		if err == io.EOF {
			return
		}
		if err != nil {
			log.Fatal(err)
		}

		contentURL, err := url.Parse(p.Header.Get("Content-Location"))
		if err != nil {
			log.Fatal(err)
		}

		contentLocation := path.Join(contentURL.Host, contentURL.Path)

		// Test if we want this file. A nil result means the filter did not
		// match, so a single regexp evaluation serves both the filter test
		// and the submatch lookup.
		matches := extractRe.FindStringSubmatch(contentLocation)
		if matches == nil {
			if *verbose {
				log.Printf("\"%v\" doesn't match filter.\n", contentLocation)
			}
			continue
		}

		// Use only the submatch. The index must be strictly below
		// len(matches) (and non-negative), otherwise indexing would panic.
		if *extractSubmatch < 0 || *extractSubmatch >= len(matches) {
			if *verbose {
				log.Printf("not enough submatches: %#v\n", matches)
			}
			continue
		}

		// The name originates from the (untrusted) input file, so confine
		// it to the current directory before using it as an output path.
		name := localPath(matches[*extractSubmatch])
		if name == "" {
			if *verbose {
				log.Printf("\"%v\" yields no usable output filename.\n", contentLocation)
			}
			continue
		}

		switch {
		case *list:
			fmt.Println(name)
		case *extract:
			if err := extractPart(name, p); err != nil {
				log.Fatal(err)
			}
		}
	}
}

// localPath turns loc into a cleaned, slash-separated relative path that
// cannot climb out of the current directory: a leading "/" is dropped and
// ".." elements are resolved against a virtual root. It returns "" when
// nothing usable remains.
func localPath(loc string) string {
	return strings.TrimPrefix(path.Clean("/"+loc), "/")
}

// extractPart writes the body of p to the file name, creating parent
// directories as needed. Bodies whose Content-Transfer-Encoding is base64 are
// decoded on the fly; quoted-printable is already decoded by the multipart
// reader. The returned error also reflects a failure to close the output
// file, since that may signal an incomplete write.
func extractPart(name string, p *multipart.Part) (err error) {
	if err = os.MkdirAll(path.Dir(name), 0700); err != nil {
		return err
	}

	outFile, err := os.Create(name)
	if err != nil {
		return err
	}
	defer func() {
		if cerr := outFile.Close(); err == nil {
			err = cerr
		}
	}()

	var body io.Reader = p
	encoding := strings.TrimSpace(p.Header.Get("Content-Transfer-Encoding"))
	if strings.EqualFold(encoding, "base64") {
		body = base64.NewDecoder(base64.StdEncoding, p)
	}

	_, err = io.Copy(outFile, body)
	return err
}