~mendelmaleh/csvgen

60320cafcad72e0436c2d12e0cb48254f0de127a — Mendel E 2 years ago 3de6c1b
Multi CSV support with deduplication
3 files changed, 111 insertions(+), 24 deletions(-)

M go.mod
M go.sum
M main.go
M go.mod => go.mod +4 -1
@@ 2,4 2,7 @@ module git.sr.ht/~mendelmaleh/csvgen

go 1.18

require github.com/dave/jennifer v1.5.0 // indirect
require (
	github.com/dave/jennifer v1.5.0
	github.com/mitchellh/hashstructure v1.1.0
)

M go.sum => go.sum +2 -0
@@ 7,6 7,8 @@ github.com/dave/jennifer v1.5.0/go.mod h1:4MnyiFIlZS3l5tSDn8VnzE6ffAhYBMB2SZntBs
github.com/dave/kerr v0.0.0-20170318121727-bc25dd6abe8e/go.mod h1:qZqlPyPvfsDJt+3wHJ1EvSXDuVjFTK0j2p/ca+gtsb8=
github.com/dave/patsy v0.0.0-20210517141501-957256f50cba/go.mod h1:qfR88CgEGLoiqDaE+xxDCi5QA5v4vUoW0UCX2Nd5Tlc=
github.com/dave/rebecca v0.9.1/go.mod h1:N6XYdMD/OKw3lkF3ywh8Z6wPGuwNFDNtWYEMFWEmXBA=
github.com/mitchellh/hashstructure v1.1.0 h1:P6P1hdjqAAknpY/M1CGipelZgp+4y9ja9kmUZPXP+H0=
github.com/mitchellh/hashstructure v1.1.0/go.mod h1:xUDAozZz0Wmdiufv0uyhnHkUTN6/6d8ulp4AwfLKrmA=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/yuin/goldmark v1.4.1/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=

M main.go => main.go +105 -23
@@ 2,14 2,15 @@ package main

import (
	"bufio"
	"fmt"
	"go/token"
	"log"
	"os"
	"sort"
	"strings"
	"unicode"

	"github.com/dave/jennifer/jen"
	"github.com/mitchellh/hashstructure"
)

func identifier(id string) string {


@@ 24,43 25,124 @@ func identifier(id string) string {
	return strings.Join(words, "")
}

func main() {
	// TODO: help flag
// groupname derives a struct name from a group of header indexes: every
// index is turned into one letter ('A' for 0, 'B' for 1, ...) and the letters
// are concatenated in the order given. An empty group yields "".
func groupname(x []int) string {
	letters := make([]rune, 0, len(x))
	for _, idx := range x {
		letters = append(letters, 'A'+rune(idx))
	}

	return string(letters)
}

// main reads comma-separated header lines from stdin (one header per line),
// groups the fields by the set of header indexes they appear in, and renders
// one struct type per group to stdout.
//
// NOTE(review): this span is a rendered diff in which removed and added lines
// are interleaved (for example, both the old single-header scan and the new
// multi-header loop appear, and fields is declared twice), so it does not
// compile as shown.
func main() {
	// headers
	scanner := bufio.NewScanner(os.Stdin)

	var header string
	if scanner.Scan() {
		header = scanner.Text()
	var headers [][]string
	// each input line is split on "," into the field names of one header
	for scanner.Scan() {
		headers = append(headers, strings.Split(scanner.Text(), ","))
	}

	if err := scanner.Err(); err != nil {
		// NOTE(review): the format string contains a %s verb but no argument
		// is passed; err is presumably the intended argument.
		log.Fatalf("error reading header from stdin: %s")
	}

	// TODO: sep flag, unquote fields
	fields := strings.Split(header, ",")
	// fields to header indexes
	fields := make(map[string][]int)

	/*
		fmt.Println(strings.Join(fields, "\n"))
		os.Exit(0)
	*/
	// record, for every field name, the index of each header that contains it
	for i, header := range headers {
		for _, field := range header {
			fields[field] = append(fields[field], i)
		}
	}

	// TODO: struct name flag
	t := jen.Type().Id("Raw").StructFunc(func(g *jen.Group) {
		for _, v := range fields {
			id := identifier(v)
			if !token.IsIdentifier(id) {
				log.Fatalf("invalid identifier: %q", id)
			}
	// header index group
	var groups [][]int

			g.Id(id).String().Tag(map[string]string{"csv": v})
	// header index group hash to fields
	parts := make(map[uint64][]string)

	// fields that share the same header index list hash to the same key, so
	// each distinct index list is appended to groups only once
	for k, v := range fields {
		hash, err := hashstructure.Hash(v, nil)
		if err != nil {
			log.Fatal(err)
		}

		if _, ok := parts[hash]; !ok {
			groups = append(groups, v)
		}

		parts[hash] = append(parts[hash], k)
	}

	// sort groups
	sort.Slice(groups, func(i, j int) bool {
		li := len(groups[i])
		lj := len(groups[j])

		// equal-length groups are ordered by their first differing index,
		// ascending
		if li == lj {
			for x := 0; x < li; x++ {
				xi := groups[i][x]
				xj := groups[j][x]

				if xi != xj {
					return xi < xj
				}
			}
		}

		// descending, biggest group (most common part) to least
		return li > lj
	})

	if err := t.Render(os.Stdout); err != nil {
		log.Fatal(err)
	// header index to header groups
	backrefs := make([][]int, len(headers))

	for i, group := range groups {
		// groups are sorted longest first, so once a single-header group is
		// reached no shared (multi-header) groups remain
		if len(group) == 1 {
			break
		}

		for _, v := range group {
			backrefs[v] = append(backrefs[v], i)
		}
	}

	// output
	f := jen.NewFile("main")

	// one struct type per group, named by groupname from the group's indexes
	for _, v := range groups {
		hash, err := hashstructure.Hash(v, nil)
		if err != nil {
			log.Fatal(err)
		}

		// sort the group's field names so they are emitted in a stable order
		sort.Strings(parts[hash])
		// spew.Dump(parts[hash])

		f.Type().Id(groupname(v)).StructFunc(func(g *jen.Group) {
			// common parts
			// a single-header struct lists, by name only, each shared group
			// that its header belongs to (presumably emitted as embedded
			// fields — confirm against jennifer's output)
			if len(v) == 1 {
				for _, v := range backrefs[v[0]] {
					g.Id(groupname(groups[v]))
				}
				g.Line()
			}

			// fields
			// each field becomes a string member tagged with its original
			// CSV column name
			for _, v := range parts[hash] {
				id := identifier(v)
				if !token.IsIdentifier(id) {
					log.Fatalf("invalid identifier: %q", id)
				}

				g.Id(id).String().Tag(map[string]string{"csv": v})
			}
		}).Line()

	}

	fmt.Println()
	if err := f.Render(os.Stdout); err != nil {
		log.Fatal(err)
	}
}