~mna/runes

477d801d40f4183ec3ca23a836de27cd65fd99e0 — Martin Angers 1 year, 2 months ago d10eed9
simplify arg processing
5 files changed, 70 insertions(+), 221 deletions(-)

A .gitignore
M main.go
M print.go
D table.go
D table_test.go
A .gitignore => .gitignore +5 -0
@@ 0,0 1,5 @@
# output files
*.out

# binary
/runes

M main.go => main.go +59 -63
@@ 7,7 7,6 @@ import (
	"flag"
	"fmt"
	"os"
	"sort"
	"strconv"
	"strings"
)


@@ 34,88 33,78 @@ func main() {
	}

	args := flag.Args()

	var rs []rune
	for _, arg := range args {
	lastIx := -1
	for i, arg := range args {
		if len(arg) == 0 {
			continue
		}
		if arg == "-" {
			lastIx = i
			break
		}

		switch p0 := arg[0]; p0 {
		case 'u', 'U', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			// it is either a number or a range
			parts := []string{arg}
			if rangeIx := strings.Index(arg, "-"); rangeIx >= 0 {
				parts = []string{arg[:rangeIx], arg[rangeIx+1:]}
		var nums []int
		parts := strings.Split(arg, "-")
		if len(parts) > 2 {
			fmt.Fprintf(os.Stderr, "invalid rune argument: too many parts in range %s\n", arg)
			os.Exit(1)
		}
		for _, part := range parts {
			if strings.HasPrefix(part, "u+") || strings.HasPrefix(part, "U+") {
				part = "0x" + part[2:]
			}
			n, err := strconv.ParseUint(part, 0, 32)
			if err != nil {
				fmt.Fprintf(os.Stderr, "invalid rune argument: %s\n", arg)
				os.Exit(1)
			}
			nums = append(nums, int(n))
		}

		default:
			rs = append(rs, runesSet(arg)...)
		if len(nums) == 1 {
			rs = append(rs, rune(nums[0]))
			continue
		}
		rs = append(rs, runesInRange(nums[0], nums[1])...)
	}

	// if there are remaining arguments, treat them as strings to print the
	// runes of.
	args = args[lastIx+1:]
	for _, arg := range args {
		rs = append(rs, runesSet(arg)...)
	}

	if err := p.printStart(os.Stdout); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	if err := printRunes(p, rs); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	if err := p.printEnd(); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// decode a command-line argument into a list of runes to print,
// and return true as second value if this is a range in the form
// <start>-<end> (inclusive).
func decode(arg string) (runes []rune, isRange bool, err error) {
	if len(arg) == 0 {
		return nil, false, nil
	}

	p0 := arg[0]
	base := 10
	start := 0
	switch p0 {
	case 'u', 'U':
		if len(arg) == 1 || arg[1] != '+' {
			return runesSet(arg), false, nil
		}
		base = 16
		start = 2 // skip u+
	case '0':
		if len(arg) > 1 && arg[1] == 'x' || arg[1] == 'X' {
			base = 16
			start = 2 // skip 0x
			break
		}
	case '1', '2', '3', '4', '5', '6', '7', '8', '9':
		// ok, decimal number
	default:
		return runesSet(arg), false, nil
func runesInRange(start, end int) []rune {
	rs := make([]rune, 0, end-start+1)
	for i := start; i <= end; i++ {
		rs = append(rs, rune(i))
	}

	num, err := strconv.ParseUint(arg[start:], base, 32)
	if err != nil {
		return nil, false, err
	}
	runes = append(runes, rune(num))
	return runes, false, nil
	return rs
}

// returns a slice of runes where each distinct rune in arg is returned.
func runesSet(arg string) []rune {
	m := make(map[rune]bool)
	for _, r := range arg {
		m[r] = true
	rs := make([]rune, len(arg))
	for i, r := range arg {
		rs[i] = r
	}
	rs := make([]rune, 0, len(m))
	for k := range m {
		rs = append(rs, k)
	}
	sort.Slice(rs, func(l, r int) bool {
		lr, rr := rs[l], rs[r]
		return lr < rr
	})
	return rs
}



@@ 126,16 115,23 @@ func usage() {

func help() {
	const msg = `
The runes command prints information about Unicode code points. Without argument,
all code points are printed; specific code points can be requested as arguments,
and ranges of code points are supported. Code points starting with '0x' or 'u+'
are considered in hexadecimal (the 'x' and 'u' are case insensitive), otherwise
the number is processed as decimal.
The runes command prints information about Unicode code points. Without
argument, all code points are printed; specific code points can be requested as
arguments, and ranges of code points are supported (e.g. 0x17-0x60). Code
points starting with '0x' or 'u+' are considered in hexadecimal (the 'x' and
'u' are case insensitive), otherwise the number is processed as decimal.

A single dash '-' can be used so that subsequent arguments are treated as
strings for which each rune will be printed.

The output follows the order of runes as specified on the command-line,
the same rune will be printed multiple times if it is specified or included
in multiple arguments.

Examples:
    runes
    runes 0x2318 40-60
    runes u+1f970
    runes u+1f970 0X55-0XA0 - "Some string"
`
	usage()
	fmt.Println(msg)

M print.go => print.go +6 -14
@@ 64,27 64,19 @@ func info(r rune) runeInfo {
}

// print a single rune.
func printRune(p printer, r rune) {
func printRune(p printer, r rune) error {
	ri := info(r)
	p.printRune(ri)
	return p.printRune(ri)
}

// print an explicit list of runes.
func printRunes(p printer, rs []rune) {
func printRunes(p printer, rs []rune) error {
	for _, r := range rs {
		printRune(p, r)
	}
}

// print a range of runes, ignoring invalid ones.
func printRange(p printer, start, end rune) {
	for i := start; i <= end; i++ {
		// for ranges, ignore invalid utf8 runes
		if !utf8.ValidRune(i) {
			continue
		if err := printRune(p, r); err != nil {
			return err
		}
		printRune(p, i)
	}
	return nil
}

type textPrinter struct {

D table.go => table.go +0 -104
@@ 1,104 0,0 @@
package main

import (
	"fmt"
	"strings"
	"unicode"
)

// table is a bitset holding one bit per Unicode code point. The 0x110000
// (1_114_112) code points in [0, U+10FFFF] are packed 64 to a uint64 word,
// which gives 17_408 words. The bit for rune r lives in word r/64 at bit
// position r%64.
type table [0x110000 / 64]uint64

// set marks every rune in rs as present in the table.
//
// It panics if a rune is negative or greater than unicode.MaxRune, because
// such a value has no bit in the table. Runes that precede the offending one
// in rs have already been set when the panic is raised.
func (t *table) set(rs ...rune) {
	for _, r := range rs {
		// Without this check a small negative rune is silently ignored (r/64
		// truncates to 0 and the oversized shift count yields a zero mask),
		// while a larger negative one fails with an opaque index-out-of-range
		// panic. Reject both explicitly.
		if r < 0 {
			panic(fmt.Sprintf("rune %d is outside the Unicode range", r))
		}
		if r > unicode.MaxRune {
			panic(fmt.Sprintf("%#U is outside the Unicode range", r))
		}
		t[r/64] |= 1 << uint64(r%64)
	}
}

// unset marks every rune in rs as absent from the table.
//
// It panics if a rune is negative or greater than unicode.MaxRune, because
// such a value has no bit in the table. Runes that precede the offending one
// in rs have already been cleared when the panic is raised.
func (t *table) unset(rs ...rune) {
	for _, r := range rs {
		// Without this check a small negative rune is silently ignored (r/64
		// truncates to 0 and the oversized shift count yields a zero mask),
		// while a larger negative one fails with an opaque index-out-of-range
		// panic. Reject both explicitly.
		if r < 0 {
			panic(fmt.Sprintf("rune %d is outside the Unicode range", r))
		}
		if r > unicode.MaxRune {
			panic(fmt.Sprintf("%#U is outside the Unicode range", r))
		}
		// &^ (AND NOT) clears only the bit selected by the mask.
		t[r/64] &^= 1 << uint64(r%64)
	}
}

// setRange sets all runes in [from, to] inclusively. It delegates to
// setUnsetRange, which panics if from is greater than to or if to is above
// unicode.MaxRune.
func (t *table) setRange(from, to rune) {
	t.setUnsetRange(from, to, true)
}

// unsetRange unsets all runes in [from, to] inclusively. It delegates to
// setUnsetRange, which panics if from is greater than to or if to is above
// unicode.MaxRune.
func (t *table) unsetRange(from, to rune) {
	t.setUnsetRange(from, to, false)
}

// setUnsetRange sets (when set is true) or clears (when set is false) the bit
// of every rune in [from, to] inclusively.
//
// It panics if from is greater than to, if from is negative, or if to is
// greater than unicode.MaxRune. All bounds are validated before any bit is
// touched, so a rejected range leaves the table unchanged.
func (t *table) setUnsetRange(from, to rune, set bool) {
	if from > to {
		panic(fmt.Sprintf("from rune %#U is greater than to rune %#U", from, to))
	}
	if from < 0 {
		panic(fmt.Sprintf("rune %d is outside the Unicode range", from))
	}
	if to > unicode.MaxRune {
		panic(fmt.Sprintf("%#U is outside the Unicode range", to))
	}

	// Walk the range directly instead of materializing it in a temporary
	// slice, which for the full Unicode range would be over a million runes.
	// to <= unicode.MaxRune, so r++ cannot overflow.
	for r := from; r <= to; r++ {
		mask := uint64(1) << uint64(r%64)
		if set {
			t[r/64] |= mask
		} else {
			t[r/64] &^= mask
		}
	}
}

// is returns true if r is set. Any value outside [0, unicode.MaxRune],
// including a negative one, has no bit in the table and is reported as not
// set.
func (t *table) is(r rune) bool {
	// The lower bound matters: a sufficiently negative r would index the
	// array with a negative word offset and panic.
	if r < 0 || r > unicode.MaxRune {
		return false
	}
	return t[r/64]&(1<<uint64(r%64)) != 0
}

// String renders the table as a bracketed, comma-separated list of the runs of
// consecutive set runes, in ascending order. A run of one rune is written in
// %#U form; a longer run is written as its first and last rune in %#U form
// joined by a dash. An empty table yields "[]".
func (t *table) String() string {
	const noRun rune = -1

	var sb strings.Builder
	sb.WriteByte('[')

	runStart := noRun
	// Iterate one step past MaxRune: that extra position is never set, so a
	// run reaching MaxRune is closed by the same code path as any other run.
	for r := rune(0); r <= unicode.MaxRune+1; r++ {
		if r <= unicode.MaxRune && t.is(r) {
			if runStart == noRun {
				runStart = r
			}
			continue
		}
		if runStart == noRun {
			continue
		}

		// r is the first unset position after an open run; emit that run.
		runEnd := r - 1
		if sb.Len() > 1 {
			// Something was already written after the opening bracket.
			sb.WriteByte(',')
		}
		if runStart == runEnd {
			fmt.Fprintf(&sb, "%#U", runStart)
		} else {
			fmt.Fprintf(&sb, "%#U-%#U", runStart, runEnd)
		}
		runStart = noRun
	}

	sb.WriteByte(']')
	return sb.String()
}

D table_test.go => table_test.go +0 -40
@@ 1,40 0,0 @@
package main

import (
	"testing"
	"unicode"
)

// TestTable applies a sequence of set/unset operations to a single table and
// checks its String rendering after each step. The steps are cumulative: each
// expectation depends on the state left by the previous ones.
func TestTable(t *testing.T) {
	var tbl table

	// expect stops the test if the table does not currently render as want.
	expect := func(want string) {
		t.Helper()
		if got := tbl.String(); got != want {
			t.Fatalf("want %s, got %s", want, got)
		}
	}

	// Individual runes that happen to be contiguous collapse into one run.
	tbl.set([]rune("abcd")...)
	expect("[U+0061 'a'-U+0064 'd']")

	// Punching a hole in a range splits it into two runs.
	tbl.setRange('A', 'Z')
	tbl.unsetRange('M', 'Q')
	expect("[U+0041 'A'-U+004C 'L',U+0052 'R'-U+005A 'Z',U+0061 'a'-U+0064 'd']")

	// Clearing a span that covers every ASCII run leaves only the new range.
	tbl.setRange('╒', '╟')
	tbl.unsetRange('A', 'z')
	expect("[U+2552 '╒'-U+255F '╟']")

	// The last code point is rendered as a trailing single-rune run.
	tbl.set(unicode.MaxRune)
	expect("[U+2552 '╒'-U+255F '╟',U+10FFFF]")
}