~mna/siberian

603d3c1b8c8ae6f468cccd4899fae4bb6499bc3f — Martin Angers 4 years ago c518be4
implement ascii table for efficient lookup of allowed ascii chars
4 files changed, 147 insertions(+), 64 deletions(-)

M .golangci.toml
M ebnf.go
M ebnf_test.go
A go.mod
M .golangci.toml => .golangci.toml +0 -64
@@ 23,67 23,3 @@
    "varcheck",
  ]

[issues]
  # regexps of issue texts to exclude
  # NOTE: using this instead of [[issues.exclude-rules]] as SpaceVim respects those
  # exclusions, but does not respect the specific exclude-rules.
  exclude = [
    # cmd/*: package-level vars for binaries, assigned with linker flags
    "`version` is a global variable",
    "`buildDate` is a global variable",
    "`shortUsage` is a global variable",
    "`longUsage` is a global variable",
    "`commands` is a global variable",

    # pkg/codegen
    "`basicIdentsToGo` is a global variable",
    "`goKeywords` is a global variable",
    "`tokenToGoToken` is a global variable",
    "`testUpdateCodegenTests` is a global variable",

    # pkg/internal/filetest
    "`testUpdateAllTests` is a global variable",

    # pkg/parser
    "`declStart` is a global variable",
    "`stmtStart` is a global variable",
    "`testUpdateParserTests` is a global variable",

    # pkg/scanner
    "`PrintError` is a global variable",
    "`testUpdateScannerTests` is a global variable",

    # pkg/semantic
    "`basicKindSizes` is a global variable",
    "`selectorTypeContext` is a global variable",

    # pkg/token
    "`NewFileSet` is a global variable",
    "`tokens` is a global variable",
    "`keywords` is a global variable",
    "`operators` is a global variable",

    # pkg/typecheck
    "`basicKindStrings` is a global variable",
    "`binaryOpsTable` is a global variable",
    "`boolsCompare` is a global variable",
    "`floatsCompare` is a global variable",
    "`floatsWidening` is a global variable",
    "`identToBasicKind` is a global variable",
    "`intsCompare` is a global variable",
    "`intsWidening` is a global variable",
    "`nodeType` is a global variable",
    "`objectType` is a global variable",
    "`stringsCompare` is a global variable",
    "`stringsWidening` is a global variable",
    "`testUpdateCheckTests` is a global variable",
    "`testUpdateScopesTests` is a global variable",
    "`testUpdateStaticTests` is a global variable",
    "`testUpdateTypesTests` is a global variable",
    "`typeContextStrings` is a global variable",
    "`typeContextValues` is a global variable",
    "`typeType` is a global variable",
    "`unaryOpsTable` is a global variable",
    "`universeIdents` is a global variable",
  ]


M ebnf.go => ebnf.go +75 -0
@@ 1,15 1,90 @@
package siberian

import (
	"fmt"
	"strings"
	"unicode"
)

// asciiTable encodes the allowed ASCII values on 128 bits, one bit per
// value: value v is stored in bit v%64 of word v/64, so values 0-63 live in
// the first word and values 64-127 in the second. A set bit means the value
// is allowed; the zero value therefore allows nothing.
type asciiTable [2]uint64

// set marks each of the given bytes as allowed in the table. Bytes are
// processed in order; it panics on the first byte above unicode.MaxASCII,
// leaving the bytes that precede it already marked.
func (t *asciiTable) set(chars ...byte) {
	for i := range chars {
		c := chars[i]
		if c > unicode.MaxASCII {
			panic(fmt.Sprintf("%x is outside the ASCII range", c))
		}
		// The bits above the low six select the word, the low six bits
		// select the bit inside that word.
		word, bit := c>>6, c&63
		t[word] |= uint64(1) << bit
	}
}

// unset clears the allowed mark of each of the given bytes. Bytes are
// processed in order; it panics on the first byte above unicode.MaxASCII,
// leaving the bytes that precede it already cleared.
func (t *asciiTable) unset(chars ...byte) {
	for i := range chars {
		c := chars[i]
		if c > unicode.MaxASCII {
			panic(fmt.Sprintf("%x is outside the ASCII range", c))
		}
		// The bits above the low six select the word, the low six bits
		// select the bit inside that word.
		word, bit := c>>6, c&63
		t[word] &^= uint64(1) << bit
	}
}

// allowedBytes reports whether every byte in b is allowed by this ascii
// table. It returns -1, true if all bytes are allowed (which includes an
// empty b), or the index of the first disallowed byte and false.
//
// Bytes outside the ASCII range can never be allowed by the table, so they
// are reported as disallowed here instead of being handed to allowed, which
// panics on them.
func (t *asciiTable) allowedBytes(b []byte) (n int, ok bool) {
	for i, ch := range b {
		if ch > unicode.MaxASCII || !t.allowed(ch) {
			return i, false
		}
	}
	return -1, true
}

// allowed reports whether ch is marked as allowed in the table. It panics
// if ch is above unicode.MaxASCII.
func (t *asciiTable) allowed(ch byte) bool {
	if ch > unicode.MaxASCII {
		panic(fmt.Sprintf("%x is outside the ASCII range", ch))
	}
	word := t[ch>>6]               // word holding the bit for ch
	mask := uint64(1) << (ch & 63) // position of ch inside that word
	return word&mask != 0
}

// GoString renders the table as four lines: a ruler of bit positions, the
// first word as 64 binary digits, a second ruler, and the second word as 64
// binary digits. It is the representation fmt uses for the %#v verb.
func (t asciiTable) GoString() string {
	const layout = "...6.........5.........4.........3.........2.........1.........0\n" +
		"%064b\n" +
		".......2.........1.........0.........9.........8.........7......\n" +
		"%064b"

	low, high := t[0], t[1]
	return fmt.Sprintf(layout, low, high)
}

// printable returns the text used to display b: the character itself when b
// lies strictly between 0x20 (space) and 0x7f (DEL), otherwise an "x"
// followed by the two-digit lowercase hexadecimal value of b.
func (t *asciiTable) printable(b byte) string {
	if b <= 0x20 || b >= 0x7f {
		return fmt.Sprintf("x%02x", b)
	}
	return string(rune(b))
}

// String returns a compact, human-readable list of the allowed values,
// enclosed in square brackets. Consecutive allowed values are collapsed into
// "first-last" ranges, separate runs are joined with commas, and each value
// is rendered with printable. An empty table yields "[]".
func (t *asciiTable) String() string {
	var buf strings.Builder
	buf.WriteByte('[')

	// start is the first value of the run of allowed values currently being
	// scanned, or -1 when the scan is not inside a run.
	start := -1

	// Scan one position past MaxASCII: that extra position acts as a
	// disallowed sentinel so that a run reaching 0x7f is flushed too.
	for i := 0; i <= unicode.MaxASCII+1; i++ {
		if i <= unicode.MaxASCII && t.allowed(byte(i)) {
			if start < 0 {
				start = i
			}
			continue
		}
		if start < 0 {
			continue
		}

		// A run just ended at i-1; anything already written after the
		// opening bracket means a separator is needed.
		if buf.Len() > 1 {
			buf.WriteByte(',')
		}
		buf.WriteString(t.printable(byte(start)))
		if end := i - 1; end != start {
			buf.WriteByte('-')
			buf.WriteString(t.printable(byte(end)))
		}
		start = -1
	}

	buf.WriteByte(']')
	return buf.String()
}

// Grammar is declared here as an empty struct with no fields.
// NOTE(review): presumably a placeholder for the EBNF grammar representation
// that this package will build — confirm against the rest of the package.
type Grammar struct{}

M ebnf_test.go => ebnf_test.go +69 -0
@@ 1,11 1,80 @@
package siberian

import (
	"fmt"
	"strings"
	"testing"
	"unicode"
	"unsafe"
)

// TestASCIITable runs one subtest per case. Each subtest starts from an empty
// table, sets the bytes of the case's set string and strictly verifies that
// exactly those are allowed, then unsets the bytes of the unset string and
// verifies those are no longer allowed, and finally compares the table's
// String output against want.
func TestASCIITable(t *testing.T) {
	cases := []struct {
		set   string
		unset string
		want  string
	}{
		{"", "", "[]"},
		{"a", "", "[a]"},
		{"a", "a", "[]"},
		{"ab", "", "[a-b]"},
		{"abcde", "", "[a-e]"},
		{"abcde", "d", "[a-c,e]"},
		{"", "a", "[]"},
		{"\x00\x01\x02\x03IBM", "\x03B", "[x00-x02,I,M]"},
	}
	for _, c := range cases {
		t.Run(fmt.Sprintf("set: %q, unset: %q", c.set, c.unset), func(t *testing.T) {
			var at asciiTable

			at.set([]byte(c.set)...)
			mustAllowASCII(t, &at, c.set, true)

			// Not strict: bytes outside c.unset may legitimately be either
			// allowed or not at this point.
			at.unset([]byte(c.unset)...)
			mustDisallowASCII(t, &at, c.unset, false)

			// Log the bit-level dump to help diagnose a failing case.
			t.Logf("\n%#v\n", at)
			if got := at.String(); got != c.want {
				t.Fatalf("want\n%q\ngot\n%q\n", c.want, got)
			}
		})
	}
}

// mustAllowASCII checks that every byte of allow is allowed by at. It
// forwards to mustASCII with an empty disallow set, passing strict through
// unchanged.
func mustAllowASCII(t *testing.T, at *asciiTable, allow string, strict bool) {
	t.Helper()

	const disallow = "" // nothing to verify on the disallow side
	mustASCII(t, at, allow, disallow, strict)
}

// mustDisallowASCII checks that no byte of disallow is allowed by at. It
// forwards to mustASCII with an empty allow set, passing strict through
// unchanged.
func mustDisallowASCII(t *testing.T, at *asciiTable, disallow string, strict bool) {
	t.Helper()

	const allow = "" // nothing to verify on the allow side
	mustASCII(t, at, allow, disallow, strict)
}

// mustASCII walks every ASCII value and reports a test error for each one
// whose state in at contradicts the expectations:
//
//   - every byte of allow must be allowed; when strict is true and allow is
//     not empty, every other value must not be allowed;
//   - every byte of disallow must not be allowed; when strict is true and
//     disallow is not empty, every other value must be allowed.
//
// Errors are reported with t.Errorf, so all mismatches are listed rather than
// stopping at the first one.
func mustASCII(t *testing.T, at *asciiTable, allow, disallow string, strict bool) {
	// Attribute failures to the calling test, as the mustAllowASCII and
	// mustDisallowASCII wrappers already do.
	t.Helper()

	for i := byte(0); i <= unicode.MaxASCII; i++ {
		if strings.IndexByte(allow, i) >= 0 {
			if !at.allowed(i) {
				t.Errorf("%x (%[1]d) should be allowed", i)
			}
		} else if strict && allow != "" {
			if at.allowed(i) {
				t.Errorf("%x (%[1]d) should not be allowed", i)
			}
		}

		if strings.IndexByte(disallow, i) >= 0 {
			if at.allowed(i) {
				t.Errorf("%x (%[1]d) should not be allowed", i)
			}
		} else if strict && disallow != "" {
			if !at.allowed(i) {
				t.Errorf("%x (%[1]d) should be allowed", i)
			}
		}
	}
}

func TestASCIITableSize(t *testing.T) {
	var b bool
	szBool := unsafe.Sizeof(b)

A go.mod => go.mod +3 -0
@@ 0,0 1,3 @@
module git.sr.ht/~mna/siberian

go 1.14