~shabbyrobe/go-porter2

bf9adefac9710d8b26a5e4467fe88547beb46af7 — shabbyrobe 4 years ago a84fbea
Remove bounds checks from suffix functions

Good for another ~5% on StemBytes (~2% on Stem):
    benchmark                old ns/op     new ns/op     delta
    BenchmarkStemBytes-8     145           137           -5.52%
    BenchmarkStem-8          296           289           -2.36%
3 files changed, 94 insertions(+), 94 deletions(-)

M porter2.go
M suffix.go
M tree.go
M porter2.go => porter2.go +12 -18
@@ 70,29 70,23 @@ func StemBytes(word []byte, flag StemFlag) []byte {
	s = removeSuffix_apos(s)

	// Step 1a
	if i := suffixPos_sses(s); i != -1 {
	if i := suffixPos_sses(s); i >= 0 {
		// sses, replace by ss
		s = s[:i+2]
		goto step1b
	}
	{
		i := suffixPos_ied(s)
		if i == -1 {
			i = suffixPos_ies(s)
		}
		if i != -1 {
			// ied+   ies*
			// replace by i if preceded by more than one letter,
			// otherwise by ie (so ties -> tie, cries -> cri)
			if i > 1 {
				s = s[:i+1] // equivalent: append(s[:i], 'i')
			} else {
				s = s[:i+2] // equivalent: append(s[:i], 'i', 'e')
			}
			goto step1b
	if i := suffixPos_ied_ies(s); i >= 0 {
		// ied+   ies*
		// replace by i if preceded by more than one letter,
		// otherwise by ie (so ties -> tie, cries -> cri)
		if i > 1 {
			s = s[:i+1] // equivalent: append(s[:i], 'i')
		} else {
			s = s[:i+2] // equivalent: append(s[:i], 'i', 'e')
		}
		goto step1b
	}
	if suffixPos_us(s) != -1 || suffixPos_ss(s) != -1 {
	if suffixHas_ss_us(s) {
		// do nothing
		goto step1b
	}


@@ 130,7 124,7 @@ step1b:
		} else {
			goto step1c
		}
		if suffixPos_at(s) != -1 || suffixPos_bl(s) != -1 || suffixPos_iz(s) != -1 {
		if suffixHas_at_bl_iz(s) {
			s = append(s, 'e')
			goto step1c
		}

M suffix.go => suffix.go +77 -71
@@ 2,122 2,128 @@ package porter2

// FIXME: IsInBounds (bounds-check) ops abound in this file

func suffixPos_s(s []byte) int {
	l := len(s)
	if l >= 1 && s[l-1] == 's' {
		return len(s) - 1
func suffixHas_at_bl_iz(s []byte) bool {
	off := len(s) - 2
	if off < 0 {
		return false
	}
	return -1
	s = s[off:]
	_ = s[1]
	return (s[0] == 'a' && s[1] == 't') ||
		(s[0] == 'b' && s[1] == 'l') ||
		(s[0] == 'i' && s[1] == 'z')
}

// suffixPos_at returns the index at which a trailing "at" begins in s, or -1
// when s is shorter than two bytes or does not end in "at".
func suffixPos_at(s []byte) int {
	start := len(s) - 2
	if start < 0 {
		return -1
	}
	if s[start] == 'a' && s[start+1] == 't' {
		return start
	}
	return -1
}

func suffixPos_bl(s []byte) int {
	l := len(s)
	if l >= 2 && s[l-2] == 'b' && s[l-1] == 'l' {
		return len(s) - 2
func suffixHas_ss_us(s []byte) bool {
	off := len(s) - 2
	if off < 0 {
		return false
	}
	return -1
	s = s[off:]
	_ = s[1]
	return s[1] == 's' && (s[0] == 'u' || s[0] == 's')
}

func suffixPos_iz(s []byte) int {
	l := len(s)
	if l >= 2 && s[l-2] == 'i' && s[l-1] == 'z' {
		return len(s) - 2
func suffixPos_s(s []byte) int {
	last := len(s) - 1
	if last >= 0 && s[last] == 's' {
		return last
	}
	return -1
}

// suffixPos_li returns the index at which a trailing "li" begins in s, or -1
// when s is shorter than two bytes or does not end in "li".
func suffixPos_li(s []byte) int {
	n := len(s)
	if n < 2 {
		return -1
	}
	// Only the final two bytes matter.
	tail := s[n-2:]
	if tail[0] != 'l' || tail[1] != 'i' {
		return -1
	}
	return n - 2
}

// suffixPos_ss returns the index at which a trailing "ss" begins in s, or -1
// when s is shorter than two bytes or does not end in "ss".
func suffixPos_ss(s []byte) int {
	pos := len(s) - 2
	switch {
	case pos < 0:
		// Too short to carry the suffix.
		return -1
	case s[pos] == 's' && s[pos+1] == 's':
		return pos
	default:
		return -1
	}
}

func suffixPos_us(s []byte) int {
	l := len(s)
	if l >= 2 && s[l-2] == 'u' && s[l-1] == 's' {
		return len(s) - 2
	off := len(s) - 2
	if off >= 0 {
		s = s[off:]
		_ = s[1]
		if s[0] == 'l' && s[1] == 'i' {
			return off
		}
	}
	return -1
}

// suffixPos_eed returns the index at which a trailing "eed" begins in s, or
// -1 when s does not end in "eed".
//
// NOTE(review): this span appears to interleave the earlier body (the
// `l := len(s)` form) with its replacement (the `off` form), as if diff
// markers were lost; confirm against the repository before compiling it.
func suffixPos_eed(s []byte) int {
	l := len(s)
	if l >= 3 && s[l-3] == 'e' && s[l-2] == 'e' && s[l-1] == 'd' {
		return len(s) - 3
	off := len(s) - 3
	if off >= 0 {
		// Re-slice to the last three bytes, then touch the highest index once;
		// presumably a hint so the comparisons below need no further bounds checks.
		s = s[off:]
		_ = s[2]
		if s[0] == 'e' && s[1] == 'e' && s[2] == 'd' {
			return off
		}
	}
	return -1
}

// suffixPos_ied returns the index at which a trailing "ied" begins in s, or
// -1 when s is shorter than three bytes or does not end in "ied".
func suffixPos_ied(s []byte) int {
	const suffix = "ied"
	start := len(s) - len(suffix)
	if start >= 0 && string(s[start:]) == suffix {
		return start
	}
	return -1
}

func suffixPos_ies(s []byte) int {
	l := len(s)
	if l >= 3 && s[l-3] == 'i' && s[l-2] == 'e' && s[l-1] == 's' {
		return len(s) - 3
func suffixPos_ied_ies(s []byte) int {
	off := len(s) - 3
	if off >= 0 {
		s = s[off:]
		_ = s[2]
		if s[0] == 'i' && s[1] == 'e' && (s[2] == 'd' || s[2] == 's') {
			return off
		}
	}
	return -1
}

// suffixPos_ion returns the index at which a trailing "ion" begins in s, or
// -1 when s does not end in "ion".
//
// NOTE(review): this span appears to interleave the earlier body (the
// `l := len(s)` form) with its replacement (the `off` form), as if diff
// markers were lost; confirm against the repository before compiling it.
func suffixPos_ion(s []byte) int {
	l := len(s)
	if l >= 3 && s[l-3] == 'i' && s[l-2] == 'o' && s[l-1] == 'n' {
		return len(s) - 3
	off := len(s) - 3
	if off >= 0 {
		// Re-slice to the last three bytes, then touch the highest index once;
		// presumably a hint so the comparisons below need no further bounds checks.
		s = s[off:]
		_ = s[2]
		if s[0] == 'i' && s[1] == 'o' && s[2] == 'n' {
			return off
		}
	}
	return -1
}

// suffixPos_ogi returns the index at which a trailing "ogi" begins in s, or
// -1 when s does not end in "ogi".
//
// NOTE(review): this span appears to interleave the earlier body (the
// `l := len(s)` form) with its replacement (the `off` form), as if diff
// markers were lost; confirm against the repository before compiling it.
func suffixPos_ogi(s []byte) int {
	l := len(s)
	if l >= 3 && s[l-3] == 'o' && s[l-2] == 'g' && s[l-1] == 'i' {
		return len(s) - 3
	off := len(s) - 3
	if off >= 0 {
		// Re-slice to the last three bytes, then touch the highest index once;
		// presumably a hint so the comparisons below need no further bounds checks.
		s = s[off:]
		_ = s[2]
		if s[0] == 'o' && s[1] == 'g' && s[2] == 'i' {
			return off
		}
	}
	return -1
}

// suffixPos_sses returns the index at which a trailing "sses" begins in s, or
// -1 when s does not end in "sses".
//
// NOTE(review): this span appears to interleave the earlier body (the
// `l := len(s)` form) with its replacement (the `off` form), as if diff
// markers were lost; confirm against the repository before compiling it.
func suffixPos_sses(s []byte) int {
	l := len(s)
	if l >= 4 && s[l-4] == 's' && s[l-3] == 's' && s[l-2] == 'e' && s[l-1] == 's' {
		return len(s) - 4
	off := len(s) - 4
	if off >= 0 {
		// Re-slice to the last four bytes, then touch the highest index once;
		// presumably a hint so the comparisons below need no further bounds checks.
		s = s[off:]
		_ = s[3]
		if s[0] == 's' && s[1] == 's' && s[2] == 'e' && s[3] == 's' {
			return off
		}
	}
	return -1
}

// suffixPos_ative returns the index at which a trailing "ative" begins in s,
// or -1 when s does not end in "ative".
//
// NOTE(review): this span appears to interleave the earlier body (the
// `l := len(s)` form) with its replacement (the `off` form), as if diff
// markers were lost; confirm against the repository before compiling it.
func suffixPos_ative(s []byte) int {
	l := len(s)
	if l >= 5 && s[l-5] == 'a' && s[l-4] == 't' && s[l-3] == 'i' && s[l-2] == 'v' && s[l-1] == 'e' {
		return len(s) - 5
	off := len(s) - 5
	if off >= 0 {
		// Re-slice to the last five bytes, then touch the highest index once;
		// presumably a hint so the comparisons below need no further bounds checks.
		s = s[off:]
		_ = s[4]
		if s[0] == 'a' && s[1] == 't' && s[2] == 'i' && s[3] == 'v' && s[4] == 'e' {
			return off
		}
	}
	return -1
}

// suffixPos_eedly returns the index at which a trailing "eedly" begins in s,
// or -1 when s does not end in "eedly".
//
// NOTE(review): this span appears to interleave the earlier body (the
// `l := len(s)` form) with its replacement (the `off` form), as if diff
// markers were lost; confirm against the repository before compiling it.
func suffixPos_eedly(s []byte) int {
	l := len(s)
	if l >= 5 && s[l-5] == 'e' && s[l-4] == 'e' && s[l-3] == 'd' && s[l-2] == 'l' && s[l-1] == 'y' {
		return len(s) - 5
	off := len(s) - 5
	if off >= 0 {
		// Re-slice to the last five bytes, then touch the highest index once;
		// presumably a hint so the comparisons below need no further bounds checks.
		s = s[off:]
		_ = s[4]
		if s[0] == 'e' && s[1] == 'e' && s[2] == 'd' && s[3] == 'l' && s[4] == 'y' {
			return off
		}
	}
	return -1
}

M tree.go => tree.go +5 -5
@@ 111,19 111,19 @@ func buildRevTree(items [][]byte, exact bool) *revTree {

func (node *revTree) Find(item []byte) (found bool, idx, n int) {
	cur := node
	for i := len(item) - 1; i >= 0; i-- {
		b := item[i]
		if cur.next[b] == nil {
	sz := len(item) - 1
	for i := sz; i >= 0; i-- {
		cur = cur.next[item[i]]
		if cur == nil {
			break
		}
		cur = cur.next[b]
		if cur.match {
			found = true
			idx = cur.idx
			n = i
		}
	}
	if !found || (node.exact && n != len(item)-1) {
	if !found || (node.exact && n != sz) {
		return false, -1, 0
	}
	return found, idx, n