~samwhited/xmpp

b6caaad7dcabe5b3cf8f40ee26259927dbcc8904 — Sam Whited 5 years ago f37d35b
jid: Add helpers for XEP-0106: JID Escaping
3 files changed, 123 insertions(+), 0 deletions(-)

M jid/benchmark_test.go
M jid/jid.go
M jid/jid_test.go
M jid/benchmark_test.go => jid/benchmark_test.go +12 -0
@@ 58,3 58,15 @@ func BenchmarkString(b *testing.B) {
		_ = j.String()
	}
}

// BenchmarkEscape measures Escape on the escape constant, an input in which
// every byte has to be rewritten.
func BenchmarkEscape(b *testing.B) {
	for n := b.N; n > 0; n-- {
		_ = Escape(escape)
	}
}

// BenchmarkUnescape measures Unescape on the allescaped constant, an input
// that consists solely of escape sequences.
func BenchmarkUnescape(b *testing.B) {
	for n := b.N; n > 0; n-- {
		_ = Unescape(allescaped)
	}
}

M jid/jid.go => jid/jid.go +94 -0
@@ 16,6 16,100 @@ import (
	"golang.org/x/text/secure/precis"
)

// escape contains each of the characters for which shouldEscape reports true:
// space, double quote, ampersand, apostrophe, slash, colon, less-than,
// greater-than, at sign and backslash.
const escape = ` "&'/:<>@\`

// shouldEscape reports whether c is one of the ten bytes that Escape rewrites
// as a backslash followed by two hexadecimal digits.
func shouldEscape(c byte) bool {
	switch c {
	case ' ', '"', '&', '\'', '/', ':', '<', '>', '@', '\\':
		return true
	}
	return false
}

// shouldUnescape reports whether the first two bytes of s are the hexadecimal
// code of one of the escaped characters: 20, 22, 26, 27, 2f, 3a, 3c, 3e, 40 or
// 5c. The caller is expected to have already seen the leading backslash and to
// pass the bytes that follow it.
//
// Hexadecimal letters are accepted in either case, matching unhex, which
// decodes both cases. Inputs shorter than two bytes are reported as false
// rather than indexed out of range.
//
// The codes are written out as nested switches instead of being compared as
// strings because the set is small and not likely to change.
func shouldUnescape(s string) bool {
	if len(s) < 2 {
		return false
	}
	switch s[0] {
	case '2':
		switch s[1] {
		case '0', '2', '6', '7', 'f', 'F':
			return true
		}
	case '3':
		switch s[1] {
		case 'a', 'A', 'c', 'C', 'e', 'E':
			return true
		}
	case '4':
		return s[1] == '0'
	case '5':
		return s[1] == 'c' || s[1] == 'C'
	}
	return false
}

// unhex converts a single ASCII hexadecimal digit, in either case, to its
// numeric value. Any byte that is not a hexadecimal digit yields 0.
func unhex(c byte) byte {
	if c >= '0' && c <= '9' {
		return c - '0'
	}
	if c >= 'a' && c <= 'f' {
		return 10 + (c - 'a')
	}
	if c >= 'A' && c <= 'F' {
		return 10 + (c - 'A')
	}
	return 0
}

// Unescape reverses the escaping mechanism defined in XEP-0106: JID Escaping
// on the given localpart. Only the sequences recognized by shouldUnescape are
// decoded; every other byte, including backslashes that do not start such a
// sequence, is copied through unchanged. No check is made that the result is a
// well-formed localpart. If s contains nothing to decode, s itself is
// returned.
func Unescape(s string) string {
	// First pass: count the sequences that will be decoded so the output can
	// be sized exactly. A sequence is three bytes long, so scanning stops as
	// soon as fewer than three bytes remain.
	escapes := 0
	for i := 0; i+2 < len(s); i++ {
		if s[i] != '\\' || !shouldUnescape(s[i+1:i+3]) {
			continue
		}
		escapes++
		i += 2
	}
	if escapes == 0 {
		return s
	}

	// Second pass: each decoded sequence shrinks the output by two bytes.
	out := make([]byte, 0, len(s)-2*escapes)
	for i := 0; i < len(s); i++ {
		if s[i] == '\\' && i+2 < len(s) && shouldUnescape(s[i+1:i+3]) {
			out = append(out, unhex(s[i+1])<<4|unhex(s[i+2]))
			i += 2
			continue
		}
		out = append(out, s[i])
	}
	return string(out)
}

// Escape applies the escaping mechanism defined in XEP-0106: JID Escaping to
// the given localpart: every byte for which shouldEscape reports true is
// replaced by a backslash followed by its two-digit lowercase hexadecimal
// code. If s contains no such byte, s itself is returned. None of the JID
// methods call Escape; callers must apply it themselves before constructing a
// JID.
func Escape(s string) string {
	const hexDigits = "0123456789abcdef"

	// First pass: count the bytes to escape so the output can be sized
	// exactly.
	n := 0
	for i := 0; i < len(s); i++ {
		if shouldEscape(s[i]) {
			n++
		}
	}
	if n == 0 {
		return s
	}

	// Second pass: each escaped byte grows the output by two bytes.
	out := make([]byte, 0, len(s)+2*n)
	for i := 0; i < len(s); i++ {
		c := s[i]
		if !shouldEscape(c) {
			out = append(out, c)
			continue
		}
		out = append(out, '\\', hexDigits[c>>4], hexDigits[c&0x0f])
	}
	return string(out)
}

// JID represents an XMPP address (Jabber ID) comprising a localpart,
// domainpart, and resourcepart. All parts of a JID are guaranteed to be valid
// UTF-8 and will be represented in their canonical form which gives comparison

M jid/jid_test.go => jid/jid_test.go +17 -0
@@ 165,3 165,20 @@ func TestCopy(t *testing.T) {
		t.Error("Copying a JID should result in a different JID pointer")
	}
}

// allescaped is the backslash-plus-two-hex-digit form of each character in the
// escape constant, in the same order.
const allescaped = `\20\22\26\27\2f\3a\3c\3e\40\5c`

// TestEscape verifies that escaping the escape constant produces allescaped.
func TestEscape(t *testing.T) {
	got := Escape(escape)
	if got == allescaped {
		return
	}
	t.Errorf("Escaped localpart should be `%s` but got: `%s`", allescaped, got)
}

// TestUnescape verifies that Unescape decodes allescaped back to the escape
// constant and that it leaves unrecognized or truncated sequences alone.
func TestUnescape(t *testing.T) {
	if got := Unescape(allescaped); got != escape {
		t.Errorf("Unescaped localpart should be `%s` but got: `%s`", escape, got)
	}

	// Only the leading `\20` is expected to be decoded; `\aa` and the trailing
	// run of backslashes must come back unchanged.
	const (
		in   = `\20\aa\\\`
		want = ` \aa\\\`
	)
	if got := Unescape(in); got != want {
		t.Errorf("Unescaped localpart should be ` \\aa\\\\\\` but got: `%s`", got)
	}
}