## ~samwhited/xmpp

b6caaad7dcabe5b3cf8f40ee26259927dbcc8904 — Sam Whited 5 years ago
```jid: Add helpers for XEP-0106: JID Escaping
```
```3 files changed, 123 insertions(+), 0 deletions(-)

M jid/benchmark_test.go
M jid/jid.go
M jid/jid_test.go
```
`M jid/benchmark_test.go => jid/benchmark_test.go +12 -0`
```@@ 58,3 58,15 @@ func BenchmarkString(b *testing.B) {
_ = j.String()
}
}
+
+func BenchmarkEscape(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		_ = Escape(escape)
+	}
+}
+
+func BenchmarkUnescape(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		_ = Unescape(allescaped)
+	}
+}

```
`M jid/jid.go => jid/jid.go +94 -0`
```@@ 16,6 16,100 @@ import (
"golang.org/x/text/secure/precis"
)

+const escape = ` "&'/:<>@\` // every byte that shouldEscape accepts, in order; also used as the test and benchmark input
+
+// shouldEscape reports whether c is one of the ten bytes that Escape rewrites
+// as a backslash followed by two hex digits.
+func shouldEscape(c byte) bool {
+	switch c {
+	case ' ', '"', '&', '\'', '/', ':', '<', '>', '@', '\\':
+		return true
+	}
+	return false
+}
+
+// shouldUnescape reports whether s, the two bytes that follow a backslash,
+// spells one of the codes 20, 22, 26, 27, 2f, 3a, 3c, 3e, 40 or 5c. Hex
+// letters are only matched in lower case. Callers pass exactly two bytes; the
+// second byte is only inspected when the first is '2', '3', '4' or '5'.
+//
+// The comparisons are done byte by byte rather than against whole strings
+// because the set is small and fixed.
+func shouldUnescape(s string) bool {
+	switch s[0] {
+	case '2':
+		switch s[1] {
+		case '0', '2', '6', '7', 'f':
+			return true
+		}
+	case '3':
+		switch s[1] {
+		case 'a', 'c', 'e':
+			return true
+		}
+	case '4':
+		return s[1] == '0'
+	case '5':
+		return s[1] == 'c'
+	}
+	return false
+}
+
+// unhex returns the numeric value of the hexadecimal digit c, accepting both
+// lower- and upper-case letters. Any byte that is not a hex digit yields 0.
+func unhex(c byte) byte {
+	if c >= '0' && c <= '9' {
+		return c - '0'
+	}
+	if c >= 'a' && c <= 'f' {
+		return 10 + (c - 'a')
+	}
+	if c >= 'A' && c <= 'F' {
+		return 10 + (c - 'A')
+	}
+	return 0
+}
+
+// Unescape reverses the XEP-0106: JID Escaping transformation on a localpart.
+// Only backslash sequences accepted by shouldUnescape are decoded; every other
+// byte, including a backslash that does not start such a sequence, is copied
+// through unchanged. No validation of the resulting localpart is performed.
+// When s contains nothing to decode it is returned as is.
+func Unescape(s string) string {
+	// First pass: count decodable sequences so the output can be sized
+	// exactly. A sequence needs three bytes, so stop once fewer remain.
+	seqs := 0
+	for pos := 0; pos+3 <= len(s); pos++ {
+		if s[pos] != '\\' || !shouldUnescape(s[pos+1:pos+3]) {
+			continue
+		}
+		seqs++
+		// Skip the two hex digits; the loop increment skips the backslash.
+		pos += 2
+	}
+	if seqs == 0 {
+		return s
+	}
+
+	// Second pass: each sequence shrinks from three bytes to one.
+	out := make([]byte, 0, len(s)-2*seqs)
+	for pos := 0; pos < len(s); {
+		if s[pos] == '\\' && pos+3 <= len(s) && shouldUnescape(s[pos+1:pos+3]) {
+			out = append(out, unhex(s[pos+1])<<4|unhex(s[pos+2]))
+			pos += 3
+			continue
+		}
+		out = append(out, s[pos])
+		pos++
+	}
+	return string(out)
+}
+
+// Escape returns an escaped version of the specified localpart using the
+// escaping mechanism defined in XEP-0106: JID Escaping. It is not applied
+// by any of the JID methods, and must be applied manually before constructing a
+// JID.
+func Escape(s string) string {
+	count := 0
+	for i := 0; i < len(s); i++ {
+		c := s[i]
+		if shouldEscape(c) {
+			count++
+		}
+	}
+
+	if count == 0 {
+		return s
+	}
+
+	t := make([]byte, len(s)+2*count)
+	j := 0
+	for i := 0; i < len(s); i++ {
+		switch c := s[i]; {
+		case shouldEscape(c):
+			t[j] = '\\'
+			t[j+1] = "0123456789abcdef"[c>>4]
+			t[j+2] = "0123456789abcdef"[c&15]
+			j += 3
+		default:
+			t[j] = s[i]
+			j++
+		}
+	}
+	return string(t)
+}
+
// JID represents an XMPP address (Jabber ID) comprising a localpart,
// domainpart, and resourcepart. All parts of a JID are guaranteed to be valid
// UTF-8 and will be represented in their canonical form which gives comparison

```
`M jid/jid_test.go => jid/jid_test.go +17 -0`
```@@ 165,3 165,20 @@ func TestCopy(t *testing.T) {
t.Error("Copying a JID should result in a different JID pointer")
}
}
+
+const allescaped = `\20\22\26\27\2f\3a\3c\3e\40\5c` // what TestEscape expects Escape(escape) to produce, and the input TestUnescape decodes back to escape
+
+// TestEscape checks that escaping the escape constant, which holds every byte
+// that needs escaping, yields allescaped.
+func TestEscape(t *testing.T) {
+	got := Escape(escape)
+	if got == allescaped {
+		return
+	}
+	t.Errorf("Escaped localpart should be `%s` but got: `%s`", allescaped, got)
+}
+
+// TestUnescape checks that Unescape decodes allescaped back to the escape
+// constant and that it leaves unrecognised or incomplete backslash sequences
+// untouched.
+func TestUnescape(t *testing.T) {
+	got := Unescape(allescaped)
+	if got != escape {
+		t.Errorf("Unescaped localpart should be `%s` but got: `%s`", escape, got)
+	}
+
+	// Only the leading \20 is a recognised sequence; \aa and the trailing
+	// backslashes must be copied through as they are.
+	got = Unescape(`\20\aa\\\`)
+	if got != ` \aa\\\` {
+		t.Errorf("Unescaped localpart should be ` \\aa\\\\\\` but got: `%s`", got)
+	}
+}

```