~samwhited/xmpp

ref: e4775d3d358693be4bffe36c1dd0cb81a8bd7afe xmpp/src/xmpp/jid.go -rw-r--r-- 7.8 KiB
e4775d3dSam Whited Fix an error message mismatch 7 years ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
// Copyright 2014 Sam Whited.
// Use of this source code is governed by the BSD 2-clause license that can be
// found in the LICENSE file.

// Package jid implements XMPP addresses (JIDs) as described in RFC 6122. The
// syntax for a JID is defined as follows using the Augmented Backus-Naur Form:
//
//      jid           = [ localpart "@" ] domainpart [ "/" resourcepart ]
//      localpart     = 1*(nodepoint)
//                      ;
//                      ; a "nodepoint" is a UTF-8 encoded Unicode code
//                      ; point that satisfies the Nodeprep profile of
//                      ; stringprep
//                      ;
//      domainpart    = IP-literal / IPv4address / ifqdn
//                      ;
//                      ; the "IPv4address" and "IP-literal" rules are
//                      ; defined in RFC 3986, and the first-match-wins
//                      ; (a.k.a. "greedy") algorithm described in RFC
//                      ; 3986 applies to the matching process
//                      ;
//                      ; note well that reuse of the IP-literal rule
//                      ; from RFC 3986 implies that IPv6 addresses are
//                      ; enclosed in square brackets (i.e., beginning
//                      ; with '[' and ending with ']'), which was not
//                      ; the case in RFC 3920
//                      ;
//      ifqdn         = 1*(namepoint)
//                      ;
//                      ; a "namepoint" is a UTF-8 encoded Unicode
//                      ; code point that satisfies the Nameprep
//                      ; profile of stringprep
//                      ;
//      resourcepart  = 1*(resourcepoint)
//                      ;
//                      ; a "resourcepoint" is a UTF-8 encoded Unicode
//                      ; code point that satisfies the Resourceprep
//                      ; profile of stringprep
//                      ;
package jid

import (
	"errors"
	"net"
	"regexp"
	"strings"
	"unicode/utf8"

	// TODO: Use a proper stringprep library like "code.google.com/p/go-idn/idna"
	"code.google.com/p/go.text/unicode/norm"
)

// Reusable error messages. Each one is turned into an error with errors.New
// by the validation code in this package (normalizeJIDPart and FromString).
const (
	// ERROR_INVALID_STRING is reported when the input is not valid UTF-8.
	ERROR_INVALID_STRING = "String is not valid UTF-8"
	// ERROR_EMPTY_PART is reported when a JID part is 0 bytes long.
	ERROR_EMPTY_PART = "JID parts must be greater than 0 bytes"
	// ERROR_LONG_PART is reported when a JID part is longer than 1023 bytes;
	// a part of exactly 1023 bytes is accepted.
	ERROR_LONG_PART = "JID parts must be no more than 1023 bytes"
	// ERROR_INVALID_JID is reported when a string does not have the overall
	// shape of a JID.
	ERROR_INVALID_JID = "String is not a valid JID"
	// ERROR_ILLEGAL_RUNE is reported when a JID part contains a forbidden
	// character.
	ERROR_ILLEGAL_RUNE = "String contains an illegal character"
	// ERROR_ILLEGAL_SPACE is reported when a JID part contains whitespace.
	ERROR_ILLEGAL_SPACE = "String contains illegal whitespace"
)

// NF is the Unicode normalization form that normalizeJIDPart applies to a JID
// part before validating it. According to RFC 6122:
//
//      This profile specifies the use of Unicode Normalization Form KC, as
//      described in [STRINGPREP].
//
const NF norm.Form = norm.NFKC

// jid holds the three components of an XMPP address. The type is deliberately
// unexported so that JIDs can only be created or modified through the
// constructor and setters, which perform validation.
type jid struct {
	localpart, domainpart, resourcepart string
}

// NewJID builds a JID by handing s to FromString on a fresh zero-valued jid.
// It returns that jid together with whatever error FromString reported.
func NewJID(s string) (jid, error) {
	var parsed jid
	if err := parsed.FromString(s); err != nil {
		return parsed, err
	}
	return parsed, nil
}

// LocalPart returns the localpart of the JID as stored; it is the empty
// string when no localpart has been set.
func (j jid) LocalPart() string {
	return j.localpart
}

// DomainPart returns the domainpart of the JID as stored; it is the empty
// string when no domainpart has been set.
func (j jid) DomainPart() string {
	return j.domainpart
}

// ResourcePart returns the resourcepart of the JID as stored; it is the empty
// string when no resourcepart has been set.
func (j jid) ResourcePart() string {
	return j.resourcepart
}

// normalizeJIDPart verifies that part is acceptable as a JID part and returns
// it normalized with NF.
//
// The checks run in the order below and the first one that fails determines
// the error (created with errors.New from the named message):
//
//   - ERROR_EMPTY_PART:     the normalized part is 0 bytes long
//   - ERROR_LONG_PART:      the normalized part is longer than 1023 bytes
//   - ERROR_INVALID_STRING: the original part is not valid UTF-8
//   - ERROR_ILLEGAL_RUNE:   the original part contains any of " & ' / : < > @
//   - ERROR_ILLEGAL_SPACE:  the normalized part contains whitespace anywhere,
//     including at its start or end
//
// On error the returned string is empty.
//
// TODO: Use a proper stringprep library to make sure this is all correct.
func normalizeJIDPart(part string) (string, error) {
	normalized := NF.String(part)
	switch {
	case len(normalized) == 0:
		// The normalized length should be > 0 bytes
		return "", errors.New(ERROR_EMPTY_PART)
	case len(normalized) > 1023:
		// The normalized length should be ≤ 1023 bytes
		return "", errors.New(ERROR_LONG_PART)
	case !utf8.ValidString(part):
		// The original string should be valid UTF-8
		return "", errors.New(ERROR_INVALID_STRING)
	case strings.ContainsAny(part, "\"&'/:<>@"):
		// The original string should not contain any illegal characters. After
		// normalization some of these characters may be present.
		return "", errors.New(ERROR_ILLEGAL_RUNE)
	}

	// There should be no whitespace in the normalized part. Counting fields
	// alone is not enough: " a" and "a " each split into exactly one field, so
	// the single field must also be the entire string.
	if fields := strings.Fields(normalized); len(fields) != 1 || fields[0] != normalized {
		return "", errors.New(ERROR_ILLEGAL_SPACE)
	}
	return normalized, nil
}

// SetLocalPart validates and normalizes localpart with normalizeJIDPart and,
// on success, stores the result as the JID's localpart. If validation fails
// the error is returned and the JID is left unchanged.
//
// The receiver is a pointer so that the assignment reaches the caller's JID;
// with a value receiver the update would be made to a copy and silently lost.
func (j *jid) SetLocalPart(localpart string) error {
	normalized, err := normalizeJIDPart(localpart)
	if err != nil {
		return err
	}
	j.localpart = normalized
	return nil
}

// SetDomainPart validates domainpart and, on success, stores it as the JID's
// domainpart. If validation fails the error is returned and the JID is left
// unchanged.
//
// An IPv6 address, with or without surrounding square brackets, is stored
// wrapped in brackets. Anything else is validated and normalized with
// normalizeJIDPart, has one surrounding bracket pair and one trailing dot
// removed, and must still be non-empty afterwards (otherwise an
// ERROR_EMPTY_PART error is returned).
//
// The receiver is a pointer so that the assignment reaches the caller's JID;
// with a value receiver the update would be made to a copy and silently lost.
func (j *jid) SetDomainPart(domainpart string) error {
	// Recognize IPv6 literals before normalizeJIDPart runs: that function
	// rejects ':' and would therefore refuse every IPv6 address. A string that
	// parses as an IP and contains a colon is in IPv6 textual form.
	literal := strings.TrimSuffix(strings.TrimPrefix(domainpart, "["), "]")
	if strings.Contains(literal, ":") && net.ParseIP(literal) != nil {
		j.domainpart = "[" + literal + "]"
		return nil
	}

	normalized, err := normalizeJIDPart(domainpart)
	if err != nil {
		return err
	}
	// Remove brackets if they exist.
	// TODO: Check if brackets exist and don't allow them if this isn't a v6 address
	normalized = strings.TrimPrefix(normalized, "[")
	normalized = strings.TrimSuffix(normalized, "]")
	// According to RFC 6122:
	// If the domainpart includes a final character considered to be a label
	// separator (dot) by [IDNA2003] or [DNS], this character MUST be stripped
	// from the domainpart before the JID of which it is a part is used for the
	// purpose of routing an XML stanza, comparing against another JID, or
	// constructing an [XMPP-URI].
	normalized = strings.TrimSuffix(normalized, ".")
	// Stripping brackets and the trailing dot can consume the whole input
	// (e.g. "." or "[]"); an empty domainpart is never valid.
	if len(normalized) == 0 {
		return errors.New(ERROR_EMPTY_PART)
	}
	j.domainpart = normalized
	return nil
}

// SetResourcePart validates and normalizes resourcepart with normalizeJIDPart
// and, on success, stores the result as the JID's resourcepart. If validation
// fails the error is returned and the JID is left unchanged.
//
// The receiver is a pointer so that the assignment reaches the caller's JID;
// with a value receiver the update would be made to a copy and silently lost.
func (j *jid) SetResourcePart(resourcepart string) error {
	// Use the same validation helper as the other setters.
	normalized, err := normalizeJIDPart(resourcepart)
	if err != nil {
		return err
	}
	j.resourcepart = normalized
	return nil
}

// String returns the JID in its textual form,
//
//      [ localpart "@" ] domainpart [ "/" resourcepart ]
//
// The "@" and "/" separators are written only when the part they introduce is
// non-empty, so a JID without a localpart or resourcepart does not produce
// dangling separators such as "@example.com/".
func (j jid) String() string {
	s := j.domainpart
	if j.localpart != "" {
		s = j.localpart + "@" + s
	}
	if j.resourcepart != "" {
		s += "/" + j.resourcepart
	}
	return s
}

// JIDMatch is the regular expression describing the strings FromString
// accepts: a localpart, "@", a domainpart, "/" and a resourcepart, where no
// part is empty or contains '@' or '/'.
//
// TODO: Technically the only required part of a JID is the domainpart, but
// this pattern requires all three parts.
const JIDMatch = "[^@/]+@[^@/]+/[^@/]+"

// jidPattern is JIDMatch anchored to the whole input and compiled once at
// package initialization instead of on every FromString call.
var jidPattern = regexp.MustCompile("^(?:" + JIDMatch + ")$")

// FromString sets all three parts of the JID from s, which must have the form
// described by JIDMatch once surrounding whitespace is trimmed.
//
// It returns an ERROR_INVALID_STRING error if s is not valid UTF-8, an
// ERROR_INVALID_JID error if s does not match the pattern, or the error
// reported by the first setter that rejects its part. If any error is
// returned, the JID holds the same values it held before the call.
//
// The receiver is a pointer so that the parsed parts reach the caller's JID.
func (j *jid) FromString(s string) error {
	// Make sure the string is valid UTF-8
	if !utf8.ValidString(s) {
		return errors.New(ERROR_INVALID_STRING)
	}
	// Trim first so that the anchored pattern and the indexes computed below
	// all refer to the same string.
	s = strings.TrimSpace(s)
	// According to RFC 6122:
	//
	//     Implementation Note: When dividing a JID into its component parts, an
	//     implementation needs to match the separator characters '@' and '/'
	//     before applying any transformation algorithms, which might decompose
	//     certain Unicode code points to the separator characters (e.g., U+FE6B
	//     SMALL COMMERCIAL AT might decompose into U+0040 COMMERCIAL AT).
	//
	// So don't normalize before we check the regex.
	if !jidPattern.MatchString(s) {
		return errors.New(ERROR_INVALID_JID)
	}
	// The anchored match guarantees exactly one '@' followed later by exactly
	// one '/', so both indexes exist and atLoc < slashLoc; the slices below
	// cannot go out of range.
	atLoc := strings.IndexRune(s, '@')
	slashLoc := strings.IndexRune(s, '/')

	// Remember the current value so that a failure partway through does not
	// leave the JID with only some of its parts replaced.
	saved := *j
	err := j.SetLocalPart(s[:atLoc])
	if err == nil {
		err = j.SetDomainPart(s[atLoc+1 : slashLoc])
	}
	if err == nil {
		err = j.SetResourcePart(s[slashLoc+1:])
	}
	if err != nil {
		*j = saved
		return err
	}
	return nil
}