@@ 37,6 37,9 @@ func HttpLinks(r io.Reader) (io.Reader, []string) {
scheme = j - i
j = scheme
+ // "inline" email without a mailto: prefix - add some extra checks for those
+ inlineEmail := len(match) > 4 && match[2] == -1 && match[4] == -1
+
for !emitUrl && j < len(b) && bytes.IndexByte(urichars, b[j]) != -1 {
switch b[j] {
case '[':
@@ 69,9 72,21 @@ func HttpLinks(r io.Reader) (io.Reader, []string) {
} else {
j++
}
+ case '&':
+ if inlineEmail {
+ emitUrl = true
+ } else {
+ j++
+ }
default:
j++
}
+
+ // inline emails must not contain parens, '<' or '[': once one is counted, step back one byte and end the link here
+ if inlineEmail && (paren > 0 || ltgt > 0 || bracket > 0) {
+ j--
+ emitUrl = true
+ }
}
// Heuristic to remove trailing characters that are
@@ 91,7 106,7 @@ func HttpLinks(r io.Reader) (io.Reader, []string) {
continue
}
url := string(b[:j])
- if match[2] == -1 && match[4] == -1 {
+ if inlineEmail {
// Email address with missing mailto: scheme. Add it.
url = "mailto:" + url
}
@@ 114,6 114,26 @@ func TestHyperlinks(t *testing.T) {
text: "You can reach me via the somewhat strange, but nonetheless valid, email mailto:~mpldr/list@[2001:db8::7]?subject=whazzup%3F",
links: []string{"mailto:~mpldr/list@[2001:db8::7]?subject=whazzup%3F"},
},
+ {
+ name: "simple email in <a href>",
+ text: `<a href="mailto:a@abc.com" rel="noopener noreferrer">`,
+ links: []string{"mailto:a@abc.com"},
+ },
+ {
+ name: "simple email in <a> body",
+ text: `<a href="#" rel="noopener noreferrer">a@abc.com</a><br/><p>more text</p>`,
+ links: []string{"mailto:a@abc.com"},
+ },
+ {
+ name: "emails in <a> href and body",
+ text: `<a href="mailto:a@abc.com" rel="noopener noreferrer">b@abc.com</a><br/><p>more text</p>`,
+ links: []string{"mailto:a@abc.com", "mailto:b@abc.com"},
+ },
+ {
+ name: "email in <...>",
+ text: `<div>01.02.2023, 10:11, "Firstname Lastname" <a@abc.com>:</div>`,
+ links: []string{"mailto:a@abc.com"},
+ },
}
for i, test := range tests {