@@ 54,6 54,7 @@ Options:
:iso (sequence (+ "ISO" "iso") "-8859-1" (? "5"))
:win (sequence (set "Ww") "indows-1252")
:charset (sequence "=?" (+ :iso :win) "?Q?")}))
+# Compiled PEG that matches a single byte in the range 0x80-0xFF,
+# i.e. any byte outside the 7-bit ASCII range (0x00-0x7F).
+(def no-ascii
+  (peg/compile '(range "\x80\xFF")))
(defn second [xs] (get xs 1))
(defn third [xs] (get xs 2))
@@ 228,6 229,14 @@ Options:
(test (decode-iso8859-q "=?iso-8859-1?Q?M=FCller?=") "Müller")
+(defn invalid-ascii
+  ``Search `str` with the `no-ascii` PEG.
+  Returns the zero-based index of the first byte in the range 0x80-0xFF,
+  or nil when `str` contains no such byte.``
+  [str]
+  (peg/find no-ascii str))
+
+# invalid-ascii yields the zero-based index of the first byte in the range
+# 0x80-0xFF, or nil when there is none.
+(test (invalid-ascii "") nil)
+(test (invalid-ascii "hello") nil)
+(test (invalid-ascii "für") 1)
+(test (invalid-ascii "Größe") 2)
+(test (invalid-ascii "Bestätigung") 4)
+# A hit at index 0 must still be reported as an index (not nil/false):
+# 0 is truthy in Janet, so using the result as a condition keeps working.
+(test (invalid-ascii "Ärger") 0)
+
(defn sanitize [mailbox]
(var sanitized mailbox)
# ignore addresses containing noreply, no-reply or no_reply
@@ 298,6 307,11 @@ Options:
(var results (match-mailboxes line))
(if results
(each mailbox results
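+ # headers must use only ASCII characters (0x00 - 0x7F)
+ # NOTE(review): `break` leaves the enclosing `each` loop entirely, so any
+ # mailboxes that follow in `results` are not processed either -- confirm
+ # this is intended, or restructure so only the offending mailbox is skipped.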
+ (when (invalid-ascii mailbox)
+ (eprintf "No valid ASCII: %s (skipped)" mailbox)
+ (break nil)) # "continue" would be useful here
+
(def mailbox-sanitized (-> mailbox decode-utf8 decode-iso8859-q sanitize))
(cond
(nil? mailbox-sanitized)