@@ 0,0 1,178 @@
+use ascii;
+use encoding::base64;
+use encoding::utf8;
+use regex;
+use strconv;
+use strings;
+
+// Compiled regular expression used by decode_encoded_words to locate encoded
+// words in a header value. It is declared with a default-initialized value
+// here; the actual pattern is compiled and assigned by the @init function and
+// released again by the @fini function.
+let re_encoded_word: regex::regex = regex::regex { ... };
+
+// Compiles the encoded-word pattern once, before main runs. The pattern has
+// three capture groups: the charset, the encoding letter (b/B/q/Q) and the
+// encoded text. A compilation failure aborts the program (error assertion).
+@init fn init() void = {
+	const pattern = `=\?([^? ]+)\?([bBqQ])\?([^? ]+)\?=`;
+	re_encoded_word = regex::compile(pattern)!;
+};
+
+// Releases the resources held by the compiled encoded-word pattern when the
+// program shuts down. Counterpart of the @init function above.
+@fini fn fini() void = {
+ regex::finish(&re_encoded_word);
+};
+
+// The encoding that get_recommended_encoding suggests for a header value
+// before it is written out.
+//
+// See RFC 2047, Section 4, for the definition of Q and B encodings:
+// https://datatracker.ietf.org/doc/html/rfc2047#section-4
+type recommended_encoding = enum {
+ // Every rune is printable ASCII; the value is used as-is.
+ NONE,
+ // At least half of the runes are printable ASCII; use the "Q" encoding.
+ Q,
+ // Fewer than half of the runes are printable ASCII; use the "B"
+ // (base64) encoding.
+ B,
+};
+
+// Returns the numeric value of a single hexadecimal digit given as an ASCII
+// byte (upper or lower case), or void if the byte is not a hexadecimal digit.
+fn utf8q_hexdigit(b: u8) (u8 | void) = {
+	if (b >= '0' && b <= '9') {
+		return b - '0': u8;
+	};
+	if (b >= 'A' && b <= 'F') {
+		return b - 'A': u8 + 10;
+	};
+	if (b >= 'a' && b <= 'f') {
+		return b - 'a': u8 + 10;
+	};
+	return;
+};
+
+// Decodes the text part of a "Q"-encoded word (RFC 2047, Section 4.2) whose
+// charset is UTF-8:
+//
+// - "=XX" (two hexadecimal digits) becomes the byte 0xXX. If the two
+//   characters after "=" are not hexadecimal digits, a "?" is emitted in
+//   place of the three characters.
+// - "_" becomes a space.
+// - Every other byte, including an "=" that is not followed by two more
+//   bytes, is copied unchanged.
+//
+// If the decoded bytes are not valid UTF-8, a copy of the undecoded input is
+// returned instead of aborting, since the input typically comes from an
+// untrusted message. The caller must free the return value.
+fn decode_utf8q(s: str) str = {
+	// All indexing is done on the byte slice. Mixing byte offsets with
+	// rune-based string functions would go wrong as soon as the input
+	// contains a multi-byte character.
+	const bytes = strings::toutf8(s);
+	// The decoded text is never longer than the encoded text.
+	let result: []u8 = alloc([0...], len(bytes));
+	let j = 0z;
+	for (let i = 0z; i < len(bytes); i += 1) {
+		if (bytes[i] == '=' && i + 2 < len(bytes)) {
+			const hi = utf8q_hexdigit(bytes[i + 1]);
+			const lo = utf8q_hexdigit(bytes[i + 2]);
+			if (hi is u8 && lo is u8) {
+				result[j] = ((hi as u8) << 4) | (lo as u8);
+			} else {
+				result[j] = '?';
+			};
+			// Skip the two digits; the loop header skips the "=".
+			i += 2;
+		} else if (bytes[i] == '_') {
+			result[j] = ' ';
+		} else {
+			result[j] = bytes[i];
+		};
+		j += 1;
+	};
+	match (strings::fromutf8(result[..j])) {
+	case let decoded: str =>
+		return decoded;
+	case =>
+		free(result);
+		return strings::dup(s);
+	};
+};
+
+// Checks decode_utf8q against (encoded, expected) pairs covering two-, three-
+// and four-byte UTF-8 sequences and the "_" to space mapping.
+@test fn decode_utf8q() void = {
+	const cases: [_](str, str) = [
+		("M=C3=BCller", "Müller"),
+		("B=C3=A9la_Bart=C3=B3k", "Béla Bartók"),
+		("=F0=9F=98=8E", "😎"),
+	];
+	for (let i = 0z; i < len(cases); i += 1) {
+		assert(decode_utf8q(cases[i].0) == cases[i].1);
+	};
+};
+
+// Replaces every RFC 2047 encoded-word ("=?charset?encoding?text?=") in a
+// header value whose charset is UTF-8 with its decoded text.
+//
+// The value is scanned once from left to right, so text produced by decoding
+// is never itself interpreted as an encoded-word. An encoded-word is copied
+// to the output unchanged when it cannot be decoded: unsupported charset,
+// malformed base64, or base64 payload that is not valid UTF-8. Later words
+// are still processed.
+//
+// Whitespace between adjacent encoded-words is preserved as-is.
+//
+// If the value contains no encoded-word at all, the input string itself is
+// returned (borrowed). Otherwise a newly allocated string is returned.
+fn decode_encoded_words(line: str) str = {
+	let out: []u8 = [];
+	// Part of the input that has not been scanned yet; always a borrowed
+	// sub-string of line.
+	let rest = line;
+	let found = false;
+	for (strings::contains(rest, "=?")) {
+		const matches = regex::find(&re_encoded_word, rest);
+		defer regex::result_free(matches);
+		if (len(matches) == 0) {
+			break;
+		};
+		found = true;
+
+		const word = matches[0].content;
+		const charset = matches[1].content;
+		const enc = matches[2].content;
+		const encoded_text = matches[3].content;
+
+		// The match is the leftmost one, so its first textual
+		// occurrence in rest is the match itself.
+		const (before, after) = strings::cut(rest, word);
+		append(out, strings::toutf8(before)...);
+		rest = after;
+
+		if (ascii::strcasecmp(charset, "utf-8") != 0) {
+			// TODO: Handle charsets other than UTF-8,
+			// especially ISO-8859-1(5) and Windows-1252
+			append(out, strings::toutf8(word)...);
+			continue;
+		};
+
+		if (enc == "b" || enc == "B") {
+			match (encoding::base64::decodestr(
+				&encoding::base64::std_encoding, encoded_text)) {
+			case let decoded: []u8 =>
+				defer free(decoded);
+				match (strings::fromutf8(decoded)) {
+				case let text: str =>
+					append(out, strings::toutf8(text)...);
+				case =>
+					append(out, strings::toutf8(word)...);
+				};
+			case =>
+				append(out, strings::toutf8(word)...);
+			};
+		} else {
+			// The pattern only admits b/B/q/Q, so this is "Q".
+			const decoded = decode_utf8q(encoded_text);
+			defer free(decoded);
+			append(out, strings::toutf8(decoded)...);
+		};
+	};
+	if (!found) {
+		return line;
+	};
+	append(out, strings::toutf8(rest)...);
+	// Every appended piece is valid UTF-8 cut at string boundaries.
+	return strings::fromutf8_unsafe(out);
+};
+
+// Checks decode_encoded_words with one Q-encoded and one B-encoded header
+// value; the latter is followed by plain text that must be kept.
+@test fn decode_encoded_words() void = {
+	const cases: [_](str, str) = [
+		("=?UTF-8?Q?M=C3=B6ller?=", "Möller"),
+		("=?UTF-8?B?5byg5LiJ?= <zhang.san@example.com>",
+			"张三 <zhang.san@example.com>"),
+	];
+	for (let i = 0z; i < len(cases); i += 1) {
+		assert(decode_encoded_words(cases[i].0) == cases[i].1);
+	};
+};
+
+// Chooses how a header value should be encoded, based on the share of
+// printable ASCII runes in it:
+//
+// - all runes printable ASCII (including the empty string): NONE
+// - at least half of the runes printable ASCII: Q
+// - otherwise: B
+//
+// RFC 2047, Section 4:
+// The "Q" encoding is recommended for use when most of the characters
+// to be encoded are in the ASCII character set; otherwise, the "B"
+// encoding should be used.
+fn get_recommended_encoding(s: str) recommended_encoding = {
+	let total = 0z;
+	let printable = 0z;
+	let runes = strings::iter(s);
+	for (true) {
+		const r = match (strings::next(&runes)) {
+		case let r: rune =>
+			yield r;
+		case done =>
+			break;
+		};
+		total += 1;
+		if (ascii::isprint(r)) {
+			printable += 1;
+		};
+	};
+
+	if (printable == total) {
+		return recommended_encoding::NONE;
+	};
+	if (printable: f32 >= total: f32 / 2.0) {
+		return recommended_encoding::Q;
+	};
+	return recommended_encoding::B;
+};
+
+// Checks get_recommended_encoding for pure ASCII, mostly ASCII and mostly
+// non-ASCII values.
+@test fn get_recommended_encoding() void = {
+	const cases: [_](str, recommended_encoding) = [
+		("John Doe <john@example.org>", recommended_encoding::NONE),
+		("Möller", recommended_encoding::Q),
+		("张三 <zhang.san@example.com>", recommended_encoding::Q),
+		("张三", recommended_encoding::B),
+		("😎", recommended_encoding::B),
+	];
+	for (let i = 0z; i < len(cases); i += 1) {
+		assert(get_recommended_encoding(cases[i].0) == cases[i].1);
+	};
+};
+
+// Encodes the UTF-8 bytes of a string with the "Q" encoding of RFC 2047,
+// Section 4.2, producing the text part of an encoded-word:
+//
+// - a space becomes "_";
+// - printable ASCII is copied unchanged, except "=", "?" and "_", which have
+//   a special meaning inside an encoded-word and are therefore escaped;
+// - every other byte (control characters and the bytes of non-ASCII runes) is
+//   written as "=" followed by exactly two upper-case hexadecimal digits.
+//
+// The caller must free the return value.
+fn encode_utf8q(value: str) str = {
+	const hexdigits = strings::toutf8("0123456789ABCDEF");
+	let encoded: []u8 = [];
+	for (let b .. strings::toutf8(value)) {
+		if (b == ' ') {
+			append(encoded, '_');
+		} else if (b > 0x20 && b < 0x7F
+				&& b != '=' && b != '?' && b != '_') {
+			append(encoded, b);
+		} else {
+			// Always two digits, so that e.g. a tab is "=09" and
+			// not "=9".
+			append(encoded, '=');
+			append(encoded, hexdigits[(b >> 4): size]);
+			append(encoded, hexdigits[(b & 0x0F): size]);
+		};
+	};
+	// Only ASCII bytes are ever appended.
+	return strings::fromutf8_unsafe(encoded);
+};
+
+// Checks encode_utf8q against (plain, expected) pairs covering the space to
+// "_" mapping and two-, three- and four-byte UTF-8 sequences.
+@test fn encode_utf8q() void = {
+	const cases: [_](str, str) = [
+		("Dr. Möller", "Dr._M=C3=B6ller"),
+		("张三", "=E5=BC=A0=E4=B8=89"),
+		("😎", "=F0=9F=98=8E"),
+	];
+	for (let i = 0z; i < len(cases); i += 1) {
+		assert(encode_utf8q(cases[i].0) == cases[i].1);
+	};
+};
+
+// Encodes a header value as a single RFC 2047 encoded-word with charset
+// UTF-8, using the encoding chosen by get_recommended_encoding.
+//
+// If no encoding is needed, the input string itself is returned (borrowed).
+// Otherwise a newly allocated string is returned. The intermediate payload
+// string is freed once it has been copied into the result.
+fn encode(value: str) str = {
+	switch (get_recommended_encoding(value)) {
+	case recommended_encoding::B =>
+		const payload = encoding::base64::encodestr(
+			&encoding::base64::std_encoding,
+			strings::toutf8(value));
+		const word = strings::concat("=?UTF-8?B?", payload, "?=");
+		free(payload);
+		return word;
+	case recommended_encoding::Q =>
+		const payload = encode_utf8q(value);
+		const word = strings::concat("=?UTF-8?Q?", payload, "?=");
+		free(payload);
+		return word;
+	case recommended_encoding::NONE =>
+		return value;
+	};
+};
+
+// Checks encode for a value that needs no encoding and for values that end up
+// Q-encoded and B-encoded.
+@test fn encode() void = {
+	const cases: [_](str, str) = [
+		("John Doe <john@example.org>", "John Doe <john@example.org>"),
+		("Möller", "=?UTF-8?Q?M=C3=B6ller?="),
+		("张三", "=?UTF-8?B?5byg5LiJ?="),
+		("张三 <zhang.san@example.com>",
+			"=?UTF-8?Q?=E5=BC=A0=E4=B8=89_<zhang.san@example.com>?="),
+		("😎", "=?UTF-8?B?8J+Yjg==?="),
+	];
+	for (let i = 0z; i < len(cases); i += 1) {
+		assert(encode(cases[i].0) == cases[i].1);
+	};
+};
@@ 84,6 84,7 @@ export fn header_add(head: *header, key: str, val: str) void = {
defer free(key);
let map = header_get_mapkey(head, key);
const val = encode(val);
const field = alloc(new_header_field(key, val, []));
append(head.fields, field);
append(map.fields, field);
@@ 101,7 102,7 @@ export fn header_get(head: *header, key: str) str = {
if (map.key != key) {
continue;
};
return map.fields[len(map.fields) - 1].val;
return decode_encoded_words(map.fields[len(map.fields) - 1].val);
};
return "";
@@ 123,6 124,9 @@ export fn header_get(head: *header, key: str) str = {
header_add(&head, "User-Agent", "Harriet");
assert(header_get(&head, "User-Agent") == "Harriet");
header_add(&head, "To", "=?UTF-8?Q?A._D=C3=BCrer?= <duerer@example.org>");
assert(header_get(&head, "To") == "A. Dürer <duerer@example.org>");
assert(header_get(&head, "foobar") == "");
};
@@ 356,6 360,7 @@ export fn read_header(
};
const val = decode_header_value(kv[i+1..]);
const val = decode_encoded_words(val);
const field = alloc(header_field {
raw = kv,
key = key,
@@ 371,6 376,7 @@ export fn read_header(
const input =
"To: Drew DeVault <sir@cmpwn.com>\r\n"
"From: Harriet <harriet@harelang.org>\r\n"
"Cc: =?UTF-8?Q?=E5=BC=A0=E4=B8=89_<zhang.san@example.com>?=\r\n"
"Content-Type: text/plain\r\n"
"DKIM-Signature: a=rsa-sha256;\r\n"
" bh=uI/rVH7mLBSWkJVvQYKz3TbpdI2BLZWTIMKcuo0KHOI=; c=simple/simple;\r\n"
@@ 383,6 389,7 @@ export fn read_header(
assert(header_get(&head, "To") == "Drew DeVault <sir@cmpwn.com>");
assert(header_get(&head, "From") == "Harriet <harriet@harelang.org>");
assert(header_get(&head, "Cc") == "张三 <zhang.san@example.com>");
assert(header_get(&head, "Content-Type") == "text/plain");
assert(header_get(&head, "Dkim-Signature") == "a=rsa-sha256; bh=uI/rVH7mLBSWkJVvQYKz3TbpdI2BLZWTIMKcuo0KHOI=; c=simple/simple; d=example.org; h=Subject:To:From; s=default; t=1577562184; v=1; b=;");
};
@@ 410,6 417,7 @@ export fn write_header(sink: io::handle, head: *header) (size | io::error) = {
header_add(&head, "Content-Type", "text/plain");
header_add(&head, "FROM", "Harriet <harriet@harelang.org>");
header_add(&head, "to", "Drew DeVault <sir@cmpwn.com>");
header_add(&head, "cc", "张三 <zhang.san@example.com>");
const sink = memio::dynamic();
defer io::close(&sink)!;
@@ 417,6 425,7 @@ export fn write_header(sink: io::handle, head: *header) (size | io::error) = {
const result = memio::string(&sink)!;
const expect =
"Cc: =?UTF-8?Q?=E5=BC=A0=E4=B8=89_<zhang.san@example.com>?=\r\n"
"To: Drew DeVault <sir@cmpwn.com>\r\n"
"From: Harriet <harriet@harelang.org>\r\n"
"Content-Type: text/plain\r\n"
@@ 435,6 444,7 @@ export fn write_header(sink: io::handle, head: *header) (size | io::error) = {
const input =
"To: Drew DeVault <sir@cmpwn.com>\r\n"
"From: Harriet <harriet@harelang.org>\r\n"
"Cc: =?UTF-8?Q?=E5=BC=A0=E4=B8=89_<zhang.san@example.com>?=\r\n"
"Content-Type: text/plain\r\n"
"DKIM-Signature: a=rsa-sha256;\r\n"
" bh=uI/rVH7mLBSWkJVvQYKz3TbpdI2BLZWTIMKcuo0KHOI=; c=simple/simple;\r\n"
@@ 481,7 491,7 @@ fn header_field_raw(hf: *header_field) ([]u8 | errors::invalid) = {
const rn = match (strings::next(&iter)) {
case let rn: rune =>
yield rn;
case void =>
case done =>
break;
};