~apreiml/hare-openpgp

543fe5f6ccd041c1a707aa68463b8973a48e4281 — Armin Preiml 10 months ago 2d35ee8 asciiarmor
asciiarmor: refactor
2 files changed, 196 insertions(+), 264 deletions(-)

M encoding/asciiarmor/+test.ha
M encoding/asciiarmor/asciiarmor.ha
M encoding/asciiarmor/+test.ha => encoding/asciiarmor/+test.ha +44 -11
@@ 9,29 9,52 @@ use strings;
use memio;

@test fn read() void = {
	const testmsg_str = fmt::asprintf(
		"garbage\ngarbage\ngarbage\n{}garbage\n", msg_str);
	defer free(testmsg_str);
	const in = memio::fixed(strings::toutf8(testmsg_str));
	const dec = newdecoder(&in);
	defer finish(&dec);
	assert_read_msg(msg_str);
	assert_read_msg(msg_unaligned_str);
};

	const stream = next(&dec)! as (str, aadecoder);
fn assert_read_msg(msg: str) void = {
	const in = memio::fixed(strings::toutf8(msg));
	const dec = newdecoder(&in)!;
	defer io::close(&dec)!;

	assert(stream.0 == "PGP MESSAGE");
	assert(dec.label == "PGP MESSAGE");

	let hdrs = headers(&stream.1);
	let hdrs = headers(&dec);
	assert(len(hdrs) == 1);
	let h = hdrs[0];
	assert(h.0 == "Version");
	assert(h.1 == "OpenPrivacy 0.99");

	const data = io::drain(&stream.1)!;
	const data = io::drain(&dec)!;
	defer free(data);
	assert(bytes::equal(data, msg_bin));
};

@test fn read_small_chunks() void = {
	const in = memio::fixed(strings::toutf8(msg_str));
	const dec = newdecoder(&in)!;
	defer io::close(&dec)!;

	let dest = memio::dynamic();
	defer io::close(&dest)!;

	assert(next(&dec) is io::EOF);
	for (let i = 0z; i < 120; i += 1) {
		let buf: [1]u8 = [0...];

		match (io::read(&dec, buf)!) {
		case let n: size =>
			io::writeall(&dest, buf[..n])!;
		case io::EOF =>
			break;
		};
	};

	assert(bytes::equal(memio::buffer(&dest), msg_bin));
};



const msg_str: str =
`-----BEGIN PGP MESSAGE-----
Version: OpenPrivacy 0.99


@@ 42,6 65,16 @@ vBSFjNSiVHsuAA==
-----END PGP MESSAGE-----
`;

const msg_unaligned_str: str =
`-----BEGIN PGP MESSAGE-----
Version: OpenPrivacy 0.99

yDgBO22WxBHv7O8X7O/jygAEzol56iUKiXmV+XmpCtmpqQUKiQrFqclFqUDBovzSv
BSFjNSiVHsuAA==
=njUN
-----END PGP MESSAGE-----
`;

const msg_bin: [_]u8 = [
	0xc8, 0x38, 0x01, 0x3b, 0x6d, 0x96, 0xc4, 0x11, 0xef, 0xec, 0xef, 0x17,
	0xec, 0xef, 0xe3, 0xca, 0x00, 0x04, 0xce, 0x89, 0x79, 0xea, 0x25, 0x0a,

M encoding/asciiarmor/asciiarmor.ha => encoding/asciiarmor/asciiarmor.ha +152 -253
@@ 1,176 1,89 @@
// License: MPL-2.0
// (c) 2022 Drew DeVault <sir@cmpwn.com>
use ascii;
use bufio;
use bytes;
use encoding::base64;
use errors;
use fmt;
use io;
use os;
use strings;
use memio;

// This is a modified version of the stdlib's PEM encoder/decoder. It still
// needs further work.

const begin: str = "-----BEGIN ";
const end: str = "-----END ";
const suffix: str = "-----";

export type decoder = struct {
	in: bufio::stream,
	label: memio::stream,
	buf: []u8,
};
def MAX_LINESZ = 80z;

export type b64stream = struct {
	stream: io::stream,
	in: *bufio::stream,
	line: []u8,
	linepos: size,
};
export type header = (str, str);

export type aadecoder = struct {
export type decoder = struct {
	stream: io::stream,
	in: *bufio::stream,
	b64_in: b64stream,
	b64: base64::decoder,
	// XXX: kind of dumb but it saves us some memory management problems
	b64_ready: bool,
	// TODO free headers
	src: bufio::scanner,
	label: str,
	headers: []header,
	lbuf: [MAX_LINESZ]u8,
	lbufsz: size,
	lbufoff: size,
	eof: bool,
};

const aadecoder_vt: io::vtable = io::vtable {
	reader = &aa_read,
// I/O vtable for [[decoder]]: reads go through [[read]] and [[io::close]] is
// routed to [[close]]. All other operations are left at their defaults.
const decoder_vt: io::vtable = io::vtable {
	closer = &close,
	reader = &read,
	...
};

const b64stream_r_vt: io::vtable = io::vtable {
	reader = &b64_read,
	...
};
// Creates a new Ascii Armor decoder. The caller must call [[io::close]] to free
// state associated with the parser. [[io::close]] does not close the underlying
// stream.
export fn newdecoder(in: io::handle) (decoder | io::error) = {
	let src = bufio::newscanner(in, MAX_LINESZ);
	let valid = false;
	defer if (!valid) bufio::finish(&src);

// Creates a new Ascii Armor decoder. The caller must either read it until it
// returns
// [[io::EOF]], or call [[finish]] to free state associated with the parser.
export fn newdecoder(in: io::handle) decoder = {
	let buf: []u8 = alloc([0...], os::BUFSZ);
	return decoder {
		in = bufio::init(in, buf, []),
		buf = buf,
		label = memio::dynamic(),
	};
};

// Frees state associated with this [[decoder]].
export fn finish(dec: *decoder) void = {
	io::close(&dec.label)!;
	bytes::zero(dec.buf);
	free(dec.buf);
};

// Converts an I/O error returned from a PEM decoder into a human-friendly
// string.
export fn strerror(err: io::error) const str = {
	match (err) {
	case errors::invalid =>
		return "Invalid Ascii Armor data";
	let head = match (bufio::scan_line(&src)) {
	case let h: const str =>
		yield strings::rtrim(h);
	case let e: io::error =>
		return e;
	case =>
		return io::strerror(err);
		return errors::invalid;
	};
};

export type header = (str, str);
export fn headers(dec: *aadecoder) []header = dec.headers;

// Finds the next Ascii Armor boundary in the stream, ignoring any non-Ascii
// Armor data, and returns the label and a [[aadecoder]] from which the encoded
// data may be read, or [[io::EOF]] if no further PEM boundaries are found. The
// user must completely read the aadecoder until it returns [[io::EOF]] before
// calling [[next]] again.
//
// The label returned by this function is borrowed from the decoder state and
// does not contain "-----BEGIN " or "-----".
export fn next(dec: *decoder) ((str, aadecoder) | io::EOF | io::error) = {
	for (true) {
		// XXX: This can be improved following
		// https://todo.sr.ht/~sircmpwn/hare/562
		const line = match (bufio::read_line(&dec.in)?) {
		case io::EOF =>
			return io::EOF;
		case let line: []u8 =>
			yield match (strings::fromutf8(line)) {
			case let s: str =>
				yield s;
			case =>
				return errors::invalid;
			};
		};
		defer free(line);
		const line = strings::rtrim(line, '\r');

		if (!strings::hasprefix(line, begin)
				|| !strings::hassuffix(line, suffix)) {
			continue;
		};
	if (!strings::hasprefix(head, begin) || !strings::hassuffix(head, suffix)) {
		return errors::invalid;
	};

		memio::reset(&dec.label);
		const label = strings::sub(line,
			len(begin), len(line) - len(suffix));
		memio::concat(&dec.label, label)!;
	const labelend = len(head) - len(suffix);
	let label = strings::dup(strings::sub(head, len(begin), labelend));
	let headers = parseheaders(&src)?;

		let adec = aadecoder {
			stream = &aadecoder_vt,
			in = &dec.in,
			b64_in = b64stream {
				stream = &b64stream_r_vt,
				in = &dec.in,
				line = [],
				linepos = 0,
			},
			// forcefully zero field (it's actually set in aa_read)
			b64 = *(&([0...]: [size(base64::decoder)]u8): *base64::decoder),
			b64_ready = false,
			headers = [],
		};
		parseheaders(&adec)?;
		return (memio::string(&dec.label)!, adec);
	valid = true;
	return decoder {
		stream = &decoder_vt,
		src = src,
		label = strings::dup(label),
		headers = headers,
		lbuf = [0...],
		lbufsz = 0,
		lbufoff = 0,
		eof = false,
	};

	abort(); // Unreachable
};

fn parseheaders(dec: *aadecoder) (void | io::error) = {
fn parseheaders(s: *bufio::scanner) ([]header | io::error) = {
	let valid = false;
	let headers: []header = [];
	defer if (!valid) free_headers(headers);

	for (true) {
		const line = match (bufio::read_line(dec.in)?) {
		case io::EOF =>
			return errors::invalid;
		case let line: []u8 =>
			yield match (strings::fromutf8(line)) {
			case let s: str =>
				yield s;
			case =>
				return errors::invalid;
			};
		};

		const line = expect_line(s)?;
		if (!strings::contains(line, ": ")) {
			line = strings::trim(line);
			if (line == "") {
				free(line);
			} else {
				dec.b64_in.line = strings::toutf8(line);
				dec.b64_in.linepos = 0;
			if (strings::trim(line) != "") {
				return errors::invalid;
			};
			dec.headers = headers;
			return;
			valid = true;
			return headers;
		};

		defer free(line);
		let pair = strings::cut(line, ": ");
		pair.1 = strings::trim(pair.1, '\r', '\n');



@@ 184,146 97,132 @@ fn parseheaders(dec: *aadecoder) (void | io::error) = {
		};
	};

	// unreachable
	abort();
};

fn aa_read(st: *io::stream, buf: []u8) (size | io::EOF | io::error) = {
	// We need to set up two streams. This is the stream which is actually
	// returned to the caller, which calls the base64 decoder against a
	// special stream (b64stream) which trims out whitespace and EOF's on
	// -----END.
	const st = st: *aadecoder;
	assert(st.stream.reader == &aa_read);
	if (!st.b64_ready) {
		st.b64 = base64::newdecoder(&base64::std_encoding, &st.b64_in);
		st.b64_ready = true;
// Frees a header list: the name and the value of every entry are passed to
// free, followed by the slice itself. Both strings of each entry must
// therefore be heap-allocated and owned by the list.
fn free_headers(h: []header) void = {
	for (let idx = 0z; idx < len(h); idx += 1) {
		const entry = h[idx];
		free(entry.0);
		free(entry.1);
	};
	free(h);
};

	match (io::read(&st.b64, buf)?) {
	case let z: size =>
		return z;
	case io::EOF =>
		yield;
	};
// Returns the armor headers stored in the decoder as (name, value) pairs. The
// slice is borrowed from the decoder, which frees it when it is closed.
export fn headers(d: *decoder) []header = {
	return d.headers;
};

	const line = match (bufio::read_line(st.in)?) {
	case io::EOF =>
fn read(s: *io::stream, buf: []u8) (size | io::EOF | io::error) = {
	let d = s: *decoder;
	if (d.eof) {
		return io::EOF;
	case let line: []u8 =>
		yield match (strings::fromutf8(line)) {
		case let s: str =>
			yield s;
		case =>
			return errors::invalid;
		};
	};
	defer free(line);
	const line = strings::rtrim(line, '\r');

	if (!strings::hasprefix(line, end)
			|| !strings::hassuffix(line, suffix)) {
		return errors::invalid;
	};

	// XXX: We could verify the trailer matches but the RFC says it's
	// optional.
	return io::EOF;
};

fn b64_read(st: *io::stream, buf: []u8) (size | io::EOF | io::error) = {
	// different to the encoding::pem b64_read, this one buffers lines
	// and operates on them

	const st = st: *b64stream;
	assert(st.stream.reader == &b64_read);

	if (st.linepos >= len(st.line)) {
		let line = match (bufio::read_line(st.in)?) {
		case io::EOF =>
			return errors::invalid;
		case let line: []u8 =>
			yield match (strings::fromutf8(line)) {
			case let s: str =>
				yield strings::trim(s);
			case =>
				return errors::invalid;
			};
	if (d.lbufsz - d.lbufoff == 0) {
		let cur = expect_line(&d.src)?;
		let cur = strings::toutf8(strings::rtrim(cur));
		if (cur[0] == '=': u8 && cur[1] != '=': u8) {
			let cur = expect_line(&d.src)?;
			d.eof = true;
			return io::EOF;
		};

		let line = strings::toutf8(line);
		if (len(line) > 2 && line[0] == '=': u32: u8
			&& !ascii::isspace(line[1]: u32: rune)) {
			// TODO CRC check
			// fmt::println("CRC", strings::fromutf8(line)!)!;
			free(line);
		if (cur[0] == '-': u8) {
			d.eof = true;
			return io::EOF;
		};

		free(st.line);
		st.line = line;
		st.linepos = 0;
		const sz = len(cur) - (len(cur) % 4);
		const n = b64_decode(d.lbuf, cur[..sz])?;
		d.lbufsz = n;
		d.lbufoff = 0;

		bufio::unread(&d.src, cur[sz..]);
	};

	let avail = st.line[st.linepos..];
	const n = if (len(buf) > len(avail)) len(avail) else len(buf);
	buf[..n] = avail[..n];
	st.linepos += n;
	return n;
	const avail = d.lbufsz - d.lbufoff;
	const max = if (avail > len(buf)) len(buf) else avail;
	buf[..max] = d.lbuf[d.lbufoff..d.lbufoff + max];
	d.lbufoff += max;
	return max;
};

export type pemencoder = struct {
	stream: io::stream,
	out: io::handle,
	b64: base64::encoder,
	label: str,
	buf: [48]u8,
	ln: u8,
// Scans one line from the scanner and returns it. I/O errors are passed
// through unchanged; every other outcome of [[bufio::scan_line]] (i.e. no line
// could be produced) is reported as [[errors::invalid]], because callers only
// use this where another line is required.
fn expect_line(s: *bufio::scanner) (const str | io::error) = {
	const res = bufio::scan_line(s);
	match (res) {
	case let err: io::error =>
		return err;
	case let line: const str =>
		return line;
	case =>
		return errors::invalid;
	};
};

const pemencoder_vt: io::vtable = io::vtable {
	writer = &pem_write,
	closer = &pem_wclose,
	...
};
fn b64_decode(dest: []u8, src: []u8) (size | errors::invalid) = {
	const decmap = base64::std_encoding.decmap;
	assert(len(src) % 4 == 0);
	assert(len(dest) >= base64::decodedsize(len(src)));

// Creates a new PEM encoder stream. The stream has to be closed to write the
// trailer.
export fn newencoder(label: str, s: io::handle) (pemencoder | io::error) = {
	fmt::fprintf(s, "{}{}{}\n", begin, label, suffix)?;
	return pemencoder {
		stream = &pemencoder_vt,
		out = s,
		b64 = base64::newencoder(&base64::std_encoding, s),
		label = label,
		...
	// TODO make this work without allocation
	let r = base64::decodeslice(&base64::std_encoding, src)?;
	defer {
		bytes::zero(r);
		free(r);
	};
};

fn pem_write(s: *io::stream, buf: const []u8) (size | io::error) = {
	let s = s: *pemencoder;
	let buf = buf: []u8;
	if (len(buf) < len(s.buf) - s.ln) {
		s.buf[s.ln..s.ln+len(buf)] = buf[..];
		s.ln += len(buf): u8;
		return len(buf);
	dest[..len(r)] = r[..];
	return len(r);
};

// Closer for [[decoder]] streams: releases everything the decoder owns. The
// decoded-line buffer and the scanner's input buffer are zeroed before the
// scanner is finished, so that no armored or decoded data lingers in memory.
fn close(s: *io::stream) (void | io::error) = {
	let dec = s: *decoder;

	// The label and the header list are owned by the decoder.
	free_headers(dec.headers);
	free(dec.label);

	// Scrub buffers while the scanner's buffer is still accessible.
	// XXX: access to undocumented member sucks
	bytes::zero(dec.lbuf);
	bytes::zero(dec.src.buffer);
	bufio::finish(&dec.src);
};

@test fn decode() void = {
	// RFC 4648 test vectors
	const cases: [_](str, str) = [
		("", ""),
		("Zg==", "f"),
		("Zm8=", "fo"),
		("Zm9v", "foo"),
		("Zm9vYg==", "foob"),
		("Zm9vYmE=", "fooba"),
		("Zm9vYmFy", "foobar"),
	];
	let buf: [16]u8 = [0...];
	for (let i = 0z; i < len(cases); i += 1) {
		let in = strings::toutf8(cases[i].0);
		let out = strings::toutf8(cases[i].1);

		assert(b64_decode(buf, in)! == len(out));
		assert(bytes::equal(out, buf[..len(out)]));
	};
	let z = 0z;
	s.buf[s.ln..] = buf[..len(s.buf) - s.ln];
	z += io::writeall(&s.b64, s.buf)?;
	z += io::write(s.out, ['\n'])?;
	buf = buf[len(s.buf) - s.ln..];
	for (len(buf) >= 48; buf = buf[48..]) {
		z += io::writeall(&s.b64, buf[..48])?;
		z += io::write(s.out, ['\n'])?;

	const invalid: [_]str = [
		// invalid characters
		"@Zg=",
		"êg=",
		// data after padding is encountered
		"Zg==Zg==",
		"Zm8=Zm8=",
	];
	for (let i = 0z; i < len(invalid); i += 1) {
		let in = strings::toutf8(invalid[i]);
		assert(b64_decode(buf, in) is errors::invalid);
	};
	s.ln = len(buf): u8;
	s.buf[..s.ln] = buf;
	return z + s.ln;
};

fn pem_wclose(s: *io::stream) (void | io::error) = {
	let s = s: *pemencoder;
	io::writeall(&s.b64, s.buf[..s.ln])?;
	io::close(&s.b64)?;
	fmt::fprintf(s.out, "\n{}{}{}\n", end, s.label, suffix)?;
// Converts an I/O error returned from an Ascii Armor decoder into a
// human-friendly string. [[errors::invalid]] gets an armor-specific message;
// every other error is described by [[io::strerror]].
export fn strerror(err: io::error) const str = {
	return match (err) {
	case errors::invalid =>
		yield "Invalid Ascii Armor data";
	case =>
		yield io::strerror(err);
	};
};