~sircmpwn/hare-tar

93e3aa70c9f0651b9e179e8a01ea79e86dabc43b — Drew DeVault 2 years ago
format::tar: new module

Just contains a reader for now.
3 files changed, 268 insertions(+), 0 deletions(-)

A README
A reader.ha
A types.ha
A  => README +8 -0
@@ 1,8 @@
This module provides an implementation of the tar archive format for Unix. The
specific format implemented is USTAR; however, it is capable of reading most tar
variants which are backwards-compatible with the original format (e.g. GNU tar).

To read an archive, use [[read]] to create a reader, and [[next]] to enumerate
its entries. The return value from [[next]] contains the file metadata and is an
[[io::stream]] that you may read the file contents from. You may call [[skip]]
to skip an archive entry without reading it.

A  => reader.ha +195 -0
@@ 1,195 @@
// License: MPL-2.0
// (c) 2022 Drew DeVault <sir@cmpwn.com>
use bufio;
use bytes;
use endian;
use errors;
use io;
use strconv;
use strings;
use strio;

// State for reading entries out of a tar archive. Create one with [[read]].
export type reader = struct {
	// The handle the archive is read from.
	src: io::handle,
	// Scratch buffer for assembling an entry's full path. Sized for the
	// longest USTAR path: a 155-byte prefix, a "/" separator, and a
	// 100-byte name (256 bytes in total).
	name: [256]u8,
};

// Prepares a [[reader]] which will pull a tar archive out of the given handle.
// Call [[next]] repeatedly on the result to walk through the archive's entries.
export fn read(src: io::handle) reader = {
	let rd = reader { ... };
	rd.src = src;
	return rd;
};

// Returns the next entry from a tar [[reader]], or [[io::EOF]] once the two
// zeroed blocks which terminate the archive have been read. Parts of this
// structure (such as the file name) are borrowed from the reader or from
// static storage and will not be valid after subsequent calls.
//
// If the return value is a file (i.e. entry.etype == entry_type::FILE), the
// caller must either call [[io::read]] using the return value until it returns
// [[io::EOF]], or call [[skip]] to seek to the next entry in the archive.
//
// Reading from the returned entry decrements its count of remaining bytes; the
// fsize field is left untouched.
//
// Returns [[truncated]] if the source ends before a complete header block has
// been read, and [[invalid]] if a numeric header field cannot be parsed or the
// full path does not fit in the reader's name buffer.
export fn next(rd: *reader) (entry | error | io::EOF) = {
	static let buf: [BLOCKSIZE]u8 = [0...];
	readblock(rd.src, buf)?;

	if (zeroed(buf)) {
		// The end of the archive is marked by two consecutive zeroed
		// blocks; a lone zeroed block is reported as truncation.
		readblock(rd.src, buf)?;
		if (!zeroed(buf)) {
			return truncated;
		};
		return io::EOF;
	};

	// The header fields are stored back to back, so they must be consumed
	// from the block in exactly this order.
	let ent = entry { ... };
	const reader = bufio::fixed(buf, io::mode::READ);
	const name = readstr(&reader, 100);
	ent.mode = readoct(&reader, 8)?;
	ent.uid = readoct(&reader, 8)?;
	ent.gid = readoct(&reader, 8)?;
	ent.fsize = readsize(&reader, 12)?;
	ent.mtime = readoct(&reader, 12)?;
	ent.checksum = readoct(&reader, 8)?;
	ent.etype = readoct(&reader, 1)?: entry_type;
	ent.link = readstr(&reader, 100);

	// Only regular files carry data which can be read through the entry.
	if (ent.etype == entry_type::FILE) {
		ent.vtable = &file_vtable;
		ent.src = rd.src;
		ent.orig = ent.fsize;
		ent.remain = ent.orig;
	};

	// Headers without the "ustar" magic have none of the fields below.
	const ustar = readstr(&reader, 6);
	if (ustar != "ustar") {
		ent.name = name;
		return ent;
	};

	// The version is not used, but it has to be consumed to reach the
	// fields which follow it.
	const version = readstr(&reader, 2);
	// XXX: We could check the version here
	ent.uname = readstr(&reader, 32);
	ent.gname = readstr(&reader, 32);
	ent.devmajor = readoct(&reader, 8)?;
	ent.devminor = readoct(&reader, 8)?;
	const prefix = readstr(&reader, 155);

	// The full path is "prefix/name", or just the name if there is no
	// prefix. The first string argument of strio::join is the delimiter,
	// so the separator has to be passed explicitly.
	let writer = strio::fixed(rd.name);
	const joined = if (prefix == "")
		strio::join(&writer, "/", name)
	else
		strio::join(&writer, "/", prefix, name);
	match (joined) {
	case size =>
		yield;
	case io::error =>
		// Do not abort on a path which is too long for the buffer.
		return invalid;
	};
	ent.name = strio::string(&writer);
	return ent;
};

// Fills buf completely from src, issuing as many reads as necessary, since a
// single read from a pipe or socket may legitimately return less than a whole
// block. Returns [[truncated]] if the source ends before buf is full.
fn readblock(src: io::handle, buf: []u8) (void | truncated | io::error) = {
	let n = 0z;
	for (n < len(buf)) {
		match (io::read(src, buf[n..])?) {
		case let z: size =>
			n += z;
		case io::EOF =>
			return truncated;
		};
	};
};

// Moves the underlying tar file to the entry following this one, discarding
// whatever part of this entry's contents has not been read yet. It is safe to
// call this on an entry which has already been read to the end, or which has
// no contents; in that case nothing is done.
//
// A seek is attempted first. If the source cannot seek, the remaining contents
// are read and thrown away instead.
export fn skip(ent: *entry) (void | io::error) = {
	// Nothing left to skip: either the entry has no data, or reading it to
	// the end has already consumed the trailing padding.
	if (ent.remain == 0) {
		return;
	};

	// The padding which rounds the contents up to a whole block depends on
	// the total size of the contents, not on how much of them is left.
	let amt = ent.remain;
	if (ent.orig % BLOCKSIZE != 0) {
		amt += BLOCKSIZE - (ent.orig % BLOCKSIZE);
	};
	match (io::seek(ent.src, amt: io::off, io::whence::CUR)) {
	case io::off =>
		// Later reads from this entry must not run into the next
		// header.
		ent.remain = 0;
		return;
	case io::error =>
		yield;
	};
	io::copy(io::empty, ent)?;
};

// Stream operations for the contents of a file entry. Only reading is
// provided; every other operation is left at its default.
const file_vtable: io::vtable = io::vtable {
	reader = &file_read,
	...
};

// Read implementation for file entries. Hands out at most the number of bytes
// still belonging to the entry and reports [[io::EOF]] once they are used up.
// When the last byte of the contents has been delivered, the padding up to the
// next block boundary is consumed as well, leaving the source positioned at the
// following header.
fn file_read(s: *io::stream, buf: []u8) (size | io::EOF | io::error) = {
	let ent = s: *ent_reader;
	assert(ent.vtable == &file_vtable);
	if (ent.remain == 0) {
		return io::EOF;
	};

	// Never read past the end of this entry's contents.
	const want = if (len(buf) > ent.remain) ent.remain else len(buf);
	const got = match (io::read(ent.src, buf[..want])?) {
	case io::EOF =>
		// TODO: Truncated flag
		return io::EOF;
	case let n: size =>
		yield n;
	};
	ent.remain -= got;

	// Padding only exists when the size is not a multiple of the block
	// size, and is only due once the contents are exhausted.
	if (ent.remain != 0 || ent.orig % BLOCKSIZE == 0) {
		return got;
	};

	static let pad: [BLOCKSIZE]u8 = [0...];
	let left = BLOCKSIZE - (ent.orig % BLOCKSIZE);
	for (left > 0) {
		match (io::read(ent.src, pad[..left])?) {
		case let n: size =>
			left -= n;
		case io::EOF =>
			// TODO: Set a truncated flag or something
			break;
		};
	};

	return got;
};

// Consumes a header field of exactly ln bytes from rd and returns the string
// stored in it. The string ends at the first NUL byte, or at the end of the
// field if it contains none: a field which is completely filled is not
// NUL-terminated, so the string must never be allowed to run into the fields
// which follow it.
//
// The return value is borrowed from the buffer underlying rd. Aborts if the
// stream holds fewer than ln bytes. NOTE(review): strings::fromutf8 is assumed
// to assert that its input is valid UTF-8, as strings::fromc did; confirm
// against the stdlib version in use.
fn readstr(rd: *bufio::memstream, ln: size) str = {
	const buf = match (bufio::borrowedread(rd, ln)) {
	case let buf: []u8 =>
		assert(len(buf) == ln);
		yield buf;
	case io::EOF =>
		abort();
	};

	// Find the end of the string without leaving the field.
	let end = 0z;
	for (end < len(buf) && buf[end] != 0) {
		end += 1;
	};
	return strings::fromutf8(buf[..end]);
};

// Consumes a numeric header field of ln bytes from rd and parses it as an
// octal number. Numeric fields may be terminated (or padded) with spaces as
// well as NUL bytes, so surrounding spaces are stripped before parsing. A field
// which is left empty is read as zero; among other things, this lets the NUL
// type flag of old-style archives be understood as a regular file.
//
// Returns [[invalid]] if the field holds anything other than an octal number
// which fits in a uint.
fn readoct(rd: *bufio::memstream, ln: size) (uint | invalid) = {
	const string = strings::trim(readstr(rd, ln), ' ');
	if (string == "") {
		return 0u;
	};
	match (strconv::stoub(string, strconv::base::OCT)) {
	case let u: uint =>
		return u;
	case =>
		return invalid;
	};
};

// Consumes a numeric header field of ln bytes from rd and parses it as an
// octal size. Numeric fields may be terminated (or padded) with spaces as well
// as NUL bytes, so surrounding spaces are stripped before parsing. A field
// which is left empty is read as zero.
//
// Returns [[invalid]] if the field holds anything other than an octal number
// which fits in a size.
fn readsize(rd: *bufio::memstream, ln: size) (size | invalid) = {
	const string = strings::trim(readstr(rd, ln), ' ');
	if (string == "") {
		return 0z;
	};
	match (strconv::stozb(string, strconv::base::OCT)) {
	case let z: size =>
		return z;
	case =>
		return invalid;
	};
};

// Reports whether every byte of buf is zero. An empty buffer counts as zeroed.
fn zeroed(buf: []u8) bool = {
	let rest = buf;
	for (len(rest) > 0) {
		if (rest[0] != 0) {
			return false;
		};
		rest = rest[1..];
	};
	return true;
};

A  => types.ha +65 -0
@@ 1,65 @@
// License: MPL-2.0
// (c) 2022 Drew DeVault <sir@cmpwn.com>
use io;

// The size of each block in a tar file, in bytes. Headers occupy one block, and
// file contents are padded up to a multiple of this size.
export def BLOCKSIZE: size = 512;

// A file or directory in a tar file, as returned by [[next]]. The string fields
// are borrowed and are only valid until the next entry is read.
export type entry = struct {
	// Stream state; lets a file entry be read as an [[io::stream]].
	ent_reader,
	// Path of the entry.
	name: str,
	// The numeric fields below are parsed from octal text in the header.
	mode: uint,
	uid: uint,
	gid: uint,
	// Size of the entry's contents in bytes, as recorded in the header.
	fsize: size,
	mtime: uint,
	// Checksum as recorded in the header. NOTE(review): it is stored as
	// read; the reader does not appear to verify it.
	checksum: uint,
	etype: entry_type,
	// Link name field from the header.
	link: str,
	// User and group names; only filled in for headers carrying the
	// "ustar" magic.
	uname: str,
	gname: str,
	// Device numbers; only filled in for headers carrying the "ustar"
	// magic.
	devmajor: u64,
	devminor: u64,
};

// The stream portion of an [[entry]], used to read the contents of a file out
// of the archive. It is only initialized for entries of type
// [[entry_type::FILE]].
export type ent_reader = struct {
	// Must come first so that a pointer to this struct can be used as a
	// stream.
	vtable: io::stream,
	// The archive the contents are read from.
	src: io::handle,
	// Total size of the contents in bytes; determines the trailing padding.
	orig: size,
	// Number of content bytes which have not been read yet.
	remain: size,
};

// A tar file entry. Note that some systems create tarballs with additional
// vendor-specific values for the entry type, so a default case is recommended
// when switching against this.
//
// The value is obtained by parsing the header's one-byte type flag as an octal
// digit, so FILE corresponds to a flag of "0", HARDLINK to "1", and so on.
export type entry_type = enum u8 {
	FILE,
	HARDLINK,
	SYMLINK,
	CHARDEV,
	BLOCKDEV,
	DIRECTORY,
	FIFO,
};

// Returned if the source file ends prematurely: a header block could not be
// read in full, or the zeroed block which starts the end-of-archive marker is
// not followed by a second one. This typically means that the source file size
// is not aligned on [[BLOCKSIZE]].
export type truncated = !void;

// Returned if the source file does not contain a valid ustar archive, for
// instance because a numeric header field could not be parsed.
export type invalid = !void;

// Tagged union of all possible error types. Use [[strerror]] to obtain a
// human-friendly description.
export type error = !(truncated | invalid | io::error);

// Produces a human-friendly description of an [[error]]. I/O errors are
// described by [[io::strerror]].
export fn strerror(err: error) const str = {
	return match (err) {
	case let ioerr: io::error =>
		yield io::strerror(ioerr);
	case invalid =>
		yield "Tar file is invalid";
	case truncated =>
		yield "Tar file is truncated";
	};
};