From 93e3aa70c9f0651b9e179e8a01ea79e86dabc43b Mon Sep 17 00:00:00 2001 From: Drew DeVault Date: Wed, 29 Jun 2022 19:34:03 +0200 Subject: [PATCH] format::tar: new module Just contains a reader for now. --- README | 8 +++ reader.ha | 195 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ types.ha | 65 ++++++++++++++++++ 3 files changed, 268 insertions(+) create mode 100644 README create mode 100644 reader.ha create mode 100644 types.ha diff --git a/README b/README new file mode 100644 index 0000000..ca09d8a --- /dev/null +++ b/README @@ -0,0 +1,8 @@ +This module provides an implementation of the tar archive format for Unix. The +specific format implemented is USTAR, however, it is capable of reading most tar +variants which are backwards-compatible with the original format (e.g. GNU tar). + +To read an archive, use [[read]] to create a reader, and [[next]] to enumerate +its entries. The return value from [[next]] contains the file metadata and is an +[[io::stream]] that you may read the file contents from. You may call [[skip]] +to skip an archive entry without reading it. diff --git a/reader.ha b/reader.ha new file mode 100644 index 0000000..62397f5 --- /dev/null +++ b/reader.ha @@ -0,0 +1,195 @@ +// License: MPL-2.0 +// (c) 2022 Drew DeVault +use bufio; +use bytes; +use endian; +use errors; +use io; +use strconv; +use strings; +use strio; + +export type reader = struct { + src: io::handle, + name: [255]u8, +}; + +// Creates a new reader for a tar file. Use [[next]] to iterate through entries +// present in the tar file. +export fn read(src: io::handle) reader = { + return reader { + src = src, + ... + }; +}; + +// Returns the next entry from a tar [[reader]]. Parts of this structure +// (specifically the file name) are borrowed from the reader itself and will not +// be valid after subsequent calls. +// +// If the return value is a file (i.e. entry.etype == entry_type::FILE), the +// caller must either call [[io::read]] using the return value until it returns +// [[io::EOF]], or call [[skip]] to seek to the next entry in the archive. +// +// Note that reading from the header will modify the file size. +export fn next(rd: *reader) (entry | error | io::EOF) = { + static let buf: [BLOCKSIZE]u8 = [0...]; + match (io::read(rd.src, buf)?) { + case let z: size => + if (z != len(buf)) { + return truncated; + }; + case io::EOF => + return truncated; + }; + + if (zeroed(buf)) { + match (io::read(rd.src, buf)?) { + case let z: size => + if (z != len(buf)) { + return truncated; + }; + case io::EOF => + return truncated; + }; + if (!zeroed(buf)) { + return truncated; + }; + return io::EOF; + }; + + let ent = entry { ... }; + const reader = bufio::fixed(buf, io::mode::READ); + const name = readstr(&reader, 100); + ent.mode = readoct(&reader, 8)?; + ent.uid = readoct(&reader, 8)?; + ent.gid = readoct(&reader, 8)?; + ent.fsize = readsize(&reader, 12)?; + ent.mtime = readoct(&reader, 12)?; + ent.checksum = readoct(&reader, 8)?; + ent.etype = readoct(&reader, 1)?: entry_type; + ent.link = readstr(&reader, 100); + + if (ent.etype == entry_type::FILE) { + ent.vtable = &file_vtable; + ent.src = rd.src; + ent.orig = ent.fsize; + ent.remain = ent.orig; + }; + + const ustar = readstr(&reader, 6); + if (ustar != "ustar") { + ent.name = name; + return ent; + }; + + const version = readstr(&reader, 2); + // XXX: We could check the version here + ent.uname = readstr(&reader, 32); + ent.gname = readstr(&reader, 32); + ent.devmajor = readoct(&reader, 8)?; + ent.devminor = readoct(&reader, 8)?; + const prefix = readstr(&reader, 155); + let writer = strio::fixed(rd.name); + strio::join(&writer, prefix, name)!; + ent.name = strio::string(&writer); + return ent; +}; + +// Seeks the underlying tar file to the entry following this one. +export fn skip(ent: *entry) (void | io::error) = { + let amt = ent.remain; + if (amt % BLOCKSIZE != 0) { + amt += BLOCKSIZE - (amt % BLOCKSIZE); + }; + match (io::seek(ent.src, amt: io::off, io::whence::CUR)) { + case io::off => + return; + case io::error => + yield; + }; + io::copy(io::empty, ent)?; +}; + +const file_vtable: io::vtable = io::vtable { + reader = &file_read, + ... +}; + +fn file_read(s: *io::stream, buf: []u8) (size | io::EOF | io::error) = { + let ent = s: *ent_reader; + assert(ent.vtable == &file_vtable); + if (ent.remain == 0) { + return io::EOF; + }; + + let z = len(buf); + if (z > ent.remain) { + z = ent.remain; + }; + z = match (io::read(ent.src, buf[..z])?) { + case let z: size => + yield z; + case io::EOF => + // TODO: Truncated flag + return io::EOF; + }; + ent.remain -= z; + + // Read until we reach the block size + static let buf: [BLOCKSIZE]u8 = [0...]; + if (ent.remain == 0 && ent.orig % BLOCKSIZE != 0) { + let remain = BLOCKSIZE - (ent.orig % BLOCKSIZE); + for (remain > 0) { + match (io::read(ent.src, buf[..remain])?) { + case let z: size => + remain -= z; + case io::EOF => + // TODO: Set a truncated flag or something + break; + }; + }; + }; + + return z; +}; + +fn readstr(rd: *bufio::memstream, ln: size) str = { + const buf = match (bufio::borrowedread(rd, ln)) { + case let buf: []u8 => + assert(len(buf) == ln); + yield buf; + case io::EOF => + abort(); + }; + return strings::fromc(buf: *[*]u8: *const char); +}; + +fn readoct(rd: *bufio::memstream, ln: size) (uint | invalid) = { + const string = readstr(rd, ln); + match (strconv::stoub(string, strconv::base::OCT)) { + case let u: uint => + return u; + case => + return invalid; + }; +}; + +fn readsize(rd: *bufio::memstream, ln: size) (size | invalid) = { + const string = readstr(rd, ln); + match (strconv::stozb(string, strconv::base::OCT)) { + case let z: size => + return z; + case => + return invalid; + }; +}; + +fn zeroed(buf: []u8) bool = { + for (let i = 0z; i < len(buf); i += 1) { + if (buf[i] != 0) { + return false; + }; + }; + return true; +}; diff --git a/types.ha b/types.ha new file mode 100644 index 0000000..c844f0d --- /dev/null +++ b/types.ha @@ -0,0 +1,65 @@ +// License: MPL-2.0 +// (c) 2022 Drew DeVault +use io; + +// The size of each block in a tar file. +export def BLOCKSIZE: size = 512; + +// A file or directory in a tar file. +export type entry = struct { + ent_reader, + name: str, + mode: uint, + uid: uint, + gid: uint, + fsize: size, + mtime: uint, + checksum: uint, + etype: entry_type, + link: str, + uname: str, + gname: str, + devmajor: u64, + devminor: u64, +}; + +export type ent_reader = struct { + vtable: io::stream, + src: io::handle, + orig: size, + remain: size, +}; + +// A tar file entry. Note that some systems create tarballs with additional +// vendor-specific values for the entry type, so a default case is recommended +// when switching against this. +export type entry_type = enum u8 { + FILE, + HARDLINK, + SYMLINK, + CHARDEV, + BLOCKDEV, + DIRECTORY, + FIFO, +}; + +// Returned if the source file size is not aligned on [[BLOCKSIZE]]. +export type truncated = !void; + +// Returned if the source file does not contain a valid ustar archive. +export type invalid = !void; + +// Tagged union of all possible error types. +export type error = !(truncated | invalid | io::error); + +// Converts an [[error]] to a human-friendly representation. +export fn strerror(err: error) const str = { + match (err) { + case truncated => + return "Tar file is truncated"; + case invalid => + return "Tar file is invalid"; + case let err: io::error => + return io::strerror(err); + }; +}; -- 2.45.2