~ft/libtags

c074cc2a8b2d8e2721457f3c8723298dfd1fd212 — Sigrid Solveig Haflínudóttir 2 months ago 1a9591b
redo parts of the API to fit more tag-kind-specific info - attached image type
9 files changed, 218 insertions(+), 83 deletions(-)

M examples/readtags.c
M flac.c
M harness.c
M id3v2.c
M m4a.c
M tags.c
M tags.h
M tagspriv.h
M vorbis.c
M examples/readtags.c => examples/readtags.c +37 -11
@@ 33,19 33,48 @@ static const char *t2s[] = {

static bool image;

/*
 * Map an image type (ITother ... ITlogo_publisher) to its human-readable
 * description. Returns "???" for any value that has no description.
 */
static const char *
imagetype(int type)
{
	static const char *const types[] = {
		[ITother] = "Other",
		[IT32x32_file_icon] = "32x32 pixels 'file icon' (PNG only)",
		[ITother_file_icon] = "Other file icon",
		[ITcover_front] = "Cover (front)",
		[ITcover_back] = "Cover (back)",
		[ITleaflet] = "Leaflet page",
		[ITmedia] = "Media (e.g. label side of CD)",
		[ITlead] = "Lead artist/lead performer/soloist",
		[ITartist] = "Artist/performer",
		[ITconductor] = "Conductor",
		[ITband] = "Band/orchestra",
		[ITcomposer] = "Composer",
		[ITlyricist] = "Lyricist/text writer",
		[ITlocation] = "Recording location",
		[ITrecording] = "During recording",
		[ITperformance] = "During performance",
		[ITmovie_capture] = "Movie/video screen capture",
		[ITfish] = "A bright coloured fish",
		[ITillustration] = "Illustration",
		[ITlogo_band] = "Band/artist logotype",
		[ITlogo_publisher] = "Publisher/studio logotype",
	};
	/*
	 * Bound the lookup by the table itself rather than by a separate constant,
	 * so an image type without an entry here can neither index past the end
	 * nor hand a NULL string to the caller.
	 */
	const int ntypes = (int)(sizeof(types)/sizeof(types[0]));

	if(type < 0 || type >= ntypes || types[type] == NULL)
		return "???";
	return types[type];
}

static void
tag(Tagctx *ctx, int t, const char *k, const char *v, int offset, int size, Tagread f)
tag(Tagctx *ctx, int type, Tag *tag)
{
	USED(k); USED(f);
	if(image){
		if(t != Timage)
		if(type != Timage)
			return;
		int size = tag->image.size;
		char *raw = malloc(size);
		Aux *aux = ctx->aux;
		int prevoffset = lseek(aux->fd, 0, 1);
		if(lseek(aux->fd, offset, 0) != offset ||
		if(lseek(aux->fd, tag->image.offset, 0) != tag->image.offset ||
		   read(aux->fd, raw, size) != size ||
		   (f != NULL && f(raw, &size) != 0)){
		   (tag->image.decode != NULL && tag->image.decode(raw, &size) != 0)){
			fprintf(stderr, "failed to read the image\n");
			exit(1);
		}


@@ 54,12 83,10 @@ tag(Tagctx *ctx, int t, const char *k, const char *v, int offset, int size, Tagr
		exit(0);
		return;
	}
	if(t == Timage)
		printf("%-12s %s %d %d\n", t2s[t], v, offset, size);
	else if(t == Tunknown)
		printf("%-12s %s\n", k, v);
	if(type == Timage)
		printf("%-12s %s %d %d (%s)\n", t2s[type], tag->image.mime, tag->image.offset, tag->image.size, imagetype(tag->image.type));
	else
		printf("%-12s %s\n", t2s[t], v);
		printf("%-12s %s\n", type == Tunknown ? tag->text.k : t2s[type], tag->text.v);
}

static void


@@ 118,7 145,6 @@ main(int argc, char **argv)
		else{
			if(tagsget(&ctx) != 0){
				fprintf(stderr, "no tags or failed to read tags\n");
				return 1;
			}else if(image){
				fprintf(stderr, "no images found\n");
				return 1;

M flac.c => flac.c +11 -4
@@ 7,7 7,7 @@ int
tagflac(Tagctx *ctx)
{
	uint8_t *d;
	int sz, last;
	int sz, last, type;
	uint64_t g;

	d = (uint8_t*)ctx->buf;


@@ 44,7 44,8 @@ tagflac(Tagctx *ctx)
			if(sz < 8+4+20 || ctx->read(ctx, d, 8) != 8) /* type, mime length */
				return -1;
			sz -= 8;
			n = beuint(&d[4]);
			type = beuint(d); /* type */
			n = beuint(&d[4]); /* mime length */
			mime = ctx->buf+20;
			if(n < 0 || n >= sz-4-20 || n >= ctx->bufsz-20 || ctx->read(ctx, mime, n) != n)
				return -1;


@@ 57,8 58,14 @@ tagflac(Tagctx *ctx)
			sz -= 20;
			if((n = beuint(&d[16])) < 0)
				return -1;
			if(n > 0)
				tagscallcb(ctx, Timage, "", mime, offset, n, nil);
			if(n > 0){
				tagscallcb(ctx, Timage, &(Tag){.image = {
					.mime = mime,
					.offset = offset,
					.size = n,
					.type = type,
				}});
			}
			if(ctx->seek(ctx, sz, 1) <= 0)
				return -1;
		}else if((d[0] & 0x7f) == 4){ /* 4 = vorbis comment */

M harness.c => harness.c +16 -2
@@ 1,6 1,7 @@
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "tags.h"


@@ 28,9 29,22 @@ struct Aux {
__AFL_FUZZ_INIT()

/*
 * Tag callback of the fuzzing harness. Only image tags are acted upon: the raw
 * image bytes are copied from aux->in and, if the tag carries a decode
 * function, decoded inside that copy.
 */
static void
tag(Tagctx *ctx, int t, const char *k, const char *v, int offset, int size, Tagread f)
tag(Tagctx *ctx, int t, Tag *tag)
{
	USED(ctx); USED(t);  USED(k); USED(v); USED(offset); USED(size); USED(f);
	if(t == Timage){
		/* scratch buffer kept across calls; it only ever grows and is never freed */
		static uint8_t *buf;
		static int bufsz;
		int size = tag->image.size;
		if(bufsz < size){
			/* NOTE(review): the realloc result is used without a NULL check */
			buf = realloc(buf, size);
			bufsz = size;
		}
		Aux *aux = ctx->aux;
		/* NOTE(review): offset and size are taken as reported; no check against the input length is visible here */
		memcpy(buf, aux->in+tag->image.offset, size);
		if(tag->image.decode != NULL)
			tag->image.decode(buf, &size);
		/* the size after decoding must still fit the buffer */
		assert(size >= 0 && size <= bufsz);
	}
}

static void

M id3v2.c => id3v2.c +23 -8
@@ 159,11 159,11 @@ nontext(Tagctx *ctx, uint8_t *d, int tsz, int unsync)
{
	int n, offset;
	char *b, *tag;
	Tagread f;
	Tagdecode decf;

	tag = ctx->buf;
	n = 0;
	f = unsync ? unsyncread : nil;
	decf = unsync ? unsyncread : nil;
	if(strcmp((char*)d, "APIC") == 0){
		offset = ctx->seek(ctx, 0, 1);
		if((n = ctx->read(ctx, tag, 255)) == 255){ /* APIC mime and description should fit */


@@ 180,12 180,20 @@ nontext(Tagctx *ctx, uint8_t *d, int tsz, int unsync)
					break;
				}
			}
			tagscallcb(ctx, Timage, "APIC", b, offset+n, tsz-n, f);
			n = 256;
			if(tsz > n){
				tagscallcb(ctx, Timage, &(Tag){.image = {
					.type = b[strlen(b)+1],
					.mime = b,
					.offset = offset+n,
					.size = tsz-n,
					.decode = decf,
				}});
			}
			n = 255;
		}
	}else if(strcmp((char*)d, "PIC") == 0){
		offset = ctx->seek(ctx, 0, 1);
		if((n = ctx->read(ctx, tag, 256)) == 256){ /* PIC description should fit */
		if((n = ctx->read(ctx, tag, 255)) == 255){ /* PIC description should fit */
			b = tag + 1; /* mime type */
			for(n = 5; n < 253; n++){
				if(tag[0] == 0 || tag[0] == 3){ /* one zero byte */


@@ 198,9 206,16 @@ nontext(Tagctx *ctx, uint8_t *d, int tsz, int unsync)
					break;
				}
			}
			if(tsz > n)
				tagscallcb(ctx, Timage, "PIC", strcmp(b, "JPG") == 0 ? "image/jpeg" : "image/png", offset+n, tsz-n, f);
			n = 256;
			if(tsz > n){
				tagscallcb(ctx, Timage, &(Tag){.image = {
					.type = b[strlen(b)+1],
					.mime = strcmp(b, "JPG") == 0 ? "image/jpeg" : "image/png",
					.offset = offset+n,
					.size = tsz-n,
					.decode = decf,
				}});
			}
			n = 255;
		}
	}else if(strcmp((char*)d, "RVA2") == 0 && tsz >= 6+5){
		/* replay gain. 6 = "track\0", 5 = other */

M m4a.c => m4a.c +10 -4
@@ 186,10 186,16 @@ tagm4a(Tagctx *ctx)
			d[sz] = 0;
			txtcb(ctx, type, "", d);
			sz = 0;
		}else if(type == Timage && dtype == 13) /* jpeg cover image */
			tagscallcb(ctx, Timage, "", "image/jpeg", ctx->seek(ctx, 0, 1), sz, nil);
		else if(type == Timage && dtype == 14) /* png cover image */
			tagscallcb(ctx, Timage, "", "image/png", ctx->seek(ctx, 0, 1), sz, nil);
		}else if(type == Timage){
			tagscallcb(ctx, Timage, &(Tag){.image = {
				.mime = dtype == 13 ?
					"image/jpeg" :
					(dtype == 14 ? "image/png" : ""),
				.type = ITcover_front,
				.offset = ctx->seek(ctx, 0, 1),
				.size = sz,
			}});
		}
	}

	return 0;

M tags.c => tags.c +11 -12
@@ 24,25 24,24 @@ static const Getter g[] =
};

/*
 * Hand a tag over to the user's callback (ctx->tag).
 * The value of a text tag is first stripped of leading and trailing bytes
 * that are <= ' ' (the tail is cut by writing a terminator into the string
 * itself); a value that ends up empty is not reported at all.
 * Every reported type other than Tunknown is also recorded as a bit in
 * ctx->found.
 */
void
tagscallcb(Tagctx *ctx, int type, const char *k, char *s, int offset, int size, Tagread f)
tagscallcb(Tagctx *ctx, int type, Tag *tag)
{
	char *e;
	char *s, *e;

	if(f == nil && size == 0){
		while((uint8_t)*s <= ' ' && *s)
			s++;
	if(type != Timage){
		for(s = tag->text.v; (uint8_t)*s <= ' ' && *s; s++);
		e = s + strlen(s);
		while(e != s && (uint8_t)e[-1] <= ' ')
			e--;
		if(*e != 0)
		  *e = 0;
	}
	if(*s){
		ctx->tag(ctx, type, k, s, offset, size, f);
		if(type != Tunknown){
			ctx->found |= 1<<type;
		}
			*e = 0;
		if(*s == 0)
			return;
		tag->text.v = s;
	}
	ctx->tag(ctx, type, tag);
	if(type != Tunknown)
		ctx->found |= 1<<type;
}

int

M tags.h => tags.h +58 -9
@@ 4,8 4,9 @@
extern "C" {
#endif

typedef union Tag Tag;
typedef struct Tagctx Tagctx;
typedef int (*Tagread)(void *buf, int *cnt);
typedef int (*Tagdecode)(void *buf, int *cnt);

/* Tag type. */
enum {


@@ 54,6 55,57 @@ enum {
	Fogg __attribute__((deprecated("use Fvorbis instead"))) = Fvorbis,
};

/*
 * Image (picture) types. The numbering follows id3v2, so every value is
 * spelled out next to its name.
 */
enum {
	ITother           = 0x00,
	IT32x32_file_icon = 0x01,
	ITother_file_icon = 0x02,
	ITcover_front     = 0x03,
	ITcover_back      = 0x04,
	ITleaflet         = 0x05,
	ITmedia           = 0x06,
	ITlead            = 0x07,
	ITartist          = 0x08,
	ITconductor       = 0x09,
	ITband            = 0x0a,
	ITcomposer        = 0x0b,
	ITlyricist        = 0x0c,
	ITlocation        = 0x0d,
	ITrecording       = 0x0e,
	ITperformance     = 0x0f,
	ITmovie_capture   = 0x10,
	ITfish            = 0x11,
	ITillustration    = 0x12,
	ITlogo_band       = 0x13,
	ITlogo_publisher  = 0x14,

	/* one past the last type, i.e. the number of types above */
	ITnum,
};

/*
 * Tag itself, as handed to the tag callback. Which member is valid depends on
 * the "type" passed to the callback: "image" for Timage, "text" for anything else.
 */
union Tag {
	/* Any tag type except Timage */
	struct {
		char *k; /* raw key: "TPE1", "replaygain_album_peak" etc */
		char *v; /* value */
	}text;

	/* Tag type Timage */
	struct {
		/* If not NULL, reading the image cover requires additional decoding of the data.
		 * In that case you will need to read the raw data (located by "offset" and "size")
		 * into a buffer and call this function on it: tag->image.decode(buf, &size).
		 * If the function returned 0, "size" has been updated with the actual image size
		 * after decoding, else there was an error.
		 */
		Tagdecode decode;
		const char *mime; /* "image/png", "image/jpeg" etc */
		int offset; /* offset from the beginning of the file */
		int size; /* number of bytes occupied by the raw image data (not decoded) */
		int type; /* type of the image (ITcover_front, ...) */
	}image;
};

/* Tag parser context. You need to set it properly before parsing an audio file using libtags. */
struct Tagctx {
	/* Read function. This is what libtags uses to read the file. */


@@ 63,19 115,16 @@ struct Tagctx {
	int (*seek)(Tagctx *ctx, int offset, int whence);

	/* Callback that is used by libtags to inform about the tags of a file.
	 * "type" is the tag's type (Tartist, ...) or Tunknown if libtags doesn't know how to map a tag kind to
	 * any of these. "k" is the raw key like "TPE1", "TPE2", etc. "s" is the null-terminated string unless "type" is
	 * Timage. "offset" and "size" define the placement and size of the image cover ("type" = Timage)
	 * inside the file, and "f" is not NULL in case reading the image cover requires additional
	 * operations on the data, in which case you need to read the image cover as a stream and call this
	 * function to apply these operations on the contents read.
	 * "type" is the tag's type (Tartist, ...) or Tunknown if libtags doesn't
	 * know how to map a tag kind to any of these.
	 * ANY tag type that isn't Timage is considered a text tag.
	 */
	void (*tag)(Tagctx *ctx, int type, const char *k, const char *s, int offset, int size, Tagread f);
	void (*tag)(Tagctx *ctx, int type, Tag *tag);

	/* Approximate millisecond-to-byte offsets within the file, if available. This callback is optional. */
	void (*toc)(Tagctx *ctx, int ms, int offset);

	/* Auxiliary data. Not used by libtags. */
	/* Auxiliary data. Not used by libtags, left for the library user. */
	void *aux;

	/* Memory buffer to work in. */

M tagspriv.h => tagspriv.h +4 -3
@@ 65,11 65,12 @@ int debase64(uint8_t *in, int insz, uint8_t *out, int outsz);
/*
 * METADATA_BLOCK_PICTURE reader function.
 */
int mbpdec(void *buf, int *cnt);
int cbmbp(Tagctx *ctx, char *v, int ssz, int off, int picsz);

void tagscallcb(Tagctx *ctx, int type, const char *k, char *s, int offset, int size, Tagread f);
void tagscallcb(Tagctx *ctx, int type, Tag *tag);

#define txtcb(ctx, type, k, s) tagscallcb(ctx, type, k, (char*)s, 0, 0, nil)
#define txtcb(ctx, type, k_, v_) \
	tagscallcb(ctx, type, &(Tag){.text = {.k = (k_), .v = (char*)(v_)}})

int tagflac(Tagctx *ctx);
int tagid3v1(Tagctx *ctx);

M vorbis.c => vorbis.c +48 -30
@@ 43,7 43,7 @@ cbvorbiscomment(Tagctx *ctx, char *k, char *v)
		txtcb(ctx, Tunknown, k, v);
}

int
static int
mbpdec(void *buf, int *cnt)
{
	int sz, n;


@@ 53,7 53,7 @@ mbpdec(void *buf, int *cnt)
	if((n = debase64(v, *cnt, v, *cnt)) <= 0)
		return -1;

	beuint(v); /* id3v2 APIC type */
	/* skip id3v2 APIC type */
	v += 4; n -= 4;
	sz = beuint(v); /* mime size */
	v += 4; n -= 4;


@@ 77,11 77,48 @@ mbpdec(void *buf, int *cnt)
}

/*
 * Handle the value of a METADATA_BLOCK_PICTURE comment.
 *
 * "v" points at the base64 text, of which "ssz" bytes are available in memory.
 * Only the beginning of the picture block is decoded here (into ctx->buf) to
 * learn the image type and MIME. The picture itself is reported to the tag
 * callback as the still-encoded span of "picsz" bytes at file offset "off",
 * with mbpdec set as its decode function.
 *
 * Returns 0 on success, and also when too little data could be decoded to
 * hold a picture block header (no tag is reported then). Returns -1 if the
 * header fields are inconsistent with the amount of decoded data.
 */
int
cbmbp(Tagctx *ctx, char *v, int ssz, int off, int picsz)
{
	char *mime, *mimeend;
	int type, n, sz;

	n = ssz; /* at most this amount is available */
	n &= ~3; /* modulo 4 sextets, so debase64 gets complete bytes */
	n = debase64((uint8_t*)v, n, (uint8_t*)ctx->buf, ctx->bufsz);
	/* https://xiph.org/flac/format.html#metadata_block_picture */
	if(n <= 4+4+0+4+0+4+4+4+4+4+0)
		return 0;
	v = ctx->buf;
	type = beuint(v); /* id3v2 APIC type */
	v += 4; n -= 4;
	sz = beuint(v); /* mime size */
	v += 4; n -= 4;
	if(sz < 0 || sz >= n-4-4-4-4-4-4)
		return -1;
	mime = v;
	v += sz; n -= sz; /* skip MIME */
	mimeend = v; /* one past the last MIME byte */
	sz = beuint(v); /* description size */
	v += 4; n -= 4;
	if(sz < 0 || sz >= n-4-4-4-4-4)
		return -1;
	/*
	 * Null-terminate MIME exactly where it ends. That byte is the first byte
	 * of the description size, which has already been read above, so nothing
	 * still needed is lost. (Writing at "v" here would hit the description,
	 * four bytes too far.)
	 */
	*mimeend = 0;
	tagscallcb(ctx, Timage, &(Tag){.image = {
		.mime = mime,
		.offset = off,
		.size = picsz,
		.type = type,
		.decode = mbpdec,
	}});

	return 0;
}

int
tagvorbis(Tagctx *ctx)
{
	char *v, *mime;
	char *v;
	uint8_t *d, h[4];
	int sz, picsz, numtags, i, npages, pgend, skip, off, n;
	int sz, numtags, i, npages, pgend, skip;

	d = (uint8_t*)ctx->buf;
	/* need to find vorbis frame with type=3 */


@@ 148,32 185,13 @@ tagvorbis(Tagctx *ctx)
			*v++ = 0;
			if(strcasecmp(ctx->buf, "metadata_block_picture") != 0)
				cbvorbiscomment(ctx, ctx->buf, v);
			else{
				/* off and picsz will point at the base64-encoded picture block */
				off = ctx->seek(ctx, 0, 1) - sz + (v - ctx->buf);
				picsz = sz + skip - (v - ctx->buf);
				n = sz - (v - ctx->buf); /* at most this amount is available */
				n &= ~3; /* modulo 4 sextets, so debase64 gets complete bytes */
				n = debase64((uint8_t*)v, n, (uint8_t*)ctx->buf, ctx->bufsz);
				/* https://xiph.org/flac/format.html#metadata_block_picture */
				if(n > 4+4+0+4+0+4+4+4+4+4+0){
					v = ctx->buf;
					beuint(v); /* id3v2 APIC type */
					v += 4; n -= 4;
					sz = beuint(v); /* mime size */
					v += 4; n -= 4;
					if(sz < 0 || sz >= n-4-4-4-4-4-4)
						return -1;
					mime = v;
					v += sz; n -= sz; /* skip MIME */
					sz = beuint(v); /* description size */
					v += 4; n -= 4;
					if(sz < 0 || sz >= n-4-4-4-4-4)
						return -1;
					*v = 0; /* null-terminate MIME */
					tagscallcb(ctx, Timage, "", mime, off, picsz, mbpdec);
				}
			}
			else if(cbmbp(ctx, v,
					sz - (v - ctx->buf), /* at most this amount is available */
					ctx->seek(ctx, 0, 1) - sz + (v - ctx->buf), /* offset */
					sz + skip - (v - ctx->buf) /* total pic size (still encoded) */
					) != 0)
				return -1;

			if(ctx->seek(ctx, skip, 1) < 0)
				return -1;
		}