~ft/pdffs

b574ea6ce2f6fb2aa129da26687d4affeb1faabc — Noam Preil 2 months ago 8a7f9b4
Significantly improved text output
5 files changed, 508 insertions(+), 162 deletions(-)

M main.c
M misc.c
M object.c
M op.c
M pdf.h
M main.c => main.c +19 -9
@@ 17,17 17,21 @@ usage(void)
	threadexitsall("usage");
}

static void
static int
dumppage(Object *page)
{
	int ret;
	Page p;
	pageinit(&p);
	if(pagerender(&p, page) && p.buf.sz != 0)
		write(1, p.buf.b, p.buf.sz);
	pageinit(&p, page);
	ret = pagerender(&p);
	if(ret)
		if(p.buf.sz != 0)
			fprint(1, "%s", (char*)p.buf.b);
	pagefree(&p);
	return ret;
}

static void
static int
dumppages(Object *pages)
{
	Object *page, *kids, *type;


@@ 39,13 43,19 @@ dumppages(Object *pages)
		// Must be a dict, either Page or Pages
		type = dictget(page, "Type");
		// MUST be a name.
		if(strcmp(type->name, "Pages") == 0)
			dumppages(page);
		else if(strcmp(type->name, "Page") == 0)
			dumppage(page);
		if(strcmp(type->name, "Pages") == 0){
			if(!dumppages(page))
				return 0;
		}
		else if(strcmp(type->name, "Page") == 0){
			if(!dumppage(page))
				return 0;
			print("\n");
		}
		else
			sysfatal("Unexpected page node type '%s'", type->name);
	}
	return 1;
}



M misc.c => misc.c +1 -0
@@ 50,6 50,7 @@ Ofmt(Fmt *f)
		return fmtprint(f, "%g", o->num.d);

	case Ostr:
	case Oop:
		if(isutf8(o->str, o->len))
			return fmtprint(f, "%q", o->str);
		return fmtprint(f, "<%.*H>", o->len, o->str);

M object.c => object.c +74 -40
@@ 8,21 8,90 @@ Object *pdfname(Stream *s);
Object *pdfarray(Pdf *pdf, Stream *s);
Object *pdfdict(Pdf *pdf, Stream *s);

/* Tests whether the stream continues with the keyword str.
	Returns 1 if the next strlen(str) bytes equal str and the byte
	after them is whitespace or, when delim is nonzero, a delimiter.
	On a match the terminating byte is pushed back, so the stream is
	left right after str; otherwise the stream is seeked back to
	where it started and 0 is returned.
	strlen(str) must be in (0, 16): the keyword and its terminator
	are read together into a 16-byte buffer. */
static int
sismatch(Stream *s, char *str, int delim)
{
	long len = strlen(str);
	vlong off = Soffset(s);
	char b[16];

	/* len + 1 bytes are read below, so len must stay strictly under sizeof b */
	if(len == 0 || len >= (long)sizeof b)
		return 0;
	if(Sread(s, b, len + 1) == len + 1
	&& memcmp(b, str, len) == 0
	&& (isws(b[len]) || (delim && isdelim(b[len])))){
		Sungetc(s);
		return 1;
	}

	/* short read or mismatch: undo whatever Sread consumed */
	Sseek(s, off, 0);
	return 0;
}

/* Reads one operator token from the stream: every byte up to, but not
	including, the next whitespace or delimiter byte, or up to the end
	of the stream.
	Returns a newly allocated NUL-terminated copy that the caller must
	free. Returns nil if the stream is already exhausted, if no
	terminator is found within the first Maxop bytes, or if strdup fails.
	A terminating whitespace/delimiter byte is pushed back. */
char *
suntilend(Stream *s)
{
	enum { Maxop = 7 };
	char buf[Maxop + 1];
	int sz, c;

	for(sz = 0; sz < Maxop; sz += 1){
		c = Sgetc(s);
		if(c < 0){
			/* end of stream ends the token too; only an empty read is an error */
			if(sz == 0)
				return nil;
			break;
		}
		if(isws(c) || isdelim(c)){
			Sungetc(s);
			break;
		}
		buf[sz] = c;
	}
	/* the loop ran out without seeing a terminator: token too long */
	if(sz == Maxop)
		return nil;
	buf[sz] = 0;
	return strdup(buf);
}

/* General function to parse an object of any type. */
Object *
pdfobj(Pdf *pdf, Stream *s)
{
	Object *o, *o2;
	vlong off;
	int c, tf;
	int c;
	Xref xref;
	char b[16];

	o = o2 = nil;
	do; while(isws(c = Sgetc(s)));
	if(c < 0)
		goto err;

	if(isascii(c) && isalpha(c)){
		Sungetc(s);
		// bool, null, or op
		if(sismatch(s, "null", 1)){
			fprint(1, "NULL\n");
			return &null;
		}
		if((o = calloc(1, sizeof(*o))) == nil)
			goto err;
		o->type = Obool;
		o->pdf = pdf;
		if(sismatch(s, "true", 1)){
			o->bool = 1;
			return o;
		}
		if(sismatch(s, "false", 1)){
			o->bool = 0;
			return o;
		}
		o->type = Oop;
		o->str = suntilend(s);
		return o;
	}

	switch(c){
	case '<': /* dictionary or a string */
		c = Sgetc(s);


@@ 33,7 102,8 @@ pdfobj(Pdf *pdf, Stream *s)
				off = Soffset(s);
				do; while(isws(Sgetc(s)));
				Sungetc(s);
				if(Sread(s, b, 7) == 7 && memcmp(b, "stream", 6) == 0 && isws(c = b[6])){
				if(sismatch(s, "stream", 0)){
					c = Sgetc(s);
					/* there IS a stream */
					if(c == '\r' && (c = Sgetc(s)) < 0)
						goto err;


@@ 71,45 141,8 @@ pdfobj(Pdf *pdf, Stream *s)
			o->pdf = pdf;
		return o;

	case 'n':
		off = Soffset(s);
		if(Sgetc(s) == 'u' && Sgetc(s) == 'l' && Sgetc(s) == 'l' && (isws(c = Sgetc(s)) || isdelim(c))){
			Sungetc(s);
			return &null;
		}
		Sseek(s, off, 0);
		c = 'n';
		goto unexpected;

	case 't':
		off = Soffset(s);
		tf = 1;
		if(Sgetc(s) == 'r' && Sgetc(s) == 'u' && Sgetc(s) == 'e' && (isws(c = Sgetc(s)) || isdelim(c)))
			goto bool;
		Sseek(s, off, 0);
		c = 't';
		goto unexpected;

	case 'f':
		off = Soffset(s);
		tf = 0;
		if(Sgetc(s) == 'a' && Sgetc(s) == 'l' && Sgetc(s) == 's' && Sgetc(s) == 'e' && (isws(c = Sgetc(s)) || isdelim(c)))
			goto bool;
		Sseek(s, off, 0);
		c = 'f';
		goto unexpected;
bool:
		Sungetc(s);
		if((o = calloc(1, sizeof(*o))) == nil)
			goto err;
		o->type = Obool;
		o->pdf = pdf;
		o->bool = tf;
		return o;

	default:
		if(!isdigit(c) && c != '-'){
unexpected:
			Sungetc(s);
			werrstr("unexpected char '%c' at %#x+%#x (%d left)", c, Sobjoffset(s), Soffset(s), Ssize(s));
			goto err;


@@ 178,6 211,7 @@ pdfobjfree(Object *o)
		return;

	case Ostr:
	case Oop:
	case Oname:
		free(o->str);
		break;

M op.c => op.c +363 -102
@@ 12,6 12,30 @@ enum {

typedef struct Op Op;

/* Stores the identity transform into arr, which must have room for six
	doubles in the order a, b, c, d, e, f (see the matrix layout used
	by the cm operator). */
static void
matidentity(double *arr)
{
	int i;

	for(i = 0; i < 6; i += 1)
		arr[i] = 0;
	/* the diagonal entries a and d */
	arr[0] = 1;
	arr[3] = 1;
}

/* Computes out = m1 x m2 for two affine transforms stored as six
	doubles a, b, c, d, e, f, standing for the matrix
	[a b 0]
	[c d 0]
	[e f 1]
	Every product is computed before out is written, so out may be the
	same array as m1 or m2. */
static void
matmult(double *m1, double *m2, double *out)
{
	double a, b, c, d, e, f;

	a = m1[0] * m2[0] + m1[1] * m2[2];
	b = m1[0] * m2[1] + m1[1] * m2[3];
	c = m1[2] * m2[0] + m1[3] * m2[2];
	d = m1[2] * m2[1] + m1[3] * m2[3];
	e = m1[4] * m2[0] + m1[5] * m2[2] + m2[4];
	f = m1[4] * m2[1] + m1[5] * m2[3] + m2[5];

	out[0] = a;
	out[1] = b;
	out[2] = c;
	out[3] = d;
	out[4] = e;
	out[5] = f;
}

struct Op {
	char *s;
	int (*f)(Op *op, Page *p);


@@ 20,6 44,16 @@ struct Op {
};

/* Checks that op carries no flags.
	Returns 1 when op->flags is zero; otherwise prints a diagnostic
	naming the operator on fd 2 and returns 0. */
static int
flagless(Op *op)
{
	if(op->flags == 0)
		return 1;
	fprint(2, "Op '%s' expected no flags\n", op->s);
	return 0;
}

static int
cobegin(Op *op, Page *p)
{
	USED(op, p);


@@ 36,29 70,54 @@ coend(Op *op, Page *p)
/* Pushes a copy of the active graphics state onto the page's state
	stack and makes the copy active.
	The stack must already hold at least one state (pagerender sets
	that up). The font width table is duplicated rather than shared,
	so replacing the font in the new state cannot free memory that the
	saved state still points at.
	Returns 1 on success, 0 if memory runs out; on failure the stack
	depth is unchanged. */
static int
gspush(Op *op, Page *p)
{
	GS *r, *saved, *top;
	int n;

	USED(op);
	r = realloc(p->GS, sizeof(GS) * (p->nGS + 1));
	if(r == nil)
		return 0;
	p->GS = r;
	saved = &p->GS[p->nGS - 1];
	/* realloc may have moved the stack; keep GSactive valid even if we fail below */
	p->GSactive = saved;
	top = &p->GS[p->nGS];
	*top = *saved;
	if(saved->Font.widths != nil){
		top->Font.widths = nil;
		n = saved->Font.last - saved->Font.first + 1;
		if(n > 0){
			top->Font.widths = malloc(sizeof(int) * n);
			if(top->Font.widths == nil)
				return 0;
			memcpy(top->Font.widths, saved->Font.widths, sizeof(int) * n);
		}
	}
	p->nGS += 1;
	p->GSactive = top;
	return 1;
}

/* Pops the active graphics state and reactivates the one below it.
	The bottom state is never popped: an unbalanced pop sets the error
	string and returns 0. The popped state's width table is released.
	A failed shrinking realloc is ignored, since the existing
	allocation remains valid.
	Returns 1 on success. */
static int
gspop(Op *op, Page *p)
{
	GS *r;

	USED(op);
	if(p->nGS <= 1){
		werrstr("graphics state stack underflow");
		return 0;
	}
	free(p->GS[p->nGS - 1].Font.widths);
	p->nGS -= 1;
	r = realloc(p->GS, sizeof(GS) * p->nGS);
	if(r != nil)
		p->GS = r;
	p->GSactive = &p->GS[p->nGS - 1];
	return 1;
}

/* six parameters give the inputs a,b,c,d,e,f for the matrix
	[a b 0]
	[c d 0]
	[e f 1]
 That matrix should be premultiplied with the current matrix
 newCTM = input x oldCTM
 (8.3.4)
 */
static int
gsctm(Op *op, Page *p)
{
	USED(op, p);
	return 0;
	double input[6];
	int i;
	for(i = 0; i < 6; i += 1)
		input[i] = arrayget(p->stack, i)->num.d;
	matmult(input, p->GSactive->CTM, p->GSactive->CTM);
	return flagless(op);
}

static int
gswidth(Op *op, Page *p)
{
	USED(op, p);
	return 0;
	p->GSactive->LW = arrayget(p->stack, 0)->num.i;
	return flagless(op);
}

static int


@@ 99,8 158,8 @@ gsintent(Op *op, Page *p)
static int
gsflatness(Op *op, Page *p)
{
	USED(op, p);
	return 0;
	p->GSactive->FL = arrayget(p->stack, 0)->num.d;
	return flagless(op);
}

static int


@@ 114,21 173,21 @@ static int
pcmove(Op *op, Page *p)
{
	USED(op, p);
	return 0;
	return 1;
}

static int
pcline(Op *op, Page *p)
{
	USED(op, p);
	return 0;
	return 1;
}

static int
pccurve(Op *op, Page *p)
{
	USED(op, p);
	return 0;
	return 1;
}

static int


@@ 149,7 208,7 @@ static int
ppstroke(Op *op, Page *p)
{
	USED(op, p);
	return 0;
	return 1;
}

static int


@@ 218,8 277,21 @@ ccolour2(Op *op, Page *p)
static int
cgray(Op *op, Page *p)
{
	USED(op, p);
	return 0;
	int value = 255 * arrayget(p->stack, 0)->num.d;
	int i;
	u32int *color;
	if(op->flags & Nonstroking){
		color = &p->GSactive->NSC;
		p->GSactive->NSCS = DeviceGray;
	} else{
		color = &p->GSactive->SC;
		p->GSactive->SCS = DeviceGray;
	}
	*color = 0;
	for(i = 0; i < 3; i += 1)
		*color = (*color | value) << 8;
	*color |= 255;
	return 1;
}

static int


@@ 295,21 367,51 @@ tshscale(Op *op, Page *p)
static int
tslead(Op *op, Page *p)
{
	int d = arrayget(p->stack, 0)->num.d / 20;
	while(d > 0){
		d -= 1;
		if(bufput(&p->buf, (uchar*)"\n", 1) == -1)
			sysfatal("OOM");
	p->TS.TL = arrayget(p->stack, 0)->num.d;
	return flagless(op);
}

/* Rebuilds the glyph width table of the active font from its
	FirstChar, LastChar and Widths entries, and reads the default
	width from FontDescriptor/MissingWidth.
	Any previous table is freed first. When the font gives no usable
	character range, or the table cannot be allocated, the table is
	left nil with an empty range [1, 0] so that range checks on
	character codes never select it; the default width is 0 unless
	MissingWidth is present.
	Returns 0 only when a range is given but Widths is missing,
	1 otherwise.
	NOTE(review): assumes dictget yields nil for an absent key, as the
	FirstChar check already did - confirm. */
static int
fontwidths(Page *p)
{
	GSFont *f;
	Object *o, *lo;
	int i, n, nw, first, last;

	f = &p->GSactive->Font;
	free(f->widths);
	f->widths = nil;
	f->first = 1;
	f->last = 0;
	f->defwidth = 0;

	o = dictget(f->font, "FirstChar");
	lo = dictget(f->font, "LastChar");
	if(o == nil || lo == nil)
		return 1;
	first = o->num.i;
	last = lo->num.i;
	if(first < 0 || last < first)
		return 1;
	n = last - first + 1;
	/* calloc: entries the Widths array does not cover read as 0, not garbage */
	f->widths = calloc(n, sizeof(int));
	if(f->widths == nil){
		print("Failed to allocate for (%d, %d): %d\n", first, last, n);
		return 1;
	}
	f->first = first;
	f->last = last;

	o = dictget(f->font, "Widths");
	if(o == nil)
		return 0;
	/* never write past the table, even if Widths is longer than the range */
	nw = arraylen(o);
	if(nw > n)
		nw = n;
	for(i = 0; i < nw; i += 1)
		f->widths[i] = arrayget(o, i)->num.i;

	o = dictget(f->font, "FontDescriptor");
	if(o != nil){
		o = dictget(o, "MissingWidth");
		if(o != nil)
			f->defwidth = o->num.i;
	}
	return 1;
}

static int
tsfontsz(Op *op, Page *p)
{
	USED(op, p);
	return 0;
	char *name = arrayget(p->stack, 0)->name;
	p->GSactive->Font.font = dictget(dictget(dictget(p->obj, "Resources"), "Font"), name);
	if(p->GSactive->Font.font == nil){
		werrstr("Font not found: '%s'", name);
		return 0;
	}
	p->GSactive->Font.enc = dictget(p->GSactive->Font.font, "Encoding");
	if(p->GSactive->Font.enc)
		p->GSactive->Font.enc = dictget(p->GSactive->Font.enc, "Differences");
	p->GSactive->Font.size = arrayget(p->stack, 1)->num.d;
	return fontwidths(p) && flagless(op);
}

static int


@@ 329,15 431,39 @@ tsrise(Op *op, Page *p)
static int
tobegin(Op *op, Page *p)
{
	USED(op, p);
	return 0;
	if(p->TS.inobj){
		werrstr("Text objects must not be nested");
		return 0;
	}
	matidentity(p->TS.Tm);
	matidentity(p->TS.Tlm);
	p->TS.inobj = 1;
	p->GSactive->Font.font = nil;
	return flagless(op);
}

static int
toend(Op *op, Page *p)
{
	USED(op, p);
	return 0;
	if(!p->TS.inobj){
		werrstr("ET found without BT");
		return 0;
	}
	p->TS.inobj = 0;
	return flagless(op);
}

/* Translates the text position by (x, y) in text space.
	When tlm is nonzero the translation is premultiplied onto the text
	line matrix and the text matrix is reset to the result; otherwise
	only the text matrix is moved. Always returns 1. */
static int
tmove(Page *p, double x, double y, int tlm)
{
	double t[6];

	t[0] = 1;
	t[1] = 0;
	t[2] = 0;
	t[3] = 1;
	t[4] = x;
	t[5] = y;
	if(!tlm){
		matmult(t, p->TS.Tm, p->TS.Tm);
		return 1;
	}
	matmult(t, p->TS.Tlm, p->TS.Tlm);
	memcpy(p->TS.Tm, p->TS.Tlm, sizeof(double) * 6);
	return 1;
}

static int


@@ 346,62 472,141 @@ tpmove(Op *op, Page *p)
	Object *x, *y;
	x = arrayget(p->stack, 0);
	y = arrayget(p->stack, 1);
	if(y->num.d != 0){
		if(bufput(&p->buf, (uchar*)"\n", 1) == -1)
			sysfatal("OOM");
	}
	else if(x->num.d < 50)
		if(bufput(&p->buf, (uchar*)" ", 1) == -1)
			sysfatal("OOM");
	USED(op, p);
	return 0;
	if(op->flags & Leading)
		p->TS.TL = -y->num.d;
	return tmove(p, x->num.d, y->num.d, 1);
}

static int
tpmatrix(Op *op, Page *p)
{
	USED(op, p);
	return 0;
	int i;
	for(i = 0; i < 6; i += 1){
		p->TS.Tm[i] = arrayget(p->stack, i)->num.d;
		p->TS.Tlm[i] = p->TS.Tm[i];
	}
	return flagless(op);
}

static int
tpmove0(Op *op, Page *p)
{
	USED(op, p);
	if(bufput(&p->buf, (uchar*)"\n", 1) == -1)
		sysfatal("OOM");
	return 0;
	return tmove(p, 0, 0 - p->TS.TL, 1) && flagless(op);
}

/* Appends the text for character code c to the page buffer.
	If the active font has a Differences array (Font.enc), the array is
	walked: a number sets the current code, and each following name
	belongs to the current code, which then advances by one. When a
	name is found for c, a known glyph name is written as its
	replacement text; an unknown name is reported on fd 2 and nothing
	is written. Codes without an entry are written as the raw byte.
	Returns 1 on success (including the unknown-name case), 0 if the
	buffer write comes up short. */
static int
writepatched(Page *p, uchar c)
{
	/* replacement text is stored in the source encoding (UTF-8 here) */
	static struct {
		char *glyph;
		char *text;
	} known[] = {
		{"endash", "-"},
		{"fi", "fi"},
		{"ff", "ff"},
		{"ffi", "ffi"},
		{"bullet", "•"},
		{"quotedblleft", "\""},
		{"quotedblright", "\""},
		{"quoteleft", "'"},
		{"quoteright", "'"},
	};
	Object *diffs, *e;
	int i, k, n, code, len;

	diffs = p->GSactive->Font.enc;
	if(diffs == nil)
		return bufput(&p->buf, (uchar*)&c, 1) == 1;

	code = 0;
	n = arraylen(diffs);
	for(i = 0; i < n; i += 1){
		e = arrayget(diffs, i);
		if(e->type == Onum){
			code = e->num.i;
			continue;
		}
		if(code != c){
			code += 1;
			continue;
		}
		for(k = 0; k < (int)(sizeof known / sizeof known[0]); k += 1){
			if(strcmp(e->name, known[k].glyph) != 0)
				continue;
			len = strlen(known[k].text);
			return bufput(&p->buf, (uchar*)known[k].text, len) == len;
		}
		fprint(2, "TODO: recognize glyph name '%s'\n", e->name);
		return 1;
	}
	return bufput(&p->buf, (uchar*)&c, 1) == 1;
}

/* Renders one character / glyph and updates the text state.
	The glyph origin is mapped through the font size, the text matrix
	and the CTM. Its y position is compared with the last glyph's to
	decide how many newlines to emit, and its x position to decide
	whether a space is needed. The glyph is then written, and the
	text matrix is advanced by the glyph width (widths are in
	thousandths of the font size).
	A font without a width table uses the default width for every code.
	Returns 1 on success, 0 if a buffer write fails. */
static int
tchar(Page *p, ulong c)
{
	GSFont *f = &p->GSactive->Font;
	double Trm[6] = {f->size, 0, 0, f->size, 0, 0};
	double tx;
	int i, lines;

	matmult(Trm, p->TS.Tm, Trm);
	matmult(Trm, p->GSactive->CTM, Trm);
	tx = f->size / 1000;
	/* widths stays nil when the font declares no FirstChar */
	if(f->widths != nil && c >= f->first && c <= f->last)
		tx = tx * (double)f->widths[c - f->first];
	else
		tx = tx * (double)f->defwidth;
	// Check if whitespace is needed
	if(p->buf.sz > 1){
		/* a zero font size would make the line count a division by zero */
		if(p->TS.y != Trm[5] && f->size != 0){
			lines = (int)((p->TS.y - Trm[5]) / f->size);
			for(i = 0; i < lines; i += 1)
				if(bufput(&p->buf, (uchar*)"\n", 1) != 1)
					return 0;
		}
		if(Trm[4] - p->TS.x > 2.5){
			if(bufput(&p->buf, (uchar*)" ", 1) != 1)
				return 0;
		}
	}
	if(!writepatched(p, c) || !tmove(p, tx, 0, 0))
		return 0;
	p->TS.x = Trm[4] + tx;
	p->TS.y = Trm[5];
	return 1;
}

/* Renders the len bytes of str one character code at a time.
	Each byte is taken as an unsigned code in 0..255: widening a
	signed char directly would turn bytes >= 0x80 into huge values
	that never match the font's width range.
	Returns 1 on success, 0 as soon as a character fails to render. */
static int
tstr(Page *p, char *str, ulong len)
{
	ulong i;

	for(i = 0; i < len; i += 1)
		if(!tchar(p, (uchar)str[i]))
			return 0;
	return 1;
}

static int
thshow(Op *op, Page *p)
{
	if(op->flags != 0){
		fprint(2, "TODO: thshow != Tj\n");
		return 0;
	}
	Object *o = arrayget(p->stack, 0);
	if(bufput(&p->buf, (uchar*)o->str, o->len) == -1)
		sysfatal("OOM");
	USED(op);
	return 0;
	if(!tstr(p, o->str, o->len))
		return 0;
	return 1;
}

static int
thshowarr(Op *op, Page *p)
{
	Object *arr = arrayget(p->stack, 0);
	Object *o;
	Object *o, *arr = arrayget(p->stack, 0);
	int i;
	for(i = 0; i < arraylen(arr); i += 1){
		o = arrayget(arr, i);
		if(o->type == Ostr){
			if(bufput(&p->buf, (uchar*)o->str, o->len) == -1)
				sysfatal("OOM");
			if(!tstr(p, o->str, o->len))
				return 0;
		}
		else if(o->num.d < -150){
			if(bufput(&p->buf, (uchar*)" ", 1) == -1)
				sysfatal("OOM");
		else{
			double shift = 0 - (p->GSactive->Font.size * o->num.d / 1000);
			if(!tmove(p, shift, 0, 0))
				return 0;
		}
	}
	USED(op);
	return 0;
	return flagless(op);
}

static int


@@ 746,7 951,7 @@ static int
opignore(Op *op, Page *p)
{
	USED(op, p);
	return 1;
	return 0;
}

static Op ops[] = {


@@ 833,8 1038,8 @@ static Op ops[] = {
	/* 9.4.2 Text position operators */
	{"Td", tpmove, 2,},           /* move, next line */
	{"TD", tpmove, 2, Leading,},  /* move, next line, leading */
	{"Tm", tpmatrix, 6,},         /* (line) matrix */
	{"T*", tpmove0, 0, Leading,}, /* move, next line, leading */
	{"Tm", tpmatrix, 6,},         /* set Tm and Tlm */
	{"T*", tpmove0, 0,}, /* move, next line, leading */

	/* 9.4.3 Text showing operators */
	{"Tj", thshow, 1,},                /* show string */


@@ 902,56 1107,92 @@ static Op ops[] = {
	{nil, nil, 0,},
};

// If an op is found at the current position in the stream, the associated Op is
// returned and the stream is advanced. Otherwise, nil is returned and the stream
// is left unchanged.
Op *
opfind(Stream *s)
opfind(char *name)
{
	int i;
	uint len;
	int i = 0;
	Op *op;
	char *b = (char*)s->buf.b + s->buf.off;
	i = 0;
	while(ops[i].s != nil){
		op = &ops[i];
		len = strlen(op->s);
		if(strncmp(op->s, b, len) == 0 && (isws(b[len]) || isdelim(b[len]))){
			s->buf.off += len;
		if(strcmp(op->s, name) == 0)
			return op;
		}
		i += 1;
	}
	return nil;
}

void
pageinit(Page *page)
pageinit(Page *page, Object *o)
{
	bufinit(&page->buf, 0, 0);
	// Stack is per-content-stream, so we don't create it here
	page->stack = nil;
	page->obj = o;
	page->TS.inobj = 0;
	page->TS.x = 0;
	page->TS.y = 0;
}

void
pagefree(Page *p)
gsinit(Page *p, GS *gs)
{
	USED(p);
	/* todo: actually initialize the full state */
	/* CTM maps user coords to device coords. 
	TODO: use mediabox and screen info to init CTM
	*/
	matidentity(gs->CTM);
	gs->LW = 1;
	gs->LC = 0;
	gs->LJ = 0;
	gs->ML = 10;
	gs->SCS = gs->NSCS = DeviceGray;
	// Alpha is lowest byte; this is (0, 0, 0, 255) == black
	gs->SC = gs->NSC = 255;
	gs->Font.font = nil;
	gs->Font.enc = nil;
	gs->Font.widths = nil;
}

void
gsfree(GS gs)
{
	buffree(&p->buf);
	pdfobjfree(p->stack);
	free(gs.Font.widths);
	pdfobjfree(gs.Font.font);
	gs.Font.font = nil;
	gs.Font.enc = nil;
	gs.Font.widths = nil;
}

static void
stackreset(Object *stack)
void
pagegsclean(Page *p)
{
	int i;
	for(i = 0; i < stack->array.ne; i += 1)
		pdfobjfree(stack->array.e[i]);
	stack->array.ne = 0;
	free(stack->array.e);
	stack->array.e = nil;
	p->GSactive = nil;
	for(i = 0; i < p->nGS; i += 1)
		gsfree(p->GS[i]);
	free(p->GS);
	p->GS = nil;
	p->nGS = 0;
}

static void
static int
stackreset(Page *p)
{
	pdfobjfree(p->stack);
	p->stack = arraynew(p->obj->pdf);
	return p->stack != nil;
}

void
pagefree(Page *p)
{
	buffree(&p->buf);
	pdfobjfree(p->stack);
	pagegsclean(p);
}

static int
pagerendercontent(Page *p, Object *content)
{
	Stream *s;


@@ 964,43 1205,63 @@ pagerendercontent(Page *p, Object *content)
	}
	p->stack = arraynew(content->pdf);
	if(p->stack == nil)
		return;
		return 0;
	while(s->buf.off != s->buf.sz){
		while(isws(s->buf.b[s->buf.off]) && s->buf.off != s->buf.sz)
			s->buf.off += 1;
		if(s->buf.off == s->buf.sz)
			break;
		op = opfind(s);
		if(op != nil){
			op->f(op, p);
			stackreset(p->stack);
		} else{
			o = pdfobj(content->pdf, s);
			if(o == nil){
				fprint(2, "failed to read operand: %r\n");
				break;
		o = pdfobj(content->pdf, s);
		if(o == nil)
			return 0;
		if(o->type == Oop){
			op = opfind(o->str);
			if(op == nil){
				fprint(2, "Unknown op: %s\n", o->str);
				pdfobjfree(o);
				return 0;
			}
			pdfobjfree(o);
			if(!op->f(op, p)){
				fprint(2, "'%s' failed!\n", op->s);
				return 0;
			}
			if(!stackreset(p))
				return 0;
		} else{
			if(!arrayadd(p->stack, o)){
				fprint(2, "Failed to push operand to stack: %r\n");
				break;
				return 0;
			}
		}
	}
	if(bufput(&p->buf, (uchar*)"\n", 1) == -1)
		sysfatal("OOM");
	if(bufput(&p->buf, (uchar*)"\n\0", 2) != 2)
		return 0;
	Sclose(s);
	return 1;
}

int
pagerender(Page *p, Object *o)
pagerender(Page *p)
{
	Object *content;
	int i;
	content = dictget(o, "Contents");
	if(content->type == Oarray)
	p->nGS = 1;
	p->GS = malloc(sizeof(GS));
	if(p->GS == nil){
		werrstr("Out of memory");
		return 0;
	}
	gsinit(p, p->GS);
	content = dictget(p->obj, "Contents");
	if(content->type == Oarray){
		for(i = 0; i < arraylen(content); i += 1)
			pagerendercontent(p, arrayget(content, i));
			if(!pagerendercontent(p, arrayget(content, i)))
				return 0;
	}
	else if(content->type != Onull)
		pagerendercontent(p, content);
		if(!pagerendercontent(p, content))
			return 0;
	pagegsclean(p);
	return 1;
}

M pdf.h => pdf.h +51 -11
@@ 8,10 8,12 @@ enum {
	Ostream, /* 7.3.8 */
	Onull,   /* 7.3.9 */
	Oindir,  /* 7.3.10 */
	Oop,     /* 7.8.2 */
};

typedef struct Buffer Buffer;
typedef struct Filter Filter;
typedef struct TS TS;
typedef struct GS GS;
typedef struct GSD GSD;
typedef struct GSFont GSFont;


@@ 35,11 37,6 @@ struct Buffer {
	void *(*memimage)(Buffer *b);
};

struct Page {
	Object *stack;
	Buffer buf;
};

struct Filter {
	char *name;
	int (*readall)(void *aux, Buffer *bi, Buffer *bo);


@@ 104,22 101,65 @@ struct GSD {
struct GSFont {
	Object *font;
	double size;
	Object *enc; /* TODO: drop enc, use the encoding table */
	struct{
		// If a character c is in [first, last], replace it with values[c], which may be multibyte.
		int first, last;
		char **values;
	} encoding;
	struct{
		int first, last;
		int *widths;
		int defwidth;
	};
};

/* Color spaces; 8.6.3 / table 61 */
typedef enum ColorSpace {
	DeviceGray, DeviceRGB, DeviceCMYK, /* Device family */
	CalGray, CalRGB, Lab, ICCBased, /* CIE-based family */
	Pattern, Indexed, Separation, DeviceN, /* Special family */
} ColorSpace;

struct GS {
	double CTM[6]; /* current transformation matrix ; 8.3 */
	Object *BG, *UCR, *UCR2, *TR, *TR2, *HT, *BM, *SMask, *UseBlackPTComp, *HTO;
	int LW, LC, LJ, ML, RI, OP, op, OPM, SA, AIS, TK;
	double SM, CA, ca;
	struct {
		GSFont *Font;
		int nFont;
	double SM, CA, ca, FL;
	struct{ /* coloring info */
		ColorSpace SCS, NSCS; /* stroking color space and nonstroking color space */
		u32int SC, NSC;
	};
	GSFont Font;
	struct {
		GSD *d;
		int nd;
	};
};

struct TS {
	double Tm[6]; /* text matrix */
	double Tlm[6]; /* text line matrix */
	/* Tracks if we're in a text object; nesting is verboten */
	int inobj;
	double TL;
	/* Temporary, for pdf2txt functionality: tracks the last character's position so we know whether whitespace is needed */
	double x, y;
};

struct Page {
	Object *obj;
	Object *stack;
	Buffer buf;
	/* The graphical state stack. GSactive is always a shortcut for the top of the stack, GS[nGS - 1] */
	struct{
		GS *GS;
		GS *GSactive;
		int nGS;
	};
	TS TS;
};

struct Pdf {
	Stream *s;
	Xref *xref;


@@ 246,8 286,8 @@ int bufput(Buffer *b, uchar *d, int sz);
int bufget(Buffer *b, uchar *d, int sz);
void bufdump(Buffer *b);

void pageinit(Page *p);
int pagerender(Page *p, Object *o);
void pageinit(Page *p, Object *o);
int pagerender(Page *p);
void pagefree(Page *p);

#pragma varargck type "O" Object*