~boringcactus/crowbar-qbe

5107b84f1ef995f8fa388c2268674f0a48084f69 — Melody Horn 3 years ago 17b693c
get closer to implementing win64 fastcall
4 files changed, 32 insertions(+), 25 deletions(-)

M amd64/all.h
M amd64/targ.c
M amd64/win.c
M qbe.vcxproj
M amd64/all.h => amd64/all.h +8 -1
@@ 2,7 2,7 @@

typedef struct Amd64Op Amd64Op;

enum Amd64Reg {
enum Amd64Reg_SysV {
	RAX = RXX+1, /* caller-save */
	RCX,
	RDX,


@@ 45,6 45,13 @@ enum Amd64Reg {
	NFPS = NFPR,
	NCLR = R15 - RBX + 1,
};

/*
 * Register-class counts for the Windows x64 calling convention.
 * Reference:
 * https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention?view=vs-2019
 */
enum Amd64Reg_Win {
	/* volatile (caller-save) integer registers:
	 * RAX, RCX, RDX, R8, R9, R10, R11
	 */
	NGPS_Win = 7,
	/* nonvolatile (callee-save) integer registers:
	 * RBX, RDI, RSI, R12, R13, R14, R15
	 */
	NCLR_Win = 7,
};

MAKESURE(reg_not_tmp, XMM15 < (int)Tmp0);

struct Amd64Op {

M amd64/targ.c => amd64/targ.c +1 -1
@@ 37,7 37,7 @@ Target T_amd64_win = {
	.rglob = BIT(RBP) | BIT(RSP),
	.nrglob = 2,
	.rsave = amd64_win_rsave,
	.nrsave = {NGPS, NFPS},
	.nrsave = {NGPS_Win, NFPS},
	.retregs = amd64_win_retregs,
	.argregs = amd64_win_argregs,
	.memargs = amd64_memargs,

M amd64/win.c => amd64/win.c +21 -22
@@ 5,6 5,7 @@ typedef struct RAlloc RAlloc;

struct AClass {
	int inmem;
	int asPointer;
	int align;
	uint size;
	int cls[2];


@@ 75,17 76,20 @@ typclass(AClass *a, Typ *t)
	a->size = sz;
	a->align = t->align;

	if (t->dark || sz > 16 || sz == 0) {
	if (t->dark || sz > 8 || sz == 0) {
		/* large or unaligned structures are
		 * required to be passed in memory
		 * required to be passed as pointers
		 */
		a->inmem = 1;
		a->inmem = 0;
		a->asPointer = 1;
		a->size = 8;
		return;
	}

	a->cls[0] = Kx;
	a->cls[1] = Kx;
	a->inmem = 0;
	a->asPointer = 0;
	classify(a, t, 0);
}



@@ 158,9 162,9 @@ argsclass(Ins *i0, Ins *i1, AClass *ac, int op, AClass *aret, Ref *env)
	Ins *i;

	if (aret && aret->inmem)
		nint = 5; /* hidden argument */
		nint = 3; /* hidden argument */
	else
		nint = 6;
		nint = 4;
	nsse = 8;
	for (i=i0, a=ac; i<i1; i++, a++)
		switch (i->op - op + Oarg) {


@@ 203,19 207,19 @@ argsclass(Ins *i0, Ins *i1, AClass *ac, int op, AClass *aret, Ref *env)
			break;
		}

	return ((6-nint) << 4) | ((8-nsse) << 8);
	return ((4-nint) << 4) | ((8-nsse) << 8);
}

int amd64_win_rsave[] = {
	RCX, RDX, R8, R9, RDI, RSI, R10, R11, RAX,
	RCX, RDX, R8, R9, R10, R11, RAX,
	XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
	XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, -1
};
int amd64_win_rclob[] = {RBX, R12, R13, R14, R15, -1};
int amd64_win_rclob[] = {RBX, RDI, RSI, R12, R13, R14, R15, -1};

MAKESURE(win_arrays_ok,
	sizeof amd64_win_rsave == (NGPS+NFPS+1) * sizeof(int) &&
	sizeof amd64_win_rclob == (NCLR+1) * sizeof(int)
	sizeof amd64_win_rsave == (NGPS_Win+NFPS+1) * sizeof(int) &&
	sizeof amd64_win_rclob == (NCLR_Win+1) * sizeof(int)
);

/* layout of call's second argument (RCall)


@@ 224,7 228,7 @@ MAKESURE(win_arrays_ok,
 *  |0...00|x|xxxx|xxxx|xx|xx|                  range
 *          |    |    |  |  ` gp regs returned (0..2)
 *          |    |    |  ` sse regs returned   (0..2)
 *          |    |    ` gp regs passed         (0..6)
 *          |    |    ` gp regs passed         (0..4)
 *          |    ` sse regs passed             (0..8)
 *          ` 1 if rax is used to pass data    (0..1)
 */


@@ 305,8 309,8 @@ selcall(Fn *fn, Ins *i0, Ins *i1, RAlloc **rap)
	} else
		ca = argsclass(i0, i1, ac, Oarg, 0, &env);

	for (stk=0, a=&ac[i1-i0]; a>ac;)
		if ((--a)->inmem) {
	for (stk=0, a=&ac[i1-i0-1]; a>ac; a--)
		if (a->inmem || a->asPointer) {
			if (a->align > 4)
				err("win abi requires alignments of 16 or less");
			stk += a->size;


@@ 315,6 319,7 @@ selcall(Fn *fn, Ins *i0, Ins *i1, RAlloc **rap)
		}
	stk += stk & 15;
	if (stk) {
		stk += 32;
		r = getcon(-(int64_t)stk, fn);
		emit(Osalloc, Kl, R, r, R);
	}


@@ 376,15 381,9 @@ selcall(Fn *fn, Ins *i0, Ins *i1, RAlloc **rap)
		if (a->inmem)
			continue;
		r1 = rarg(a->cls[0], &ni, &ns);
		if (i->op == Oargc) {
			if (a->size > 8) {
				r2 = rarg(a->cls[1], &ni, &ns);
				r = newtmp("abi", Kl, fn);
				emit(Oload, a->cls[1], r2, r, R);
				emit(Oadd, Kl, r, i->arg[1], getcon(8, fn));
			}
		if (i->op == Oargc)
			emit(Oload, a->cls[0], r1, i->arg[1], R);
		} else
		else
			emit(Ocopy, i->cls, r1, i->arg[0], R);
	}



@@ 392,7 391,7 @@ selcall(Fn *fn, Ins *i0, Ins *i1, RAlloc **rap)
		return;

	r = newtmp("abi", Kl, fn);
	for (i=i0, a=ac, off=0; i<i1; i++, a++) {
	for (i=i0, a=ac, off=32; i<i1; i++, a++) {
		if (!a->inmem)
			continue;
		if (i->op == Oargc) {

M qbe.vcxproj => qbe.vcxproj +2 -1
@@ 12,13 12,14 @@
        <Platform>x64</Platform>
        <PreferredToolArchitecture>x64</PreferredToolArchitecture>
        <Configuration>Release</Configuration>
        <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
    </PropertyGroup>
    <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
    <Target Name="CreateConfig" BeforeTargets="ClCompile">
        <PropertyGroup>
            <Config>
#define Defasm Gaself
#define Deftgt T_amd64_sysv
#define Deftgt T_amd64_win
            </Config>
        </PropertyGroup>