From f317250d92232987162cc16c437c9ab4597e84ac Mon Sep 17 00:00:00 2001 From: glenda Date: Wed, 14 Sep 2022 14:19:39 +0000 Subject: [PATCH] Parser mostly working, able to build a table of instructions in memory correctly --- asm.c | 170 +++++++++++++++++++++++++++++++++++++++++++------ tests/test.asm | 12 ++-- 2 files changed, 158 insertions(+), 24 deletions(-) diff --git a/asm.c b/asm.c index d3636d0..c7b814d 100644 --- a/asm.c +++ b/asm.c @@ -14,6 +14,7 @@ struct Inst { u16int argc; u16int mode; char* arg[3]; + u16int p_arg[3]; // parsed into numeric values }; typedef struct Symbol Symbol; @@ -61,6 +62,7 @@ syntaxerr(char* msg) lineno, msg ); + free(msg); exits("Syntax error"); } @@ -73,7 +75,7 @@ Inst for(int i = 0; i<3; i++) mem[memc]->arg[i] = 0; mem[memc]->mode = 0; - return mem[memc]; + return mem[memc++]; } int @@ -105,12 +107,7 @@ findreg(char* name) return -1; } -void -newnop(void) -{ - newinst(OP_BR); - mem[memc]->argc+=2; // already initialized to 0; -} + int matchopcode(char* t) @@ -135,7 +132,123 @@ parseargs(char* t, Inst* in) in->arg[in->argc] = emalloc(strlen(buf) + 1); strcpy(in->arg[in->argc], buf); in->argc++;} - // FIXME should probably find some way to indicate arity errors here? + if(in->argc > 3) syntaxerr("Arity over 3!"); + // FIXME should probably find some way to indicate larger arity errors here? +} + +u16int +hexnparse(int b, char *s) +{ + u16int n; + long m; + if(s[0] != 'x') + syntaxerr(smprint("Not a hex value, must indicate base with a leading `x`: %s", s)); + s++; + m = strtol(s, 0, 16); + n = (u16int) m; + if(n != m || n >= (1 << b)) + syntaxerr(smprint("Hex value %s is too big to be represented in %d bits", s, b)); + return n; +} + +void +validate(Inst *in) +{ + int arity; + int p; + // Validate arity + switch(in->op){ + case OP_ADD: + case OP_AND: + case OP_LDR: + case OP_STR: + arity = 3; + break; + case OP_LD: + case OP_LDI: + case OP_LEA: + case OP_NOT: + case OP_ST: + case OP_STI: + arity = 2; + break; + case OP_BR: + case OP_JMP: + case OP_JSR: + case OP_TRAP: + arity = 1; + break; + case OP_RTI: + arity = 0; + break; + } + if(arity != in->argc) + syntaxerr(smprint("Wrong number of arguments for %s: need %d, got %d", opcode[in->op], arity, in->argc)); + + // All the special cases are on final args, so we'll work backward from the highest. + if(arity == 3){ + if(in->op == OP_ADD || in->op == OP_AND){ + p = findreg(in->arg[2]); + if(p == -1){ + in->p_arg[2] = hexnparse(5, in->arg[2]); + in->mode++; + } else { + in->p_arg[2] = (u16int) p; + } + } else { // STR and LDR which have the same signature + in->p_arg[2] = hexnparse(6, in->arg[2]); + } + } + + if(arity >= 2){ + switch(in->op){ + case OP_ADD: + case OP_AND: + case OP_LDR: + case OP_NOT: + case OP_STR: + p = findreg(in->arg[1]); + if(p == -1){ + syntaxerr(smprint("Argument %d to %s must be a register (got `%s`)", 2, opcode[in->op], in->arg[1])); + } + in->p_arg[1] = p; + break; + case OP_LD: + case OP_LDI: + case OP_LEA: + case OP_ST: + case OP_STI: + in->p_arg[1] = hexnparse(9, in->arg[1]); + break; + } + } + + if(arity >= 1){ + switch(in->op){ + case OP_BR: + in->p_arg[0] = hexnparse(9, in->arg[0]); + break; + case OP_JSR: + // FIXME Make sure mode is set by the parser! + if(in->mode) + in->p_arg[0] = hexnparse(11, in->arg[0]); + else + goto Reg; + break; + case OP_TRAP: + in->p_arg[0] = hexnparse(8, in->arg[0]); + break; + default: + Reg: + // They're all registers + p = findreg(in->arg[0]); + if(p == -1){ + syntaxerr(smprint("Argument %d to %s must be a register (got `%s`)", 1, opcode[in->op], in->arg[0])); + } + in->p_arg[0] = p; + break; + } + } } int @@ -147,15 +260,17 @@ parseline(char *l) int op = -1; Inst *inst; while(l[0] && pos < MAXARGS){ - if(l[0] == ';' || l[0] == '\n') return 0; - if(l[0] == ' ' || l[0] == '\t') { + if(l[0] == ';' || l[0] == '\n') return 0; // a comment + if(l[0] == ' ' || l[0] == '\t') { // blast through whitespace l++; - if(pos == 0) pos++; + if(pos == 0) pos++; // If it's leading whitespace we need to increment pos so that instructions will be parsed } else if(pos == 0 && l[0] != '\t'){ // It's a label symtab[symcount].addr = pc; + // FIXME can probably drop a strlen here by using returned value l += gettoken(&t, l); - symtab[symcount].label = t; + symtab[symcount].label = emalloc(strlen(t) + 1); + strcpy(symtab[symcount].label, t); symcount++; pos++; continue; @@ -165,11 +280,13 @@ parseline(char *l) if(c == 0) return 0; l += c; if(strcmp(t,".END") == 0) { - return -1; + return -1; // We're done parsing! } else if(strcmp(t,".ORIG") == 0){ //FIXME: only supporting hex for now + //FIXME: validate that this is the first instruction l+=2; //skipping hex number indicator to raw number gettoken(&t, l); + // FIXME: replace with hexenparse // re-use c c = (int) strtol(t, 0, 16); pc = origin = c; @@ -181,24 +298,33 @@ parseline(char *l) print("updated origin to %x\n", origin); return 0; } else if(strcmp(t,"NOP") == 0) { - newnop(); - pc++; - pos++; + op = OP_BR; + inst = newinst(op); + inst->arg[0] = "x0"; + inst->argc++; + goto validate; return 1; } else if(strcmp(t,"RET") == 0) { op = OP_JMP; - //FIXME: Add args + inst = newinst(op); + inst->arg[0] = "R7"; + inst->argc++; + goto validate; } else { op = matchopcode(t); pos++; if(op == -1) syntaxerr(smprint("Invalid instruction `%s`", t)); l++; + inst = newinst(op); } - inst = newinst(op); } else if(pos==2){ gettoken(&t, l); - parseargs(t, inst); + if(inst->argc == 0) parseargs(t, inst); + + validate: + validate(inst); + pc++; if(debug){ print("%d:OPCODE:%s", lineno, opcode[op]); for(int i = 0; iargc; i++) @@ -248,6 +374,12 @@ main(int argc, char* argv[]) symtab[i].addr ); } + for(int i = 0; i < memc; i++){ + print("%04x: %x %d", orig + i, mem[i]->op, mem[i]->mode); + for(int j = 0; jargc; j++) + print(" %x", mem[i]->p_arg[j]); + print("\n"); + } } exits(nil); } diff --git a/tests/test.asm b/tests/test.asm index e09d195..511e1a2 100644 --- a/tests/test.asm +++ b/tests/test.asm @@ -4,9 +4,11 @@ MAIN NOP NOP ;whatever NOP JUNK NOP - ADD r0,r0,x1 - BR 0x0000 - RET - JMP MAIN + ADD R0,R0,x1f + ADD R0,R0,R0 + AND R0,R0,R0 + BR x0000 +GARB RET .END - ADD r0,r0.x1 + ADD R0,R0.x111111 + RTI stuff ;syntax error -- 2.45.2