@@ 14,6 14,7 @@ struct Inst {
u16int argc;
u16int mode;
char* arg[3];
+ u16int p_arg[3]; // parsed into numeric values
};
typedef struct Symbol Symbol;
@@ 61,6 62,7 @@ syntaxerr(char* msg)
lineno,
msg
);
+ free(msg);
exits("Syntax error");
}
@@ 73,7 75,7 @@ Inst
for(int i = 0; i<3; i++)
mem[memc]->arg[i] = 0;
mem[memc]->mode = 0;
- return mem[memc];
+ return mem[memc++];
}
int
@@ 105,12 107,7 @@ findreg(char* name)
return -1;
}
-void
-newnop(void)
-{
- newinst(OP_BR);
- mem[memc]->argc+=2; // already initialized to 0;
-}
+
int
matchopcode(char* t)
@@ 135,7 132,123 @@ parseargs(char* t, Inst* in)
in->arg[in->argc] = emalloc(strlen(buf) + 1);
strcpy(in->arg[in->argc], buf);
in->argc++;}
- // FIXME should probably find some way to indicate arity errors here?
+ if(in->argc > 3) syntaxerr("Arity over 3!");
+ // FIXME should probably find some way to indicate larger arity errors here?
+}
+
+// Parse s, a hex literal written with a leading `x` (e.g. "x1f"), into a
+// value that must fit in b bits.
+//   b: width in bits of the field the value is destined for
+//   s: NUL-terminated token; s[0] must be 'x', the rest hex digits
+// Returns the parsed value. Any malformed or oversized literal is reported
+// through syntaxerr() with a freshly smprint()ed message.
+u16int
+hexnparse(int b, char *s)
+{
+	u16int n;
+	long m;
+	char *digits;
+	char *end;
+
+	if(s[0] != 'x')
+		syntaxerr(smprint("Not a hex value, must indicate base with a leading `x`: %s", s));
+	digits = s + 1;
+	m = strtol(digits, &end, 16);
+	// strtol alone would quietly turn "x" or "xZZ" into 0 and "x1G" into 1;
+	// insist that at least one digit was consumed and nothing is left over.
+	if(end == digits || *end != '\0')
+		syntaxerr(smprint("Not a valid hex value: %s", s));
+	n = (u16int) m;
+	// n != m catches negatives and anything wider than 16 bits.
+	if(n != m || n >= (1 << b))
+		syntaxerr(smprint("Hex value %s is too big to be represented in %d bits", s, b));
+	return n;
+}
+
+// Resolve argument idx of in as a register name and store the value
+// findreg() returns in in->p_arg[idx]. A name findreg() does not recognise
+// is reported through syntaxerr().
+static void
+needreg(Inst *in, int idx)
+{
+	int r;
+
+	r = findreg(in->arg[idx]);
+	if(r == -1)
+		syntaxerr(smprint("Argument %d to %s must be a register (got `%s`)", idx + 1, opcode[in->op], in->arg[idx]));
+	in->p_arg[idx] = (u16int) r;
+}
+
+// Check that in has the number of arguments its opcode requires, then
+// convert each textual argument in in->arg[] to its numeric form in
+// in->p_arg[]. For ADD/AND, in->mode is incremented when the third
+// argument is not a register and is parsed as a 5-bit hex value instead.
+// Every failure is reported through syntaxerr().
+void
+validate(Inst *in)
+{
+	int arity;
+	int p;
+
+	// Validate arity
+	switch(in->op){
+	case OP_ADD:
+	case OP_AND:
+	case OP_LDR:
+	case OP_STR:
+		arity = 3;
+		break;
+	case OP_LD:
+	case OP_LDI:
+	case OP_LEA:
+	case OP_NOT:
+	case OP_ST:
+	case OP_STI:
+		arity = 2;
+		break;
+	case OP_BR:
+	case OP_JMP:
+	case OP_JSR:
+	case OP_TRAP:
+		arity = 1;
+		break;
+	case OP_RTI:
+		arity = 0;
+		break;
+	default:
+		// Without this, arity would be read uninitialized below.
+		syntaxerr(smprint("Cannot validate unknown opcode %d", (int) in->op));
+		return;
+	}
+	if(arity != in->argc)
+		syntaxerr(smprint("Wrong number of arguments for %s: need %d, got %d", opcode[in->op], arity, in->argc));
+
+	// All the special cases are on final args, so we'll work backward from the highest.
+	if(arity == 3){
+		if(in->op == OP_ADD || in->op == OP_AND){
+			// Third operand is either a register or a 5-bit hex value;
+			// the latter is flagged by bumping mode.
+			p = findreg(in->arg[2]);
+			if(p == -1){
+				in->p_arg[2] = hexnparse(5, in->arg[2]);
+				in->mode++;
+			} else {
+				in->p_arg[2] = (u16int) p;
+			}
+		} else { // STR and LDR which have the same signature
+			in->p_arg[2] = hexnparse(6, in->arg[2]);
+		}
+	}
+
+	if(arity >= 2){
+		switch(in->op){
+		case OP_ADD:
+		case OP_AND:
+		case OP_LDR:
+		case OP_NOT:
+		case OP_STR:
+			needreg(in, 1);
+			break;
+		case OP_LD:
+		case OP_LDI:
+		case OP_LEA:
+		case OP_ST:
+		case OP_STI:
+			in->p_arg[1] = hexnparse(9, in->arg[1]);
+			break;
+		default:
+			break;
+		}
+	}
+
+	if(arity >= 1){
+		switch(in->op){
+		case OP_BR:
+			in->p_arg[0] = hexnparse(9, in->arg[0]);
+			break;
+		case OP_JSR:
+			// FIXME Make sure mode is set by the parser!
+			if(in->mode)
+				in->p_arg[0] = hexnparse(11, in->arg[0]);
+			else
+				needreg(in, 0);
+			break;
+		case OP_TRAP:
+			in->p_arg[0] = hexnparse(8, in->arg[0]);
+			break;
+		default:
+			// Every remaining opcode takes a register first.
+			needreg(in, 0);
+			break;
+		}
+	}
}
int
@@ 147,15 260,17 @@ parseline(char *l)
int op = -1;
Inst *inst;
while(l[0] && pos < MAXARGS){
- if(l[0] == ';' || l[0] == '\n') return 0;
- if(l[0] == ' ' || l[0] == '\t') {
+ if(l[0] == ';' || l[0] == '\n') return 0; // a comment
+ if(l[0] == ' ' || l[0] == '\t') { // blast through whitespace
l++;
- if(pos == 0) pos++;
+ if(pos == 0) pos++; // If it's leading whitespace we need to increment pos so that instructions will be parsed
} else if(pos == 0 && l[0] != '\t'){
// It's a label
symtab[symcount].addr = pc;
+ // FIXME can probably drop a strlen here by using returned value
l += gettoken(&t, l);
- symtab[symcount].label = t;
+ symtab[symcount].label = emalloc(strlen(t) + 1);
+ strcpy(symtab[symcount].label, t);
symcount++;
pos++;
continue;
@@ 165,11 280,13 @@ parseline(char *l)
if(c == 0) return 0;
l += c;
if(strcmp(t,".END") == 0) {
- return -1;
+ return -1; // We're done parsing!
} else if(strcmp(t,".ORIG") == 0){
//FIXME: only supporting hex for now
+ //FIXME: validate that this is the first instruction
l+=2; //skipping hex number indicator to raw number
gettoken(&t, l);
+ // FIXME: replace with hexnparse
// re-use c
c = (int) strtol(t, 0, 16);
pc = origin = c;
@@ 181,24 298,33 @@ parseline(char *l)
print("updated origin to %x\n", origin);
return 0;
} else if(strcmp(t,"NOP") == 0) {
- newnop();
- pc++;
- pos++;
+ op = OP_BR;
+ inst = newinst(op);
+ inst->arg[0] = "x0";
+ inst->argc++;
+ goto validate;
return 1;
} else if(strcmp(t,"RET") == 0) {
op = OP_JMP;
- //FIXME: Add args
+ inst = newinst(op);
+ inst->arg[0] = "R7";
+ inst->argc++;
+ goto validate;
} else {
op = matchopcode(t);
pos++;
if(op == -1)
syntaxerr(smprint("Invalid instruction `%s`", t));
l++;
+ inst = newinst(op);
}
- inst = newinst(op);
} else if(pos==2){
gettoken(&t, l);
- parseargs(t, inst);
+ if(inst->argc == 0) parseargs(t, inst);
+
+ validate:
+ validate(inst);
+ pc++;
if(debug){
print("%d:OPCODE:%s", lineno, opcode[op]);
for(int i = 0; i<inst->argc; i++)
@@ 248,6 374,12 @@ main(int argc, char* argv[])
symtab[i].addr
);
}
+ for(int i = 0; i < memc; i++){
+ print("%04x: %x %d", orig + i, mem[i]->op, mem[i]->mode);
+ for(int j = 0; j<mem[i]->argc; j++)
+ print(" %x", mem[i]->p_arg[j]);
+ print("\n");
+ }
}
exits(nil);
}
@@ 4,9 4,11 @@ MAIN NOP
NOP ;whatever
NOP
JUNK NOP
- ADD r0,r0,x1
- BR 0x0000
- RET
- JMP MAIN
+ ADD R0,R0,x1f
+ ADD R0,R0,R0
+ AND R0,R0,R0
+ BR x0000
+GARB RET
.END
- ADD r0,r0.x1
+ ADD R0,R0.x111111
+ RTI stuff ;syntax error