~levithan/rek

346e197ab0038a54c90c75e858784a28b50d5c45 — levithan 2 months ago master
Add files
10 files changed, 871 insertions(+), 0 deletions(-)

A .gitignore
A Makefile
A README
A src/arek.c
A src/arek.h
A src/librek.c
A src/librek.h
A src/utils.c
A src/utils.h
A test/simple.rek
A  => .gitignore +1 -0
@@ 1,1 @@
build/*

A  => Makefile +22 -0
@@ 1,22 @@
# Source and output locations (both keep their trailing slash).
SRCDIR = src/
BUILDDIR = build/

CC := gcc
CFLAGS += -Wall -Wpedantic -O3 -std=c99
#CFLAGS += -DDEBUG

OBJS =  librek.o arek.o utils.o

# None of these targets name a file that is actually produced in the current
# directory (all outputs go to $(BUILDDIR)), so declare the entry points phony.
.PHONY: all build clean
all: build

build: $(OBJS) arek

# Compile one object per source file. The build directory is git-ignored and
# therefore missing on a fresh checkout, so create it before compiling.
$(OBJS):
	@mkdir -p $(BUILDDIR)
	$(CC) $(CFLAGS) -fPIC -c $(SRCDIR)$(patsubst %.o,%,$(@)).c -o $(BUILDDIR)$@

# Link the assembler. The explicit dependency on the objects keeps the link
# step ordered after compilation even with parallel make (-j).
arek: $(OBJS)
	$(CC) $(CFLAGS) $(foreach O, $(OBJS), $(BUILDDIR)$(O)) -o $(BUILDDIR)$@

clean:
	rm -vf $(BUILDDIR)*


A  => README +6 -0
@@ 1,6 @@
rek
===

Work in progress.

Assembly language with a simple and minimalistic instruction set.

A  => src/arek.c +379 -0
@@ 1,379 @@
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>

#include "arek.h"

static program rek_out;
static labels_list rek_labels;
static FILE *ifd, *ofd;

/*
 * Puts a program image and the global label list into their initial state.
 *
 * p: program image to initialise. Its header receives the start magic and a
 *    default entry point equal to the size of the header (i.e. the first data
 *    byte), its data area is zero-filled and its tail receives the end magic
 *    with an empty data size.
 *
 * The label list starts out empty; its storage is allocated on demand by
 * add_label() through realloc(), so nothing needs to be allocated here.
 */
void
init(program *p)
{
  pheader *h  = &p->header;
  ptail *t    = &p->tail;

  h->psmag  = magic_start;
  h->pentry = sizeof(h->psmag) + sizeof(h->pentry); /* default it to data start */

  /* clean prog data */
  memset(p->data, 0, sizeof p->data);

  /* Do not rely on `p` living in zero-initialised static storage. */
  t->reserved  = 0;
  t->data_size = 0;
  t->pemag     = magic_end;

  rek_labels.labels = NULL;
  rek_labels.amount = 0;
}

/*
 * Label table helpers.
 *
 * The records are kept in one contiguous heap array of `label` elements.
 * labels_list declares the storage as `label **`, so it is converted to
 * `label *` on every access; add_label() and get_label() must agree on this
 * layout.
 */

/*
 * Appends a label to the list.
 *
 * l:    list to extend.
 * addr: address the label refers to.
 * name: NUL-terminated label name; only characters accepted by
 *       is_char_name() are allowed and it must fit in LABEL_SIZE bytes
 *       including the terminator.
 *
 * Terminates the program through bail()/fatal() on an invalid or too long
 * name, when the list is full, or when memory runs out.
 * Always returns NULL.
 */
void *
add_label(labels_list *l, uint16_t addr, const char *name)
{
  label entry;
  label *table;
  size_t len;

  memset(&entry, 0, sizeof entry);

  for (len = 0; name[len] != '\0'; len++) {
    /* keep one byte for the terminator */
    if (len >= LABEL_SIZE - 1) {
      bail();
      fatal("label names can be at most %d characters long\n", LABEL_SIZE - 1);
    }
    if (!is_char_name(name[len])) {
      bail();
      fatal("characters 0-9a-zA-Z_ are only allowed for labels");
    }
    entry.name[len] = name[len];
  }
  entry.name[len] = '\0';
  entry.addr = addr;

  /* `amount` is 16 bits wide; refuse to wrap it around */
  if (l->amount == UINT16_MAX) {
    bail();
    fatal("too many labels (maximum is %d)\n", UINT16_MAX);
  }

  /* grow through a temporary so the old block is not lost on failure */
  table = realloc(l->labels, sizeof(label) * ((size_t)l->amount + 1));
  if (table == NULL) {
    bail();
    fatal("out of memory while storing label '%s'\n", entry.name);
  }

  table[l->amount] = entry;
  l->labels = (label **)table;
  l->amount++;

#ifdef DEBUG
  debug("\tLabel: %s Addr: 0x%04x\n",
      table[l->amount - 1].name, table[l->amount - 1].addr);
#endif
  return NULL;
}

/*
 * Looks a label up by name.
 *
 * l:    list to search.
 * name: NUL-terminated name to look for (exact match).
 *
 * Returns a pointer to the first matching record inside the list storage, or
 * NULL when no label has that name. The pointer is invalidated by the next
 * add_label() call, which may move the storage.
 */
label *
get_label(labels_list *l, const char *name)
{
  label *table = (label *)l->labels;

  for (uint16_t i = 0; i < l->amount; i++) {
    if (strcmp(table[i].name, name) == 0)
      return &table[i];
  }

  return NULL;
}

/*
 * Releases the resources held by the assembler: the label storage and the
 * input/output streams.
 *
 * Every resource is reset after being released, so calling bail() more than
 * once, or before a stream has been opened, is harmless.
 */
static void
bail(void)
{
  /* free any defined label */
  free(rek_labels.labels);
  rek_labels.labels = NULL;
  rek_labels.amount = 0;

  /* fclose(NULL) is undefined, so only close what was actually opened */
  if (ofd != NULL) {
    fclose(ofd);
    ofd = NULL;
  }
  if (ifd != NULL) {
    fclose(ifd);
    ifd = NULL;
  }
}

/* is_valid_line does most of the heavy work on checking the syntax of the line
 * being ingested and assembled. */
int8_t
is_valid_line (const char *s, const size_t l)
{
  instruction tmpins = {-1, '\0', -1};
  size_t pos = 0;

  /* validate token */
  switch(*s) {
    case(TOKEN_MOV):
    case(TOKEN_ADD):
    case(TOKEN_SUB):
    case(TOKEN_CMP):
    case(TOKEN_JMP):
      /* TODO:
       * check that the address is a valid under the TOTAL_PAGE_SIZE spectrum
       */
    case(TOKEN_JE):
    case(TOKEN_JNE):
      /* up until this one are the instructions that take any kind of argument */
      tmpins = get_instruction(*s);
      break;
    case(TOKEN_ORIGIN):
    case(TOKEN_LABEL):
      break;
    case(TOKEN_COMMENT):
      /* comments should be able to contain any kind of char */
      return 1;
    default:
      error ("invalid token '%c' (line:%d)\n", *s, l);
      return 0;
  }

  s++;pos++;
  /* check parameters */
  if (tmpins.code != 0) {
    if (strlen(s) == 0) {
      error ("instruction '%s' requires arguments (line:%d)\n", tmpins.name, l);
      return 0;
    }

    /* check the amount of parameters
     * rek accepts up to 2 arguments on MOV, ADD, SUB and CMP instructions and 
     * that isn't going to change so far. */
    if (tmpins.nargs == 2 && param_check(s) == NULL) {
      error ("instruction '%c' requires 2 arguments (line:%d)\n",
          tmpins.name, l);
      return 0;
    }
  }

  /* search for invalid characters */
  while (*s != '\n'&& *s != '\0' /*EOF*/) {
    if (!is_char_name(*s) && !is_char_special(*s)) { 
      error ("invalid character (dec: %d, line:%d, pos:%d)\n", *s, l, pos);
      return 0;
    }
    s++;pos++;
  }

  return 1;
}

instruction
get_instruction (const int8_t token)
{
  uint8_t i = 0;
  instruction ret;

  while (1) {
    ret = set[i++];
    if (ret.code == -1)
      break; /* token not found? */
    if (token != ret.name)
      continue; /* next! */
    break;
  }
  return ret;
}

int
main(int argc, char **argv)
{
  char *ifile, *ofile, *iline = NULL;
  uint16_t addrpos;
  size_t  iline_s, iline_l;
  ssize_t line_ss;

  if (argc <= 1)
    fatal("need arguments");

  argc--;
  (void)*argv++;

  /* initialize file desc to be used */
  ifile = argv[0];
  argc--;
  if (argc)
    ofile = argv[1];
  else 
    ofile = DEFAULT_OUT;

  if ((ifd = fopen(ifile, "r")) == NULL)
    fatal(NULL);
  if ((ofd = fopen(ofile, "w")) == NULL)
    fatal(NULL);

  /* initialize structs */
  init(&rek_out);

  /* read code lines */
  addrpos = iline_l = 0;
  iline = (char *)malloc(MAX_ILINE);
  while ((line_ss = getline(&iline, &iline_s, ifd)) != -1) {
    char *pline;
    uint8_t op;
    iline_l++;

    pline = skip_blank(iline);
    line_ss -= (pline - iline); /* ignore blank chars */
#ifdef DEBUG
    debug("Input line: %s", pline);
    /* if (iline[line_ss - 1] != '\n') putchar('\n'); */
#endif

    /* longer line than expected? */
    if (line_ss > MAX_ILINE) {
      bail();
      fatal("input line can be max %d bytes long (line: %d, length: %d)\n",
          MAX_ILINE, iline_l, line_ss);
    }
    
    /* validate line */
    if (!is_valid_line(pline, iline_l)) {
      bail();
      exit(1);
    }

    /* processing of code starts */
    op = pline[0];
    pline++;
    line_ss -= 2;
    pline[line_ss] = '\0';

    /* comments */
    if (op == TOKEN_COMMENT) {
#ifdef DEBUG
      debug("\tComment: %s\n", pline);
#endif
      goto next_line;
    }

    /* origin */
    if (op == TOKEN_ORIGIN) {
      uint16_t org;
      sscanf(pline, "%hx", &org);

      while ( addrpos < org ) {
        rek_out.tail.data_size++;
        rek_out.data[addrpos++] = ORIGIN_FILL;
      }
#ifdef DEBUG
      debug("\tOrigin at: 0x%04x\n", org);
#endif
      goto next_line;
    }

    /* label */
    if (op == TOKEN_LABEL) {
      add_label(&rek_labels, addrpos, pline);
      goto next_line;
    }

    /* cmds */
    instruction ins = get_instruction(op);
    if (ins.code != -1) {
      /* found token */
      opcode tmpop;
      const char *p1, *p2;
      uint8_t pt1 = ARG_NIL, pt2 = ARG_NIL;
      label *foundlabel = NULL;

      memset(&tmpop, 0, sizeof(opcode));
      tmpop.code = ins.code;

      if (ins.nargs == 2) {
        /* get both parameters and types */
        p2 = param_break(pline);
        p1 = pline;

        pt1 = get_param_type(p1);
        pt2 = get_param_type(p2);
        if (pt1 == ARG_NIL || pt2 == ARG_NIL) {
          bail();
          fatal("argument %d is invalid (line: %d)\n", pt1 ? 2 : 1, iline_l);
        }

        if (op == TOKEN_MOV && pt2 == ARG_IMM) {
          bail();
          fatal("can't move data to an immediate (line: %d)\n", iline_l);
        }

#ifdef DEBUG
        debug("\tInstruction: %c Param1: %s Param2: %s\n", ins.name, p1, p2);
#endif
        if (pt1 == ARG_REG) {
          tmpop.code += 5;
          tmpop.wp1 = (uint16_t)is_reg(p1);
        } else if (pt1 == ARG_MEM) {
          tmpop.code += 10;
          tmpop.wp1 = hex2int(p1);
        } else {
          if (*p1 == '$')
            tmpop.wp1 = hex2int(++p1);
          if (*p1 == '#')
            tmpop.wp1 = dec2int(++p1);
        }
        
        tmpop.code += pt2;
        if (pt2 == ARG_REG)
          tmpop.wp2 = (uint16_t)is_reg(p2);
        else if (pt2 == ARG_MEM)
          tmpop.wp2 = hex2int(p2);
        else {
          if (*p2 == '$')
            tmpop.wp2 = hex2int(++p2);
          if (*p2 == '#')
            tmpop.wp2 = dec2int(++p2);
        }
      } else {
        /* requires only one param */
        switch (op) {
        case(TOKEN_JE):
        case(TOKEN_JNE):
        case(TOKEN_JMP):
          foundlabel = get_label(&rek_labels, pline);
          if (foundlabel) {
            /* jumping to a label */
            tmpop.wp1 = foundlabel->addr;
          } else if (is_addr(pline, 0)) {
            /* jumping to an address */
            pline++;
            sscanf(pline, "%hx", &tmpop.wp1);
          } else {
            bail();
            fatal("label '%s' not found\n", pline);
          }
#ifdef DEBUG
          switch (ins.name) {
            case(TOKEN_JE):
              debug("\tInstruction: JE Addr: 0x%04x\n",  tmpop.wp1); break;
            case(TOKEN_JNE):
              debug("\tInstruction: JNE Addr: 0x%04x\n", tmpop.wp1); break;
            default:
              debug("\tInstruction: JMP Addr: 0x%04x\n", tmpop.wp1);
          }
#endif
          break;
        }
      }

#ifdef DEBUG
      debug("\tOffset of instruction: %d\n", rek_out.tail.data_size);
#endif
      /* got opcode, write it to buffer */
      rek_out.data[addrpos++] = tmpop.code;
      rek_out.tail.data_size++;

      rek_out.data[addrpos++] = tmpop.wp1 & 0xff;
      rek_out.data[addrpos++] = tmpop.wp1 >> 0x8;
      rek_out.tail.data_size += 2;
      if (ins.nargs == 2) {
        rek_out.data[addrpos++] = tmpop.wp2 & 0xff;
        rek_out.data[addrpos++] = tmpop.wp2 >> 0x8;
        rek_out.tail.data_size += 2;
      }
    }
next_line:
    iline_s = 0;
  }
  free(iline);

  /* done */
  fwrite(&rek_out.header, sizeof(pheader), 1, ofd);
  fwrite(&rek_out.data, rek_out.tail.data_size, 1, ofd);
  fwrite(&rek_out.tail, sizeof(ptail), 1, ofd);
#ifdef DEBUG
  debug("Successful compilation! Wrote %d bytes to the output file\n",
      sizeof(pheader) + rek_out.tail.data_size + sizeof(ptail));
#endif
  bail();
  return 0;
}


A  => src/arek.h +21 -0
@@ 1,21 @@
#define PROGNAME  "arek"
#define PROGVERS  0,0,1

#include "librek.h"
#include "utils.h"

/* Longest accepted input line in bytes, measured after its leading blanks. */
#define MAX_ILINE   0xFF
/* Output file name used when none is given on the command line. */
#define DEFAULT_OUT "r.out"

/*
 * Collection of the labels defined so far by the source being assembled.
 *
 * NOTE(review): although the storage is declared as `label **`, arek.c
 * addresses it through casts to `label *` (see add_label/get_label) —
 * confirm the intended element type before relying on the declared one.
 */
typedef struct {
  label   **labels;   /* heap storage holding the label records */
  uint16_t  amount;   /* number of labels added so far */
} labels_list;

/* arek.c */

/* Prepares the program image (magic numbers, entry point, cleared data). */
void init(program *p);
/* Records `name` as a label for address `addr`; always returns NULL. */
void *add_label(labels_list *labels, uint16_t addr, const char *name);
/* Returns the label called `name`, or NULL when there is none. */
label *get_label(labels_list *l, const char *name);
/* Frees the label storage and closes the input and output streams. */
static void bail();
/* Checks the syntax of source line `s` (line number `l`): 1 valid, 0 not. */
int8_t is_valid_line (const char *s, const size_t l);
/* Looks `token` up in the instruction table; code == -1 means not found. */
instruction get_instruction (const int8_t token);

A  => src/librek.c +1 -0
@@ 1,1 @@
/* Placeholder declaration so this file is not an empty translation unit.
 * NOTE(review): presumably here to silence the -Wpedantic diagnostic for
 * empty translation units — confirm before removing. */
typedef short _fool_pedantic_warning_from_gcc;

A  => src/librek.h +203 -0
@@ 1,203 @@
#define VERSION 0,0,1
#include <stdint.h>

/* Maximum size of program
 * 65535 bytes of maximum size
 */
#define TOTAL_PROG_SIZE 0xFFFF
#define ORIGIN_FILL     0x00 /* null filling */

/*
 * Magic numbers marking the first and last four bytes of a program image.
 *
 * Stored in little-endian byte order the values read:
 *   start: 'R' 'E' 'K' 0xFF
 *   end:   0xFF 'K' 'E' 'R'
 */
static const uint32_t magic_start = UINT32_C(0xFF4B4552);
static const uint32_t magic_end   = UINT32_C(0x52454BFF);

/*
 * Argument types.
 * The numeric values matter: the assembler adds the type of the second
 * operand to the base opcode, so they must stay 0..3 in this order.
 */
enum {
  ARG_NIL = 0,  /* no / invalid argument */
  ARG_IMM = 1,  /* immediate value */
  ARG_REG = 2,  /* register */
  ARG_MEM = 3   /* memory address */
};

/*
 * Instruction set
 *
 * Each instruction has a one-character source token (TOKEN_*) and a base
 * opcode (INS_*). For the two-operand instructions the final opcode is
 *   base + {0, 5, 10} for an immediate/register/memory first operand
 *        + {1, 2, 3}  for an immediate/register/memory second operand.
 * XXX: there are better ways to do this.
 */
#define TOKEN_MOV '='
#define TOKEN_ADD '+'
#define TOKEN_SUB '-'
#define TOKEN_CMP '~'
#define TOKEN_JMP '^'
#define TOKEN_JE  '>'
#define TOKEN_JNE '<'

enum {
  /* MOV: an immediate can never be the destination */
  INS_MOV                 = 0x10,
  INS_MOV_IMM_IMM_INVALID = INS_MOV + 1,
  INS_MOV_IMM_REG         = INS_MOV + 2,
  INS_MOV_IMM_MEM         = INS_MOV + 3,
  INS_MOV_REG             = INS_MOV + 5,
  INS_MOV_REG_IMM_INVALID = INS_MOV_REG + 1,
  INS_MOV_REG_REG         = INS_MOV_REG + 2,
  INS_MOV_REG_MEM         = INS_MOV_REG + 3,
  INS_MOV_MEM             = INS_MOV + 10,
  INS_MOV_MEM_IMM_INVALID = INS_MOV_MEM + 1,
  INS_MOV_MEM_REG         = INS_MOV_MEM + 2,
  INS_MOV_MEM_MEM         = INS_MOV_MEM + 3,

  /* ADD */
  INS_ADD         = 0x20,
  INS_ADD_IMM_IMM = INS_ADD + 1,
  INS_ADD_IMM_REG = INS_ADD + 2,
  INS_ADD_IMM_MEM = INS_ADD + 3,
  INS_ADD_REG     = INS_ADD + 5,
  INS_ADD_REG_IMM = INS_ADD_REG + 1,
  INS_ADD_REG_REG = INS_ADD_REG + 2,
  INS_ADD_REG_MEM = INS_ADD_REG + 3,
  INS_ADD_MEM     = INS_ADD + 10,
  INS_ADD_MEM_IMM = INS_ADD_MEM + 1,
  INS_ADD_MEM_REG = INS_ADD_MEM + 2,
  INS_ADD_MEM_MEM = INS_ADD_MEM + 3,

  /* SUB */
  INS_SUB         = 0x30,
  INS_SUB_IMM_IMM = INS_SUB + 1,
  INS_SUB_IMM_REG = INS_SUB + 2,
  INS_SUB_IMM_MEM = INS_SUB + 3,
  INS_SUB_REG     = INS_SUB + 5,
  INS_SUB_REG_IMM = INS_SUB_REG + 1,
  INS_SUB_REG_REG = INS_SUB_REG + 2,
  INS_SUB_REG_MEM = INS_SUB_REG + 3,
  INS_SUB_MEM     = INS_SUB + 10,
  INS_SUB_MEM_IMM = INS_SUB_MEM + 1,
  INS_SUB_MEM_REG = INS_SUB_MEM + 2,
  INS_SUB_MEM_MEM = INS_SUB_MEM + 3,

  /* CMP */
  INS_CMP         = 0x40,
  INS_CMP_IMM_IMM = INS_CMP + 1,
  INS_CMP_IMM_REG = INS_CMP + 2,
  INS_CMP_IMM_MEM = INS_CMP + 3,
  INS_CMP_REG     = INS_CMP + 5,
  INS_CMP_REG_IMM = INS_CMP_REG + 1,
  INS_CMP_REG_REG = INS_CMP_REG + 2,
  INS_CMP_REG_MEM = INS_CMP_REG + 3,
  INS_CMP_MEM     = INS_CMP + 10,
  INS_CMP_MEM_IMM = INS_CMP_MEM + 1,
  INS_CMP_MEM_REG = INS_CMP_MEM + 2,
  INS_CMP_MEM_MEM = INS_CMP_MEM + 3,

  /* jumps take a single operand */
  INS_JMP = 0x50,
  INS_JE  = 0x51,
  INS_JNE = 0x52,
};

/* Line tokens that are not instructions. */
#define TOKEN_COMMENT   '?'   /* the rest of the line is a comment */
#define TOKEN_ORIGIN    '.'   /* sets the origin address */
#define TOKEN_LABEL     '['   /* defines a label at the current address */

/* Character separating the two arguments of an instruction. */
#define PARAM_SEPARATOR '|'

/*
 * Main instruction structure: one entry of the instruction table.
 *
 * NOTES:
 *  Instructions accept <= 2 arguments right now.
 *  The fields are initialised positionally elsewhere, so their order must
 *  not change.
 */
typedef struct {
  int8_t      code;     /* base opcode byte; -1 marks "no instruction" */
  char        name;     /* single-character token used in source code */
  int8_t      nargs;    /* number of arguments */
  /*uint8_t     argt[2];   argument types */
} instruction;

static const instruction set[] = {
  {INS_MOV, TOKEN_MOV,  2, /*{0,0}, {0,0}*/},
  {INS_ADD, TOKEN_ADD,  2, /*{0,0}, {0,0}*/},
  {INS_SUB, TOKEN_SUB,  2, /*{0,0}, {0,0}*/},
  {INS_CMP, TOKEN_CMP,  2, /*{0,0}, {0,0}*/},
  {INS_JMP, TOKEN_JMP,  1, /*{0,0}, {0,0}*/},
  {INS_JE , TOKEN_JE ,  1, /*{0,0}, {0,0}*/},
  {INS_JNE, TOKEN_JNE,  1, /*{0,0}, {0,0}*/},
  {-1     , '\0'     , -1, /*{0,0}, {0,0}*/}
};

/*
 * One assembled instruction before it is serialised: the final opcode byte
 * followed by up to two 16-bit operands. The assembler writes each operand
 * to the program data low byte first.
 */
typedef struct {
  int8_t      code;     /* opcode byte */
  uint16_t    wp1;      /* word-size arg 1 */
  uint16_t    wp2;      /* word-size arg 2 */
} opcode;

/* Label structure: a name bound to an address inside the program data. */
typedef struct {
#define LABEL_SIZE 64           /* size of the name buffer, in bytes */
  char      name[LABEL_SIZE];   /* label name, NUL-terminated */
  uint16_t  addr;               /* positional address */
} label;

/*
 * Program structure
 *
 * An assembled program consists of a header, the data area and a tail. The
 * assembler writes the header and tail structures to the output file as they
 * are laid out in memory, so their fields must not be reordered.
 */

/* Leading part of a program. */
typedef struct {
  uint32_t  psmag;      /* start magic (magic_start) */
  uint32_t  pentry;     /* entry point; defaults to the size of this header */
} pheader;

/* Trailing part of a program. */
typedef struct {
  uint16_t  reserved;   /* not used by the code in this file */
  uint16_t  data_size;  /* number of data bytes produced by the assembler */
  uint32_t  pemag;      /* end magic (magic_end) */
} ptail;

/* main: the complete in-memory program image */
typedef struct {
  pheader   header;
  uint8_t   data[TOTAL_PROG_SIZE];  /* assembled code and origin padding */
  ptail     tail;
} program;

/*
 * CPU structure
 * reg size: 16-bits
 * NOTES:
 *  register _S:  "string" register. On each instruction step (_I) whatever
 *                is stored in that register is printed to the screen.
 *                If the NOSTRING flag of the FLAGS register is 1, character
 *                printing is disabled and the register can be used as a
 *                general purpose register.
 */

/* Prefix character of every register name in source code, e.g. "_A". */
#define TOKEN_REG   '_'

typedef struct {
  /* general purpose registers; each one is accessible as byte or word */
#define TOKEN_REG_A 'A'
  union {
    uint8_t   b;
    uint16_t  w;
  } reg_A;
#define TOKEN_REG_B 'B'
  union {
    uint8_t   b;
    uint16_t  w;
  } reg_B;
#define TOKEN_REG_C 'C'
  union {
    uint8_t   b;
    uint16_t  w;
  } reg_C;
#define TOKEN_REG_S 'S'
  union {
    uint8_t   b;
    uint16_t  w;
  } reg_S;          /* string register */
#define TOKEN_REG_I 'I'
  uint16_t  reg_I;  /* instruction pointer */
#define TOKEN_REG_F 'F'
  uint8_t   reg_F;  /* zero flag */
} cpu;

/* Numeric codes of the CPU registers. */
enum {
  CODE_REG_A = 0x0001,
  CODE_REG_B = 0x0002,
  CODE_REG_C = 0x0003,
  CODE_REG_S = 0x0004,
  CODE_REG_I = 0x0005,
  CODE_REG_F = 0x00ff,
};

A  => src/utils.c +207 -0
@@ 1,207 @@
#define _GNU_SOURCE
#include <stdio.h>
#include <stdarg.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>

#include "librek.h"

/*
 * Writes `prefix` followed by the printf-style message described by `fmt`
 * and `al` to stderr.
 */
static void
print_prefixed (const char *prefix, const char *fmt, va_list al)
{
  fprintf(stderr, "%s", prefix);
  vfprintf(stderr, fmt, al);
}

/*
 * Reports a non-fatal error on stderr.
 *
 * fmt: printf-style format for the message; NULL prints no message.
 *
 * When errno is set, its description is printed afterwards with perror().
 */
void
error (const char *fmt, ...)
{
  if (fmt != NULL) {
    va_list al;

    va_start(al, fmt);
    print_prefixed("error: ", fmt, al);
    va_end(al);
  }

  if (errno != 0)
    perror(NULL);
}

/*
 * Reports a fatal error on stderr and terminates the process.
 *
 * fmt: printf-style format for the message; NULL prints no message.
 *
 * The exit status is errno when it is set (its description is printed
 * first), and 1 otherwise.
 */
void
fatal (const char *fmt, ...)
{
  if (fmt != NULL) {
    va_list al;

    va_start(al, fmt);
    print_prefixed("fatal: ", fmt, al);
    va_end(al);
  }

  /* die */
  if (errno == 0)
    exit(1);

  error("");
  exit(errno);
}

#ifdef DEBUG
/*
 * Prints a printf-style message to stdout, prefixed with "[DEBUG] ".
 * Only compiled in when DEBUG is defined.
 */
void
debug (const char *fmt, ...)
{
  va_list args;

  fputs("[DEBUG] ", stdout);

  va_start(args, fmt);
  vprintf(fmt, args);
  va_end(args);
}
#endif

/* Returns 1 when `c` is an ASCII digit or letter, 0 otherwise. */
int8_t
is_char_alphanum (const char c)
{
  if (c >= '0' && c <= '9')
    return 1;
  if (c >= 'A' && c <= 'Z')
    return 1;
  if (c >= 'a' && c <= 'z')
    return 1;
  return 0;
}

/* Returns 1 when `c` is a hexadecimal digit (0-9, A-F, a-f), 0 otherwise. */
int8_t
is_char_hex (const char c)
{
  if (c >= '0' && c <= '9')
    return 1;
  if (c >= 'A' && c <= 'F')
    return 1;
  if (c >= 'a' && c <= 'f')
    return 1;
  return 0;
}

/* Returns 1 when `c` is a decimal digit, 0 otherwise. */
int8_t
is_char_num (const char c)
{
  if (c < '0' || c > '9')
    return 0;
  return 1;
}

/*
 * Returns 1 when `c` may appear in a name: an alphanumeric character or the
 * register prefix character (TOKEN_REG). Returns 0 otherwise.
 */
int8_t
is_char_name (const char c)
{
  if (c == TOKEN_REG)
    return 1;
  if (is_char_alphanum(c))
    return 1;
  return 0;
}

/*
 * Returns 1 when `c` is one of the punctuation characters allowed in
 * arguments ('$', '#' or '|'), 0 otherwise.
 */
int8_t
is_char_special (const char c)
{
  switch (c) {
    case '$':
    case '#':
    case '|':
      return 1;
    default:
      return 0;
  }
}

/*
 * Checks that `s` is a hexadecimal number of at most 4 characters.
 *
 * Scanning stops at the end of the string or at a newline. Returns 1 when
 * every scanned character is a hexadecimal digit, 0 when the string is
 * empty, longer than 4 characters or contains anything else.
 */
int8_t
is_hex (const char *s)
{
  const char *cur;

  if (s[0] == '\0' || strlen(s) > 4)
    return 0;

  for (cur = s; *cur != '\0' && *cur != '\n'; cur++) {
    if (!is_char_hex(*cur))
      return 0;
  }
  return 1;
}

/*
 * Checks that `s` is a decimal number that fits the program address space.
 *
 * s: NUL-terminated string of at most 5 characters; scanning stops at the
 *    end of the string or at a newline.
 *
 * Returns 1 when every scanned character is a decimal digit and the value
 * does not exceed TOTAL_PROG_SIZE, 0 otherwise (including the empty string).
 */
int8_t
is_dec (const char *s)
{
  long value = 0;

  if (*s == '\0' || strlen(s) > 5) return 0;

  /* Accumulate the value while validating: the range check must look at the
   * digits themselves, not at what is left after the scan. At most 5 digits
   * are read, so `value` cannot overflow. */
  for (; *s != '\n' && *s != '\0'; s++) {
    if (!is_char_num(*s))
      return 0;
    value = value * 10 + (*s - '0');
  }

  if (value > TOTAL_PROG_SIZE) return 0;
  return 1;
}

/*
 * Checks whether `s` is an immediate value: '#' followed by a decimal
 * number or 'x' followed by a hexadecimal number.
 * Returns the verdict of is_dec()/is_hex() on the digits, 0 for any other
 * prefix.
 */
int8_t
is_imm (const char *s)
{
  const char *digits = s + 1;

  switch (*s) {
    case '#':
      return is_dec(digits);
    case 'x':
      return is_hex(digits);
    default:
      return 0;
  }
}

/*
 * Checks whether `s` is an address.
 *
 * pointer: non-zero to require the memory-pointer form, '$' followed by a
 *          hexadecimal number; zero to accept the immediate forms handled
 *          by is_imm().
 *
 * Returns non-zero when `s` matches the requested form, 0 otherwise.
 */
int8_t
is_addr (const char *s, int8_t pointer)
{
  if (!pointer)
    return is_imm(s);

  if (*s == '$')
    return is_hex(s + 1);

  return 0;
}

/*
 * Checks whether `s` names a register: the register prefix (TOKEN_REG)
 * followed by exactly one register letter.
 *
 * Returns the register letter (its ASCII representation) on success and 0
 * otherwise.
 */
int8_t
is_reg (const char *s)
{
  if (s[0] != TOKEN_REG)
    return 0;

  /* Validate s[1] before looking at s[2]: for the one-character string "_"
   * s[1] is the terminator and s[2] lies outside the string. */
  switch (s[1]) {
    case(TOKEN_REG_A):
    case(TOKEN_REG_B):
    case(TOKEN_REG_C):
    case(TOKEN_REG_S):
    case(TOKEN_REG_I):
    case(TOKEN_REG_F):
      break;
    default:
      return 0;
  }

  /* nothing may follow the register letter */
  if (s[2] != '\0')
    return 0;

  return s[1]; /* return the ASCII representation */
}

/*
 * Parses the hexadecimal number that follows the one-character prefix of
 * `s` (for example the '$' of "$00ff").
 * Returns the parsed value, or 0 when no hexadecimal digits follow.
 */
uint16_t
hex2int (const char *s)
{
  const char *digits = s + 1; /* skip the prefix character */
  uint16_t value = 0;

  sscanf(digits, "%hx", &value);
  return value;
}

/*
 * Parses the decimal number at the start of `s` with atoi() and returns it
 * converted to 16 bits.
 */
uint16_t
dec2int (const char *s)
{
  const int parsed = atoi(s);

  return (uint16_t)parsed;
}

/*
 * Verifies that `s` holds exactly two parameters, i.e. exactly one
 * PARAM_SEPARATOR.
 * Returns a pointer to the start of the second parameter, or NULL when the
 * separator is missing or appears more than once.
 */
const char *
param_check (const char *s)
{
  const char *second = strchr(s, PARAM_SEPARATOR);

  if (second == NULL)
    return NULL;

  second++; /* step over the separator */

  /* a further separator means there are more than two parameters */
  if (strchr(second, PARAM_SEPARATOR) != NULL)
    return NULL;

  return second;
}

/*
 * Splits `s` in place at its first PARAM_SEPARATOR: the separator is
 * overwritten with a terminator, so `s` becomes the first parameter.
 * Returns a pointer to the second parameter.
 *
 * The caller must make sure a separator is present.
 */
const char *
param_break (char *s)
{
  char *sep = strchr(s, PARAM_SEPARATOR);

  *sep = '\0';
  return sep + 1;
}

/*
 * Classifies parameter `p`.
 * Returns ARG_IMM for an immediate, ARG_MEM for a '$' memory address,
 * ARG_REG for a register and ARG_NIL when it is none of them. The checks
 * are made in that order.
 */
uint8_t
get_param_type(const char *p)
{
  uint8_t type = ARG_NIL;

  if (is_imm(p))
    type = ARG_IMM;
  else if (is_addr(p, 1))
    type = ARG_MEM;
  else if (is_reg(p))
    type = ARG_REG;

  return type;
}

/*
 * Returns a pointer to the first character of `s` that is neither a space
 * nor a tab (possibly the terminator).
 */
char *
skip_blank (char *s)
{
  while (*s == ' ' || *s == '\t')
    s++;
  return s;
}

A  => src/utils.h +22 -0
@@ 1,22 @@
/* utils.c */

/* Diagnostics: printf-style messages. fatal() does not return. */
void error(const char *fmt, ...);
void fatal(const char *fmt, ...);
#ifdef DEBUG
void debug(const char *fmt, ...);
#endif

/* Character classification: each returns 1 on a match and 0 otherwise. */
int8_t is_char_alphanum (const char c);
int8_t is_char_hex (const char c);
int8_t is_char_num (const char c);
int8_t is_char_name (const char c);
int8_t is_char_special (const char c);

/* Operand classification: each returns 0 when `s` does not match. */
int8_t is_hex (const char *s);
int8_t is_dec (const char *s);
int8_t is_imm (const char *s);
int8_t is_addr (const char *s, int8_t pointer);
int8_t is_reg (const char *s);   /* returns the register letter on a match */

/* Number parsing; hex2int() skips the first character of `s` (a prefix). */
uint16_t hex2int (const char *s);
uint16_t dec2int (const char *s);

/* Parameter handling around PARAM_SEPARATOR. */
const char *param_check (const char *s);
const char *param_break (char *s);
uint8_t get_param_type(const char *p);

/* Returns `s` advanced past its leading spaces and tabs. */
char * skip_blank (char *s);

A  => test/simple.rek +9 -0
@@ 1,9 @@
?start of the code
  =#97|_A
[start_loop
  +_A|_B
  =_A|$00ff
  =$00ff|_S
  ~_B|#122
  <start_loop
?no more code