~charles/awk-riscv

a4ae14ff63742d39f14b8023321ae11e1896ed6d — Charles Daniels 10 months ago 659c694
start sketching out the assembler
3 files changed, 292 insertions(+), 26 deletions(-)

M riscv.awk
M tests/simulation/andi.txt
A tests/simulation/asm_immediate.txt
M riscv.awk => riscv.awk +275 -25
@@ 86,7 86,7 @@
#	If REG is 0 or greater than 31, this prints 0.
#
# pcpeek
#	
#
#	Reads the 32-bit PC value.
#
# pcpoke ADDR


@@ 136,6 136,40 @@
#	Each line of input is treated as a 32-bit instruction literal and its
#	disassembly is shown on standard out.
#
# assemble
#
#	Each line of input is treated as an assembler instruction. The
#	instruction will be parsed, and if it can be assembled successfully the
#	result will be placed at asmcursor, which will be incremented by 4
#	thereafter (thus, using -v asmcursor=xyz will allow you to change the
#	location in memory where the assembled program is placed).
#
#	Note that if the assembler encounters any errors, the program up to
#	the point at which the error occurred will still be placed in memory.
#
#	The assembler syntax used is heavily based on that used by RARS.
#
#
#                        INFLUENTIAL VARIABLES
#
# The following variables can be specified using the -v flag to awk to change
# the behavior of the program.
#
# mode
#
#	Can be set to start the program in a particular mode when it begins.
#	This can be useful for example when processing a normal assembler file
#	to avoid having to place "mode assemble" at the top of the file.
#
# PC
#
#	Can be used to change the address at which the program begins executing.
#
# asmcursor
#
#	Can be used to change the address at which the assembler places the
#	program that it assembles.
#
#
#                            MEMORY MODEL
#


@@ 151,8 185,14 @@
#
# Assertions can cause an error message to be displayed on standard out.
# Additionally, each failed assertion increments a counter. This counter is
# used as the program's exist code. This can allow a RISC-V program (or the
# simulator) to be tested by monitoring the exit code.
# used in part to determine the program's exit code. This can allow a RISC-V
# program (or the simulator) to be tested by monitoring the exit code.
#
#
#                              EXIT CODE
#
# The program's exit code is the sum of the number of assertion failures and
# the number of assembler errors.
#
#
#                         DIFFERENCES FROM RV32I


@@ 182,6 222,66 @@
###############################################################################


#### GLOBAL VARIABLES #########################################################
#
# This is AWK, technically all variables are global. However this list is used
# to store variables which are intentionally used to pass data between
# functions in this program. Other variables may be global per se, but are not
# (intentionally) used to pass data around, and cases where they do so should
# be fixed or they should be added to this list.
#
# PC
#
#	Stores the current program counter.
#
# assert_errors
#
#	Stores the number of assertion errors encountered so far.
#
# assemble_errors
#
#	Stores the number of assembler errors encountered so far.
#
# traceregs
#
#	Stores whether or not register accesses should be traced.
#
# tracemem
#
#	Stores whether or not memory accesses should be traced.
#
# traceinst
#
#	Stores whether or not instruction execution should be traced.
#
# mode
#
#	Stores the current operating mode.
#
# regs
#
#	Table associating register numbers with their current value. Cells
#	should store 32-bit integers.
#
# mem
#
#	Table associating memory addresses with their current value. Cells
#	should store 8-bit integers.
#
# labels
#
#	Table associating string label names with the memory address at which
#	they occur.
#
# asmcursor
#
#	The memory address at which the assembler will place the next assembled
#	instruction. Each time an instruction is assembled, this is incremented
#	by 4.
#
#
###############################################################################


#### HELPER METHODS ###########################################################



@@ 233,6 333,14 @@ function twoabs(v) {
	}
}

# Drops the first field of the current record: $1 takes the value of $2, $2
# takes the value of $3, and so on, then NF is reduced by 1. The previous
# contents of $1 are destroyed.
#
# Does nothing when the record has no fields, because decrementing NF would
# make it negative (gawk treats a negative NF as a fatal error).
#
# i is a local loop index; it is declared as an extra parameter (the AWK idiom
# for local variables) so that a global "i" is never clobbered.
function fieldshift(    i) {
	if (NF < 1) {
		return
	}
	for (i = 1; i < NF; i++) {
		$i = $(i+1)
	}
	NF--
}

#### INSTRUCTION DECODING #####################################################



@@ 379,6 487,43 @@ function disasm(v) {
	return sprintf("0x%08x: UNKNOWN", v)
}

# Translate a register operand into its register number (0 through 31).
# Three spellings are recognised for each register: the bare number ("5"),
# the x-prefixed name ("x5") and the ABI name ("t0"). Anything else yields -1.
#
# abinames, names, count and k are locals.
function parsereg(r,    abinames, names, count, k) {
	# numeric spellings: 0..31 with no leading zeros, optionally x-prefixed
	if (r ~ /^x?(3[01]|[12][0-9]|[0-9])$/) {
		sub(/^x/, "", r)
		return r + 0
	}

	# ABI names, ordered so that the k-th word names register k-1
	abinames = "zero ra sp gp tp t0 t1 t2 s0 s1 a0 a1 a2 a3 a4 a5 a6 a7"
	abinames = abinames " s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 t3 t4 t5 t6"
	count = split(abinames, names, " ")
	for (k = 1; k <= count; k++) {
		if (r == names[k]) {
			return k - 1
		}
	}

	return -1
}

#### STATE MUTATION ###########################################################

# write v to register regaddr


@@ 442,28 587,18 @@ function nextstate(    inst, branchtarget) {
	if (traceinst) {
		printf("# PC=0x%08x, inst=0x%08x, type=%s, disasm='%s', funct7=0x%02x, funct3=0x%1x, rs1=0x%02x, rs2=0x%02x, rd=0x%02x, opcode=0x%02x", PC, inst, type(inst), disasm(inst), funct7(inst), funct3(inst), rs1(inst), rs2(inst), rd(inst), opcode(inst))

		if (type(inst) == "R") {
			printf("\n")
		}

		if (type(inst) == "I") {
			printf(", imm=0x%08x\n", immI(inst))
		}

		if (type(inst) == "S") {
		} else if (type(inst) == "S") {
			printf(", shamt=0x%02x, arith=%d\n", shamt(inst), arith(inst))
		}

		if (type(inst) == "B") {
		} else if (type(inst) == "B") {
			printf(", imm=0x%08x\n", immB(inst))
		}

		if (type(inst) == "U") {
		} else if (type(inst) == "U") {
			printf(", imm=0x%08x\n", immU(inst))
		}

		if (type(inst) == "J") {
		} else if (type(inst) == "J") {
			printf(", imm=0x%08x\n", immJ(inst))
		} else {
			printf("\n")
		}
	}



@@ 533,12 668,23 @@ BEGIN {
	# display memory reads and writes
	tracemem=0

	# set up state storage variables
	PC = 0
	# "regs" and "mem" used for register and memory state, but not
	# initialized explicitly for efficiency, since undefined values have
	# a value of 0 in AWK.

	# if the PC is not initialized via -v, it should compare with the empty
	# string, if it is explicitly 0, this if statement will fail
	if (PC == "") {
		PC=0
	}

	if (asmcursor == "") {
		asmcursor=0
	}

	assert_errors = 0
	assemble_errors = 0

}

# Skip any input line whose first field begins with '#'; "next" moves on to
# the following input line without running any of the later rules.
$1 ~ /^[#]/ { next }


@@ 551,10 697,114 @@ $1 == "mode" { mode=$2 ; next }
	if (mode == "disasm") {
		print(disasm(strtonum($1)))
		next
	}

	} else if (mode == "assemble") {
		# save the line in case we call fieldshift later
		asm_line=$0

		# handle labels: when the first field is "name:", record the
		# current assembler cursor under "name" (without the colon) and
		# drop the field so the rest of the line is parsed as usual.
		#
		# The label text is copied before the colon is stripped because
		# sub() returns the number of substitutions made, not the
		# resulting string, so its return value cannot be used as the
		# table key.
		if ($1 ~ /^[a-zA-Z][a-zA-Z0-9_]*:$/) {
			asm_label = $1
			sub(/:$/, "", asm_label)
			labels[asm_label] = asmcursor
			fieldshift()
		}

		# skip comments and empty lines
		if (match($1, /^[#]/) || NF == 0) { next }


		asm_opcode = 0
		asm_funct3 = 0
		asm_funct7 = 0
		asm_rs2 = 0
		asm_rs1 = 0
		asm_rd = 0
		asm_immI = 0
		asm_immU = 0
		asm_shamt = 0
		asm_arith = 0
		asm_immS = 0
		asm_immB = 0
		asm_immJ = 0

		# style can be any of the instruction types, and will be parsed
		# as they are disassembled
		#
		# TODO: add styles for RARS-style jump targets and lw/sw
		asm_style = "error"

		if (tolower($1) == "addi") {
			asm_opcode = 0x13
			asm_style = "I"

		} else if (tolower($1) == "slti") {
			asm_opcode = 0x13
			asm_funct3= 0x2
			asm_style = "I"

		} else if (tolower($1) == "sltiu") {
			asm_opcode = 0x13
			asm_funct3= 0x3
			asm_style = "I"

		} else if (tolower($1) == "andi") {
			asm_opcode = 0x13
			asm_funct3= 0x7
			asm_style = "I"

		} else if (tolower($1) == "xori") {
			asm_opcode = 0x13
			asm_funct3= 0x4
			asm_style = "I"

		} else if (tolower($1) == "ori") {
			asm_opcode = 0x13
			asm_funct3= 0x6
			asm_style = "I"

		} else {
			printf("# ASSEMBLER ERROR: expected opcode, got '%s' (line %d: '%s')\n", $1, NR, asm_line)
			assemble_errors++
			next
		}

		# I-type operands: destination register, source register, then a
		# signed 12-bit immediate. Any operand that fails to parse counts
		# as one assembler error and abandons the line.
		if (asm_style == "I") {
			asm_rd = parsereg($2)
			if (asm_rd < 0) {
				printf("# ASSEMBLER ERROR: expected rd register, got '%s' (line %d: '%s')\n", $2, NR, asm_line)
				assemble_errors++
				next
			}
			asm_rs1 = parsereg($3)
			if (asm_rs1 < 0) {
				printf("# ASSEMBLER ERROR: expected rs1 register, got '%s' (line %d: '%s')\n", $3, NR, asm_line)
				assemble_errors++
				next
			}

			# The immediate must actually look like a number: strtonum()
			# turns a missing or non-numeric operand into 0, which would
			# otherwise be assembled silently as an immediate of 0.
			asm_immval = strtonum($4)
			if ($4 !~ /^([-+]?[0-9]+|0[xX][0-9a-fA-F]+)$/ || asm_immval > 2047 || asm_immval < -2048) {
				printf("# ASSEMBLER ERROR: expected immediate in range -2048...2047, got '%s' (line %d: '%s')\n", $4, NR, asm_line)
				assemble_errors++
				next
			}

			# Encode the value that was range-checked (not the raw text),
			# so hex literals are converted the same way the check saw
			# them, and keep only the 12 bits that fit in the immediate
			# field so a negative value cannot spill past bit 31 once it
			# is shifted into place.
			# NOTE(review): assumes dec2two() returns a non-negative
			# two's-complement encoding, as its use with 0x%08x output
			# elsewhere suggests -- confirm against its definition.
			asm_immI = and(dec2two(asm_immval), 0xFFF)
		}


		asm_inst = or( \
			 asm_opcode, \
			 lshift(asm_rd, 7), \
			 lshift(asm_funct3, 12), \
			 lshift(asm_rs1, 15), \
			 lshift(asm_immI, 20))

		memwrite(asmcursor, asm_inst)

		asmcursor += 4

	}
}


# traceregs VALUE: store the numeric value of the second field in the
# traceregs global, then skip the remaining rules for this line.
$1 == "traceregs" { traceregs=strtonum($2) ; next }

# traceinst VALUE: store the numeric value of the second field in the
# traceinst global, then skip the remaining rules for this line.
$1 == "traceinst" { traceinst=strtonum($2) ; next }


@@ 576,7 826,7 @@ $1 == "pcpoke" { PC = strtonum($2) ; next }
# assert reg REG VALUE
#
# Reads register REG and compares it with VALUE after passing both through
# dec2two(). On a mismatch, a single "# "-prefixed failure line is printed
# (matching the prefix used by the assembler's error messages) and
# assert_errors is incremented; assert_errors feeds the exit code.
$1 == "assert" && $2 == "reg" {
	assert_val = dec2two(regread(strtonum($3)))
	if (assert_val  != dec2two(strtonum($4))) {
		printf("# ASSERTION FAILURE: register x%d was 0x%08x, should have been 0x%08x\n", strtonum($3), assert_val, strtonum($4))
		assert_errors ++
	}
}


@@ 584,7 834,7 @@ $1 == "assert" && $2 == "reg" {
# assert mem ADDR VALUE
#
# Reads memory at ADDR via memread() and compares it with VALUE after passing
# both through dec2two(). On a mismatch, a single "# "-prefixed failure line
# is printed (matching the prefix used by the assembler's error messages) and
# assert_errors is incremented; assert_errors feeds the exit code.
$1 == "assert" && $2 == "mem" {
	assert_val = dec2two(memread(strtonum($3)))
	if (assert_val  != dec2two(strtonum($4))) {
		printf("# ASSERTION FAILURE: memory at x%d was 0x%08x, should have been 0x%08x\n", strtonum($3), assert_val, strtonum($4))
		assert_errors ++
	}
}


@@ 689,5 939,5 @@ $1 == "debug" && $2 == "immS" { printf("0x%08x\n", immS(strtonum($3))); next }
$1 == "debug" && $2 == "disasm" { printf("%s\n", disasm(strtonum($3))); next }

# The exit code is the number of failed assertions plus the number of
# assembler errors, so a test harness can detect either kind of failure from
# the exit status alone. There must be only one exit statement here: exit
# terminates the program immediately, so anything after it never runs.
END {
	exit(assert_errors + assemble_errors)
}

M tests/simulation/andi.txt => tests/simulation/andi.txt +0 -1
@@ 11,7 11,6 @@ poke 0x14 0x00137693   # andi x13 x6 1
poke 0x18 0x00237713   # andi x14 x6 2
poke 0x1c 0x00337793   # andi x15 x6 3


step 100
assert reg 5 1
assert reg 6 0xff

A tests/simulation/asm_immediate.txt => tests/simulation/asm_immediate.txt +17 -0
@@ 0,0 1,17 @@
tracemem 1

mode assemble
addi x5 x0 123
slti x5 x0 123
sltiu x5 x0 123
andi x5 x0 123
ori x5 x0 123
xori x5 x0 123

mode normal
assert mem 0x000 0x07b00293
assert mem 0x004 0x07b02293
assert mem 0x008 0x07b03293
assert mem 0x00c 0x07b07293
assert mem 0x010 0x07b06293
assert mem 0x014 0x07b04293