# Implementation of RV32I
# (https://riscv.org//wp-content/uploads/2017/05/riscv-spec-v2.2.pdf)
# in AWK.
# Copyright (c) 2020, Charles Daniels
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
###############################################################################
#
# INTRODUCTION
#
# This program implements a RISC-V assembler, disassembler, and simulator,
# supporting most of the RV32I instruction set. It is controlled by reading
# directives on standard input. A directive consists of one or more whitespace
# separated values. Empty lines, and lines beginning with '#' are ignored.
#
# This program also has a concept of a "mode". In normal mode, which is the
# default, all input lines are processed as directives. In other modes, input
# lines are processed in some special way specific to that mode. In all modes,
# the directive 'mode normal' will unconditionally return the program to normal
# mode.
#
# DIRECTIVES
#
# mode MODE
#
# Updates the mode to the specified value. Undefined modes are treated
# equivalently to "normal".
#
# traceregs TRACEREGS
#
# If TRACEREGS is nonzero, all register reads and writes are printed to
# standard out.
#
# traceinst TRACEINST
#
# If TRACEINST is nonzero, each instruction fetched for execution is
# printed to standard out.
#
# tracemem TRACEMEM
#
# If TRACEMEM is nonzero, all memory reads and writes are printed to
# standard out.
#
# traceresolve TRACERESOLVE
#
# If TRACERESOLVE is nonzero, then a detailed trace of assembler label
# resolution will be displayed. This will also display label information
# as they are encountered by the assembler.
#
# poke ADDR VAL
#
# Updates the memory address ADDR to the given 32-bit value VAL.
#
# peek ADDR
#
# Reads a 32-bit value from the memory address ADDR.
#
# rpoke REG VAL
#
# Writes the 32-bit value VAL to the register REG. If REG is 0 or greater
# than 31, the directive is ignored silently.
#
# rpeek REG
#
# Reads the 32-bit value of a register REG and prints it to standard out.
# If REG is 0 or greater than 31, this prints 0.
#
# pcpeek
#
# Reads the 32-bit PC value.
#
# pcpoke ADDR
#
# Set the 32-bit PC value to the given address.
#
# dump
#
# Dump the current state of the simulation in a format that it can be
# processed as directives for later resumption.
#
# assert reg REG VAL
#
# Asserts that the register REG contains a value equal to the given
# 32-bit value VAL.
#
# assert mem ADDR VAL
#
# Asserts that the memory at address contains a value equal to the given
# 32-bit value VAL.
#
# step N
#
# Advances the CPU simulation by N many cycles.
#
# showregs
#
# Displays the contents of the entire register file.
#
# resolve
#
# Processes any unresolved jump or branch targets created as a result of
# assembly by rewriting the corresponding instructions in-memory to point
# to the appropriate locations.
#
# Note that errors in label resolution count as assembler errors for
# exit-code calculation.
#
# debug FIELD INST
#
# Displays the value of the specified field FIELD of the instruction
# literal INST. FIELD may take any of the following values: funct7, rs2,
# rs1, funct3, rd, opcode, immI, immU, shamt, arith, immB, immJ, immS.
#
# debug disasm INST
#
# Displays the disassembly of the specified instruction literal INST.
#
#
# MODES
#
# The following modes, aside from normal mode, are available...
#
# disasm
#
# Each line of input is treated as a 32-bit instruction literal and its
# disassembly is shown on standard out.
#
# assemble
#
# Each line of input is treated as an assembler instruction. The
# instruction will be parsed, and if it can be assembled successfully the
# result will be placed at asmcursor, which will be incremented by 4
# thereafter (thus, using -v asmcursor=xyz will allow you to change the
# location in memory where the assembled program is placed).
#
# Note that if the assembler encounters any errors, the program up to
# the point at which the error occurred will still be placed in memory.
#
# The assembler syntax used is heavily based on that used by RARS.
#
#
# INFLUENTIAL VARIABLES
#
# The following variables can be specified using the -v flag to awk to change
# the behavior of the program.
#
# mode
#
# Can be set to start the program in a particular mode when it begins.
# This can be useful for example when processing a normal assembler file
# to avoid having to place "mode assemble" at the top of the file.
#
# PC
#
# Can be used to change the address at which the program begins executing.
#
# asmcursor
#
# Can be used to change the address at which the assembler places the
# program that it assembles.
#
#
# MEMORY MODEL
#
# This simulator will allow reads and writes to any memory address addressable
# by RV32I, though potentially limited by the performance of AWK and the
# available system memory. The memory is byte-addressable, and each cell stores
# a value in 0...255. This is enforced by masking in the memread() and
# memwrite() functions. Program execution starts by default at memory address
# 0x0.
#
#
# ASSERTIONS
#
# Assertions can cause an error message to be displayed on standard out.
# Additionally, each failed assertion increments a counter. This counter is
# used in part to determine the program's exit code. This can allow a RISC-V
# program (or the simulator) to be tested by monitoring the exit code.
#
#
# EXIT CODE
#
# The program's exit code is the sum of the number of assertion failures, and
# the number of assembler errors.
#
#
# DIFFERENCES FROM RV32I
#
# For the sake of simplicity, this program implements only a subset of RV32I.
# It could likely be extended to support more of it easily, if someone so
# desired. The following differences are known and deliberate:
#
# * Exception and error handling are not implemented at all.
#
# * CSR-related instructions are not implemented.
#
# * Timers and counters are not implemented.
#
# * ECALL and EBREAK are not implemented.
#
# * Although the memory map is byte-addressable internally, byte and half-word
# related instructions are not implemented.
#
#
# DIFFERENCES FROM RARS
#
# The syntax for lw and sw is changed in order to make them easier to parse.
# The assembled instructions are identical in memory. The equivalence table is
# shown below:
#
# RARS instruction riscv.awk instruction
# lw x1 100(x2) lw x1 x2 100
# sw x2 100(x1) sw x1 x2 100
#
#
# LIMITATIONS
#
# Support for error handling, detecting and recovering from bad inputs, and so
# on is quite limited. In general, if you feed this program garbage, it will
# probably behave in unpredictable and undesirable ways.
#
# The assembler does not detect or mitigate jumps to labels outside of the
# maximum range of branch or jump instructions.
#
###############################################################################
#### GLOBAL VARIABLES #########################################################
#
# This is AWK, technically all variables are global. However this list is used
# to store variables which are intentionally used to pass data between
# functions in this program. Other variables may be global per se, but are not
# (intentionally) used to pass data around, and cases where they do so should
# be fixed or they should be added to this list.
#
# PC
#
# Stores the current program counter.
#
# assert_errors
#
# Stores the number of assertion errors encountered so far.
#
# assemble_errors
#
# Stores the number of assembler errors encountered so far.
#
# traceregs
#
# Stores whether or not register accesses should be traced.
#
# tracemem
#
# Stores whether or not memory accesses should be traced.
#
# traceinst
#
# Stores whether or not instruction execution should be traced.
#
# mode
#
# Stores the current operating mode.
#
# regs
#
# Table associating register numbers with their current value. Cells
# should store 32-bit integers.
#
# mem
#
# Table associating memory addresses with their current value. Cells
# should store 8-bit integers.
#
# labels
#
# Table associating string label names with the memory address at which
# they occur.
#
# unresolved
#
# Table associating string label names with memory addresses containing
# jump or branch targets with an unresolved jump to that label.
#
# asmcursor
#
# The memory address at which the assembler will place the next assembled
# instruction. Each time an instruction is assembled, this is incremented
# by 4.
#
###############################################################################
#### HELPER METHODS ###########################################################
# Interpret v as a 32-bit two's complement pattern and return the signed
# integer it stands for.
#
# The explicit conversion matters because AWK numbers are not 32-bit: the
# interpreter may use 64-bit or arbitrary-precision integers, and v may even
# arrive as a string.
function two2dec(v) {
    # magnitude comes from twoabs(); the sign is applied afterwards
    return (isneg(v) ? -1 : 1) * and(twoabs(v), 0xffffffff)
}
# Encode the signed integer v as a 32-bit two's complement pattern.
# Negative inputs are biased by 2^32 before the result is masked to 32 bits.
function dec2two(v) {
    return and(((v < 0) ? 2^32 + v : v), 0xffffffff)
}
# Sign extend v to 32 bits, treating bit i as the sign bit.
#
#   v    value whose bits above i are expected to be clear
#   i    index of the sign bit (0 = least significant)
#   mask local scratch, not an argument
#
# Returns v unchanged when bit i is clear or when i is 32 or more (there is
# nothing left to fill); otherwise returns v with bits i..31 all set.
#
# The fill mask is computed directly rather than with a counting loop, so no
# loop counter escapes into the global namespace.
function signextend(v, i,    mask) {
    # bit positions at or beyond 32 leave nothing to extend
    if (i >= 32) { return v }
    # sign bit clear: value is already non-negative
    if (and(v, lshift(1, i)) == 0) { return v }
    # ones in bits i..31, zeros below
    mask = 0xffffffff - (lshift(1, i) - 1)
    return or(v, mask)
}
# Report whether v is negative when read as a 32-bit two's complement value,
# i.e. whether bit 31 is set. Returns 1 or 0.
function isneg(v) {
    return (and(rshift(v, 31), 1) == 1)
}
# Magnitude of a 32-bit two's complement value: a pattern with bit 31 set is
# mapped to 2^32 - v, anything else is returned as-is.
function twoabs(v) {
    return (isneg(v) ? 2^32 - v : v)
}
# Drop the first field of the current record: $1 takes the value of $2, $2
# that of $3, and so on, and NF is reduced by one. The old contents of $1 are
# lost and $0 is rebuilt by AWK as a side effect of the field assignments.
#
# A record with no fields is left untouched, because decrementing NF below
# zero is a fatal error in gawk.
#
#   i   local loop index, not an argument; kept local so callers' own use of
#       a global i is not disturbed
function fieldshift(    i) {
    if (NF < 1) { return }
    for (i = 1; i < NF; i++) {
        $i = $(i+1)
    }
    NF--
}
# Scatter the bits of the jump offset v into the positions the J-type
# encoding uses within an instruction word:
#   v[20]    -> bit 31
#   v[10:1]  -> bits 30..21
#   v[11]    -> bit 20
#   v[19:12] -> bits 19..12 (already in place)
# Bit 0 of v is discarded.
function packJimm(v) {
    return or( \
        lshift(and(rshift(v, 20), 0x1), 31), \
        lshift(and(rshift(v, 1), 0x3ff), 21), \
        lshift(and(rshift(v, 11), 0x1), 20), \
        and(v, 0xff000))
}
# Scatter the bits of the branch offset v into the positions the B-type
# encoding uses within an instruction word:
#   v[12]   -> bit 31
#   v[10:5] -> bits 30..25
#   v[4:1]  -> bits 11..8
#   v[11]   -> bit 7
# Bit 0 of v is discarded.
function packBimm(v) {
    return or( \
        lshift(and(rshift(v, 12), 0x1), 31), \
        lshift(and(rshift(v, 5), 0x3f), 25), \
        lshift(and(rshift(v, 1), 0xf), 8), \
        lshift(and(rshift(v, 11), 0x1), 7))
}
# Scatter the bits of the store offset v into the positions the S-type
# encoding uses within an instruction word:
#   v[11:5] -> bits 31..25
#   v[4:0]  -> bits 11..7
function packSimm(v) {
    return or( \
        lshift(and(rshift(v, 5), 0x7f), 25), \
        lshift(and(v, 0x1f), 7))
}
#### INSTRUCTION DECODING #####################################################
# masks to pull relevant values out of an instruction
# funct7 field: instruction bits 31..25
function funct7(v) { return and(rshift(v, 25), 0x7f) }
# rs2 field: instruction bits 24..20
function rs2(v) { return and(rshift(v, 20), 0x1f) }
# rs1 field: instruction bits 19..15
function rs1(v) { return and(rshift(v, 15), 0x1f) }
# funct3 field: instruction bits 14..12
function funct3(v) { return and(rshift(v, 12), 0x7) }
# rd field: instruction bits 11..7
function rd(v) { return and(rshift(v, 7), 0x1f) }
# opcode field: instruction bits 6..0
function opcode(v) { return and(v, 0x7f) }
# I-type immediate: instruction bits 31..20, sign extended from bit 11
function immI(v) { return signextend(and(rshift(v, 20), 0xfff), 11) }
# U-type immediate: instruction bits 31..12, left in place (low 12 bits zero)
function immU(v) { return and(v, 0xfffff000) }
# shift amount of an immediate shift: instruction bits 24..20
function shamt(v) { return and(rshift(v, 20), 0x1f) }
# arithmetic-shift flag: instruction bit 30
function arith(v) { return and(rshift(v, 30), 0x1) }
# S-type immediate, sign extended from bit 11:
#   instruction bits 31..25 -> imm[11:5]
#   instruction bits 11..7  -> imm[4:0]
function immS(v) { return signextend(or( \
    lshift(and(rshift(v, 25), 0x7f), 5), \
    and(rshift(v, 7), 0x1f)), 11) }
# B-type immediate, sign extended from bit 12:
#   instruction bit  31     -> imm[12]
#   instruction bits 30..25 -> imm[10:5]
#   instruction bit  7      -> imm[11]
#   instruction bits 11..8  -> imm[4:1]
function immB(v) { return signextend(or( \
    lshift(and(rshift(v, 31), 0x1), 12), \
    lshift(and(rshift(v, 25), 0x3f), 5), \
    lshift(and(rshift(v, 7), 0x1), 11), \
    lshift(and(rshift(v, 8), 0xf), 1)), 12) }
# J-type immediate, sign extended from bit 20:
#   instruction bit  31     -> imm[20]
#   instruction bits 30..21 -> imm[10:1]
#   instruction bit  20     -> imm[11]
#   instruction bits 19..12 -> imm[19:12] (already in place)
function immJ(v) { return signextend(or( \
    lshift(and(rshift(v, 31), 0x1), 20), \
    lshift(and(rshift(v, 21), 0x3ff), 1), \
    lshift(and(rshift(v, 20), 0x1), 11), \
    and(v, 0x000ff000)), 20) }
# Classify the instruction word v by its encoding format, judged from the
# opcode field alone.
#
# Returns one of "R", "I", "S", "B", "U", "J", or "E" when the opcode is not
# recognised.
function type(v) {
    if (opcode(v) == 0x33) { return "R" }
    if (opcode(v) == 0x23) { return "S" }
    if (opcode(v) == 0x63) { return "B" }
    if (opcode(v) == 0x6f) { return "J" }
    if (opcode(v) == 0x37 || opcode(v) == 0x17) { return "U" }
    if (opcode(v) == 0x67 || opcode(v) == 0x3 || opcode(v) == 0x13 || \
        opcode(v) == 0xf || opcode(v) == 0x73) { return "I" }
    return "E"
}
# Return the upper-case mnemonic of the instruction word v, or "ERROR" when
# the opcode/funct3/funct7 combination is not recognised.
#
#   v           32-bit instruction word
#   op, f3, f7  locals (not arguments) caching the decoded fields so they are
#               extracted once instead of once per comparison
#
# ECALL and EBREAK share opcode 0x73 and funct3 0; they differ only in the
# 12-bit immediate in bits 31..20 (0 for ECALL, 1 for EBREAK). Any other
# immediate with funct3 0 is still reported as "ECALL", as before.
function op2str(v,    op, f3, f7) {
    op = opcode(v)
    f3 = funct3(v)
    f7 = funct7(v)

    # upper immediates and jumps
    if (op == 0x37) { return "LUI" }
    if (op == 0x17) { return "AUIPC" }
    if (op == 0x6f) { return "JAL" }
    if ((op == 0x67) && (f3 == 0x0)) { return "JALR" }

    # conditional branches
    if ((op == 0x63) && (f3 == 0x0)) { return "BEQ" }
    if ((op == 0x63) && (f3 == 0x1)) { return "BNE" }
    if ((op == 0x63) && (f3 == 0x4)) { return "BLT" }
    if ((op == 0x63) && (f3 == 0x5)) { return "BGE" }
    if ((op == 0x63) && (f3 == 0x6)) { return "BLTU" }
    if ((op == 0x63) && (f3 == 0x7)) { return "BGEU" }

    # loads
    if ((op == 0x3) && (f3 == 0x0)) { return "LB" }
    if ((op == 0x3) && (f3 == 0x1)) { return "LH" }
    if ((op == 0x3) && (f3 == 0x2)) { return "LW" }
    if ((op == 0x3) && (f3 == 0x4)) { return "LBU" }
    if ((op == 0x3) && (f3 == 0x5)) { return "LHU" }

    # stores
    if ((op == 0x23) && (f3 == 0x0)) { return "SB" }
    if ((op == 0x23) && (f3 == 0x1)) { return "SH" }
    if ((op == 0x23) && (f3 == 0x2)) { return "SW" }

    # register-immediate arithmetic
    if ((op == 0x13) && (f3 == 0x0)) { return "ADDI" }
    if ((op == 0x13) && (f3 == 0x2)) { return "SLTI" }
    if ((op == 0x13) && (f3 == 0x3)) { return "SLTIU" }
    if ((op == 0x13) && (f3 == 0x4)) { return "XORI" }
    if ((op == 0x13) && (f3 == 0x6)) { return "ORI" }
    if ((op == 0x13) && (f3 == 0x7)) { return "ANDI" }
    if ((op == 0x13) && (f7 == 0x0) && (f3 == 0x1)) { return "SLLI" }
    if ((op == 0x13) && (f7 == 0x0) && (f3 == 0x5)) { return "SRLI" }
    if ((op == 0x13) && (f7 == 0x20) && (f3 == 0x5)) { return "SRAI" }

    # register-register arithmetic
    if ((op == 0x33) && (f7 == 0x0) && (f3 == 0x0)) { return "ADD" }
    if ((op == 0x33) && (f7 == 0x20) && (f3 == 0x0)) { return "SUB" }
    if ((op == 0x33) && (f7 == 0x0) && (f3 == 0x1)) { return "SLL" }
    if ((op == 0x33) && (f7 == 0x0) && (f3 == 0x2)) { return "SLT" }
    if ((op == 0x33) && (f7 == 0x0) && (f3 == 0x3)) { return "SLTU" }
    if ((op == 0x33) && (f7 == 0x0) && (f3 == 0x4)) { return "XOR" }
    if ((op == 0x33) && (f7 == 0x0) && (f3 == 0x5)) { return "SRL" }
    if ((op == 0x33) && (f7 == 0x20) && (f3 == 0x5)) { return "SRA" }
    if ((op == 0x33) && (f7 == 0x0) && (f3 == 0x6)) { return "OR" }
    if ((op == 0x33) && (f7 == 0x0) && (f3 == 0x7)) { return "AND" }

    # fences
    if ((op == 0xf) && (f3 == 0x0)) { return "FENCE" }
    if ((op == 0xf) && (f3 == 0x1)) { return "FENCE.I" }

    # system instructions
    if ((op == 0x73) && (f3 == 0x0)) {
        # only the immediate in bits 31..20 tells EBREAK (1) from ECALL (0)
        if (rshift(and(v, 0xfff00000), 20) == 0x1) { return "EBREAK" }
        return "ECALL"
    }
    if ((op == 0x73) && (f3 == 0x1)) { return "CSRRW" }
    if ((op == 0x73) && (f3 == 0x2)) { return "CSRRS" }
    if ((op == 0x73) && (f3 == 0x3)) { return "CSRRC" }
    if ((op == 0x73) && (f3 == 0x5)) { return "CSRRWI" }
    if ((op == 0x73) && (f3 == 0x6)) { return "CSRRSI" }
    if ((op == 0x73) && (f3 == 0x7)) { return "CSRRCI" }

    # multiply/divide (funct7 == 1)
    if ((op == 0x33) && (f7 == 0x1) && (f3 == 0x0)) { return "MUL" }
    if ((op == 0x33) && (f7 == 0x1) && (f3 == 0x1)) { return "MULH" }
    if ((op == 0x33) && (f7 == 0x1) && (f3 == 0x2)) { return "MULHSU" }
    if ((op == 0x33) && (f7 == 0x1) && (f3 == 0x3)) { return "MULHU" }
    if ((op == 0x33) && (f7 == 0x1) && (f3 == 0x4)) { return "DIV" }
    if ((op == 0x33) && (f7 == 0x1) && (f3 == 0x5)) { return "DIVU" }
    if ((op == 0x33) && (f7 == 0x1) && (f3 == 0x6)) { return "REM" }
    if ((op == 0x33) && (f7 == 0x1) && (f3 == 0x7)) { return "REMU" }

    return "ERROR"
}
# Translate a register operand into its register number (0..31).
#
# Three spellings are accepted for each register: the bare number ("5"), the
# x-name ("x5") and the ABI name ("t0"). Matching is exact and case
# sensitive.
#
#   r                     register operand as written in the source
#   abi, count, idx, num  locals, not arguments
#
# Returns the register number, or -1 after printing an assembler error that
# quotes the current line (NR and the global asm_line).
function parsereg(r,    abi, count, idx, num) {
    # ABI names listed in register-number order: abi[n+1] names register n
    count = split("zero ra sp gp tp t0 t1 t2 s0 s1 a0 a1 a2 a3 a4 a5 a6 a7 " \
                  "s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 t3 t4 t5 t6", abi, " ")
    for (idx = 1; idx <= count; idx++) {
        # force a string so the comparisons below are textual, not numeric
        num = (idx - 1) ""
        if (r == num || r == abi[idx] || r == ("x" num)) {
            return idx - 1
        }
    }
    printf("# ASSEMBLER ERROR: expected register, got '%s', (line %d: '%s')\n", r, NR, asm_line)
    return -1
}
#### DISASSEMBLER #############################################################
# Return a rudimentary one-line disassembly of the instruction word v, in the
# form "0x<word>: MNEMONIC operands".
#
#   v     32-bit instruction word
#   kind  local (not an argument) caching the result of type(v)
#
# The word 0x13 (ADDI x0 x0 0) is shown as NOP. I- and S-type immediates are
# printed as signed decimal; B-, U- and J-type immediates are printed in hex.
# Words whose opcode is not recognised yield "UNKNOWN".
function disasm(v,    kind) {
    if (v == 0x13) {
        return "0x00000013: NOP"
    }
    kind = type(v)
    if (kind == "R") {
        return sprintf("0x%08x: %s x%d x%d x%d", v, op2str(v), rd(v), rs1(v), rs2(v))
    }
    if (kind == "I") {
        # two2dec turns the sign-extended pattern into a signed number
        return sprintf("0x%08x: %s x%d x%d %d", v, op2str(v), rd(v), rs1(v), two2dec(immI(v)))
    }
    if (kind == "S") {
        # store offsets are signed too; without two2dec a negative offset
        # would be printed as a large positive number
        return sprintf("0x%08x: %s x%d x%d %d", v, op2str(v), rs1(v), rs2(v), two2dec(immS(v)))
    }
    if (kind == "B") {
        return sprintf("0x%08x: %s x%d x%d 0x%x", v, op2str(v), rs1(v), rs2(v), immB(v))
    }
    if (kind == "U") {
        return sprintf("0x%08x: %s x%d 0x%x", v, op2str(v), rd(v), immU(v))
    }
    if (kind == "J") {
        return sprintf("0x%08x: %s x%d 0x%x", v, op2str(v), rd(v), immJ(v))
    }
    return sprintf("0x%08x: UNKNOWN", v)
}
#### ASSEMBLER ################################################################
# Record the encoding template of the mnemonic being assembled: the operand
# style to parse next and the fixed opcode, funct3 and funct7 bits.
function asm_setop(style, opc, f3, f7) {
    asm_style = style
    asm_opcode = opc
    asm_funct3 = f3
    asm_funct7 = f7
}
# Parse the register operand tok. role ("rd", "rs1" or "rs2") is only used in
# the error message. Returns the register number; on failure reports the
# error, counts it in assemble_errors and returns a negative value.
function asm_getreg(tok, role,    regnum) {
    regnum = parsereg(tok)
    if (regnum < 0) {
        printf("# ASSEMBLER ERROR: expected %s register, got '%s' (line %d: '%s')\n", role, tok, NR, asm_line)
        assemble_errors++
    }
    return regnum
}
# Assemble the instruction held in the current AWK record.
#
# This function operates on the normal AWK field registers. On success the
# encoded 32-bit instruction is written to mem[asmcursor] through memwrite()
# and asmcursor is incremented by 4.
#
# A first field matching the label pattern (name followed by ':') is recorded
# in labels[] at the current asmcursor and removed from the record before the
# rest is assembled. Records whose first field starts with '#' and records
# with no fields are ignored. Jump and branch targets that are not numeric
# literals are recorded in unresolved[] and encoded with a zero immediate;
# the 'resolve' directive described in the file header rewrites them later.
#
# On any error an "# ASSEMBLER ERROR" line is printed, assemble_errors is
# incremented and nothing is written to memory for this record.
#
# Every register operand is parsed and checked through asm_getreg(), so each
# check necessarily tests the register that was just parsed.
#
#   mnemonic   local (not an argument): lower-cased first field
function assemble(    mnemonic) {
    # save the line in case we call fieldshift later
    asm_line = $0

    # handle labels
    # NOTE(review): the pattern is unanchored and needs at least two
    # characters before the ':'; left as-is to keep accepted input unchanged
    if (match($1, /[a-zA-Z][a-zA-Z0-9_]+[:]/)) {
        sub(/[:]/, "", $1)
        labels[$1] = asmcursor
        if (traceresolve) {
            printf("# encountered label '%s' at mem[0x%08x]\n", $1, asmcursor)
        }
        fieldshift()
    }

    # skip comments and empty lines
    if (match($1, /^[#]/) || NF == 0) { return }

    # clear every field of the instruction under construction
    asm_opcode = 0
    asm_funct3 = 0
    asm_funct7 = 0
    asm_rs2 = 0
    asm_rs1 = 0
    asm_rd = 0
    asm_immI = 0
    asm_immU = 0
    asm_shamt = 0
    asm_arith = 0
    asm_immS = 0
    asm_immB = 0
    asm_immJ = 0

    # valid styles include:
    #
    # I     -- assemble like a normal register-immediate instruction
    # shift -- assemble like a shift instruction
    # U     -- LUI or AUIPC
    # R     -- normal register-register instructions
    # nop   -- no operands left to parse (NOP, and JAL which is handled
    #          inline below)
    # B     -- branches
    # S     -- store instruction
    asm_style = "error"
    mnemonic = tolower($1)

    if (mnemonic == "addi") {
        asm_setop("I", 0x13, 0x0, 0x0)
    } else if (mnemonic == "slti") {
        asm_setop("I", 0x13, 0x2, 0x0)
    } else if (mnemonic == "sltiu") {
        asm_setop("I", 0x13, 0x3, 0x0)
    } else if (mnemonic == "andi") {
        asm_setop("I", 0x13, 0x7, 0x0)
    } else if (mnemonic == "xori") {
        asm_setop("I", 0x13, 0x4, 0x0)
    } else if (mnemonic == "ori") {
        asm_setop("I", 0x13, 0x6, 0x0)
    } else if (mnemonic == "slli") {
        asm_setop("shift", 0x13, 0x1, 0x0)
    } else if (mnemonic == "srli") {
        asm_setop("shift", 0x13, 0x5, 0x0)
    } else if (mnemonic == "srai") {
        asm_setop("shift", 0x13, 0x5, 0x20)
        asm_arith = 0x1
    } else if (mnemonic == "lui") {
        asm_setop("U", 0x37, 0x0, 0x0)
    } else if (mnemonic == "auipc") {
        asm_setop("U", 0x17, 0x0, 0x0)
    } else if (mnemonic == "add") {
        asm_setop("R", 0x33, 0x0, 0x0)
    } else if (mnemonic == "slt") {
        asm_setop("R", 0x33, 0x2, 0x0)
    } else if (mnemonic == "sltu") {
        asm_setop("R", 0x33, 0x3, 0x0)
    } else if (mnemonic == "and") {
        asm_setop("R", 0x33, 0x7, 0x0)
    } else if (mnemonic == "or") {
        asm_setop("R", 0x33, 0x6, 0x0)
    } else if (mnemonic == "xor") {
        asm_setop("R", 0x33, 0x4, 0x0)
    } else if (mnemonic == "sll") {
        asm_setop("R", 0x33, 0x1, 0x0)
    } else if (mnemonic == "srl") {
        asm_setop("R", 0x33, 0x5, 0x0)
    } else if (mnemonic == "sub") {
        asm_setop("R", 0x33, 0x0, 0x20)
    } else if (mnemonic == "sra") {
        asm_setop("R", 0x33, 0x5, 0x20)
    } else if (mnemonic == "nop") {
        asm_setop("nop", 0x13, 0x0, 0x0)
    } else if (mnemonic == "jal") {
        # no style of its own: operands are handled right here as a one-off
        asm_setop("nop", 0x6f, 0x0, 0x0)
        asm_rd = asm_getreg($2, "rd")
        if (asm_rd < 0) { return }
        if (match($3, /^0x[0-9a-fA-F]{1,8}$/) || match($3, /^[0-9-]+$/)) {
            # literal value for the jump target
            asm_immJ = packJimm(dec2two(strtonum($3)))
        } else {
            # symbolic target: remember where the jump lives
            unresolved[$3] = asmcursor
        }
    } else if (mnemonic == "jalr") {
        asm_setop("I", 0x67, 0x0, 0x0)
    } else if (mnemonic == "beq") {
        asm_setop("B", 0x63, 0x0, 0x0)
    } else if (mnemonic == "bne") {
        asm_setop("B", 0x63, 0x1, 0x0)
    } else if (mnemonic == "blt") {
        asm_setop("B", 0x63, 0x4, 0x0)
    } else if (mnemonic == "bltu") {
        asm_setop("B", 0x63, 0x6, 0x0)
    } else if (mnemonic == "bge") {
        asm_setop("B", 0x63, 0x5, 0x0)
    } else if (mnemonic == "bgeu") {
        asm_setop("B", 0x63, 0x7, 0x0)
    } else if (mnemonic == "lw") {
        asm_setop("I", 0x3, 0x2, 0x0)
    } else if (mnemonic == "sw") {
        asm_setop("S", 0x23, 0x2, 0x0)
    } else {
        printf("# ASSEMBLER ERROR: expected opcode, got '%s' (line %d: '%s')\n", $1, NR, asm_line)
        assemble_errors++
        return
    }

    if (asm_style == "I") {
        # OP rd rs1 imm12
        asm_rd = asm_getreg($2, "rd")
        if (asm_rd < 0) { return }
        asm_rs1 = asm_getreg($3, "rs1")
        if (asm_rs1 < 0) { return }
        if (strtonum($4) > 2047 || strtonum($4) < -2048) {
            printf("# ASSEMBLER ERROR: expected immediate in range -2048...2047, got '%s' (line %d: '%s')\n", $4, NR, asm_line)
            assemble_errors++
            return
        }
        # keep only the 12 encoded bits so that a negative immediate does
        # not spill past bit 31 once shifted into place
        asm_immI = and(dec2two(strtonum($4)), 0xfff)
    } else if (asm_style == "shift") {
        # OP rd rs1 shamt
        asm_rd = asm_getreg($2, "rd")
        if (asm_rd < 0) { return }
        asm_rs1 = asm_getreg($3, "rs1")
        if (asm_rs1 < 0) { return }
        if (strtonum($4) > 31 || strtonum($4) < 0) {
            printf("# ASSEMBLER ERROR: expected shift amount in range 0...31, got '%s' (line %d: '%s')\n", $4, NR, asm_line)
            assemble_errors++
            return
        }
        # bit 10 of the immediate is the arithmetic-shift flag
        asm_immI = or(dec2two(strtonum($4)), lshift(asm_arith, 10))
    } else if (asm_style == "U") {
        # OP rd imm20
        asm_rd = asm_getreg($2, "rd")
        if (asm_rd < 0) { return }
        if (strtonum($3) > 1048575 || strtonum($3) < 0) {
            printf("# ASSEMBLER ERROR: expected immedite in range 0...1048575, got '%s' (line %d: '%s')\n", $3, NR, asm_line)
            assemble_errors++
            return
        }
        asm_immU = lshift(dec2two(strtonum($3)), 12)
    } else if (asm_style == "R") {
        # OP rd rs1 rs2
        asm_rd = asm_getreg($2, "rd")
        if (asm_rd < 0) { return }
        asm_rs1 = asm_getreg($3, "rs1")
        if (asm_rs1 < 0) { return }
        asm_rs2 = asm_getreg($4, "rs2")
        if (asm_rs2 < 0) { return }
    } else if (asm_style == "B") {
        # OP rs1 rs2 target
        asm_rs1 = asm_getreg($2, "rs1")
        if (asm_rs1 < 0) { return }
        asm_rs2 = asm_getreg($3, "rs2")
        if (asm_rs2 < 0) { return }
        if (match($4, /^0x[0-9a-fA-F]{1,8}$/) || match($4, /^[0-9-]+$/)) {
            if (strtonum($4) > 4095 || strtonum($4) < -4096) {
                printf("# ASSEMBLER ERROR: expected immediate in range -4096...4095, got '%s' (line %d: '%s')\n", $4, NR, asm_line)
                assemble_errors++
                return
            }
            asm_immB = packBimm(dec2two(strtonum($4)))
        } else {
            # symbolic target: remember where the branch lives
            unresolved[$4] = asmcursor
        }
    } else if (asm_style == "S") {
        # OP rs1 rs2 imm12
        asm_rs1 = asm_getreg($2, "rs1")
        if (asm_rs1 < 0) { return }
        asm_rs2 = asm_getreg($3, "rs2")
        if (asm_rs2 < 0) { return }
        if (strtonum($4) > 2047 || strtonum($4) < -2048) {
            printf("# ASSEMBLER ERROR: expected immediate in range -2048...2047, got '%s' (line %d: '%s')\n", $4, NR, asm_line)
            assemble_errors++
            return
        }
        asm_immS = packSimm(dec2two(strtonum($4)))
    } else if (asm_style == "nop") {
        # do nothing
    }

    # merge all fields; those that do not apply to this style are still 0
    asm_inst = or( \
        asm_opcode, \
        lshift(asm_rd, 7), \
        lshift(asm_funct3, 12), \
        lshift(asm_rs1, 15), \
        lshift(asm_immI, 20), \
        asm_immU, \
        lshift(asm_rs2, 20), \
        lshift(asm_funct7, 25), \
        asm_immJ, \
        asm_immB, \
        asm_immS)
    memwrite(asmcursor, asm_inst)
    asmcursor += 4
}
#### STATE MUTATION ###########################################################
# Store v, truncated to 32 bits, in register regaddr.
#
# The access is traced first when traceregs is set, including accesses that
# are then discarded. Writes to register 0 or to a register number above 31
# have no effect.
function regwrite(regaddr, v) {
    if (traceregs) {
        printf("# regwrite(x%d, 0x%08x)\n", regaddr, v)
    }
    if (regaddr != 0 && regaddr <= 31) {
        regs[regaddr] = and(v, 0xffffffff)
    }
}
# Return the 32-bit contents of register regaddr.
#
# Register 0 and register numbers above 31 always read as 0. When traceregs
# is set the access and the value returned are printed.
function regread(regaddr) {
    if (regaddr != 0 && regaddr <= 31) {
        if (traceregs) {
            printf("# regread(x%d) -> 0x%08x\n", regaddr, and(regs[regaddr], 0xffffffff))
        }
        return and(regs[regaddr], 0xffffffff)
    }
    # x0 and out-of-range registers
    if (traceregs) {
        printf("# regread(x%d) -> 0x%08x\n", regaddr, 0)
    }
    return 0
}
# Store the 32-bit value v at memaddr as four separate bytes, least
# significant byte first: mem[memaddr] receives bits 7..0 and mem[memaddr+3]
# receives bits 31..24. The write is traced when tracemem is set.
function memwrite(memaddr, v) {
    if (tracemem) {
        printf("# memwrite(0x%08x, 0x%08x)\n", memaddr, v)
    }
    # move each byte down to the low position, then keep just that byte
    mem[memaddr] = and(v, 0xff)
    mem[memaddr+1] = and(rshift(v, 8), 0xff)
    mem[memaddr+2] = and(rshift(v, 16), 0xff)
    mem[memaddr+3] = and(rshift(v, 24), 0xff)
}
# Load a 32-bit value from memaddr by recombining the four bytes stored at
# memaddr .. memaddr+3, least significant byte first. Each cell is masked to
# 8 bits before use. The read is traced when tracemem is set.
#
#   res   local accumulator, not an argument
function memread(memaddr, res) {
    res = and(mem[memaddr], 0xff)
    res = or(res, lshift(and(mem[memaddr+1], 0xff), 8))
    res = or(res, lshift(and(mem[memaddr+2], 0xff), 16))
    res = or(res, lshift(and(mem[memaddr+3], 0xff), 24))
    if (tracemem) {
        printf("# memread(0x%08x) -> 0x%08x\n", memaddr, res)
    }
    return res
}
# Run the CPU for one tick: fetch the 32-bit word at PC, execute it if
# op2str() names one of the instructions handled below, then commit PC_NEXT
# to PC. A word that matches none of the cases only advances PC by 4.
#
# Every name in the parameter list is a local; callers pass no arguments.
#
# Points that are easy to get wrong:
#  - JALR resolves its target BEFORE the link value is written, because rd
#    and rs1 may be the same register (e.g. "jalr ra, ra, 0").
#  - SLL/SRL/SRA take their shift amount from the low 5 bits of rs2 only,
#    as RV32I specifies.
#  - LW/SW wrap the effective address to 32 bits, so that a negative
#    offset addresses memory below the base instead of above 2^32.
#    NOTE(review): this relies on immI()/immS() returning the immediate
#    sign-extended to 32 bits, as the ANDI/ADDI cases already assume.
function nextstate(    inst, branchtarget, op, shiftamt, target) {
    inst = memread(PC)

    if (traceinst) {
        printf("# PC=0x%08x, inst=0x%08x, type=%s, disasm='%s', funct7=0x%02x, funct3=0x%1x, rs1=0x%02x, rs2=0x%02x, rd=0x%02x, opcode=0x%02x", PC, inst, type(inst), disasm(inst), funct7(inst), funct3(inst), rs1(inst), rs2(inst), rd(inst), opcode(inst))
        if (type(inst) == "I") {
            printf(", imm=0x%08x\n", immI(inst))
        } else if (type(inst) == "S") {
            printf(", shamt=0x%02x, arith=%d\n", shamt(inst), arith(inst))
        } else if (type(inst) == "B") {
            printf(", imm=0x%08x\n", immB(inst))
        } else if (type(inst) == "U") {
            printf(", imm=0x%08x\n", immU(inst))
        } else if (type(inst) == "J") {
            printf(", imm=0x%08x\n", immJ(inst))
        } else {
            printf("\n")
        }
    }

    PC_NEXT = PC + 4

    # resolve the branch target up front, since we will use it several times
    branchtarget = PC + two2dec(immB(inst))

    # decode the mnemonic once instead of once per comparison
    op = op2str(inst)

    # integer register-immediate instructions
    if (op == "ADDI") {
        regwrite(rd(inst), dec2two(two2dec(regread(rs1(inst))) + two2dec(immI(inst))))
    } else if (op == "SLTI") {
        regwrite(rd(inst), two2dec(regread(rs1(inst))) < two2dec(immI(inst)))
    } else if (op == "SLTIU") {
        regwrite(rd(inst), regread(rs1(inst)) < immI(inst))
    } else if (op == "ANDI") {
        regwrite(rd(inst), and(regread(rs1(inst)), immI(inst)))
    } else if (op == "ORI") {
        regwrite(rd(inst), or(regread(rs1(inst)), immI(inst)))
    } else if (op == "XORI") {
        regwrite(rd(inst), xor(regread(rs1(inst)), immI(inst)))
    } else if (op == "SLLI") {
        regwrite(rd(inst), lshift(regread(rs1(inst)), shamt(inst)))
    } else if (op == "SRLI") {
        regwrite(rd(inst), rshift(regread(rs1(inst)), shamt(inst)))
    } else if (op == "SRAI") {
        regwrite(rd(inst), signextend(and(rshift(regread(rs1(inst)), shamt(inst)), 0xffffffff), 31-shamt(inst)))
    } else if (op == "LUI") {
        regwrite(rd(inst), immU(inst))
    } else if (op == "AUIPC") {
        regwrite(rd(inst), immU(inst) + PC)

    # integer register-register operations
    } else if (op == "ADD") {
        regwrite(rd(inst), dec2two(two2dec(regread(rs1(inst))) + two2dec(regread(rs2(inst)))))
    } else if (op == "SUB") {
        regwrite(rd(inst), dec2two(two2dec(regread(rs1(inst))) - two2dec(regread(rs2(inst)))))
    } else if (op == "SLT") {
        regwrite(rd(inst), two2dec(regread(rs1(inst))) < two2dec(regread(rs2(inst))))
    } else if (op == "SLTU") {
        regwrite(rd(inst), regread(rs1(inst)) < regread(rs2(inst)))
    } else if (op == "AND") {
        regwrite(rd(inst), and(regread(rs1(inst)), regread(rs2(inst))))
    } else if (op == "OR") {
        regwrite(rd(inst), or(regread(rs1(inst)), regread(rs2(inst))))
    } else if (op == "XOR") {
        regwrite(rd(inst), xor(regread(rs1(inst)), regread(rs2(inst))))
    } else if (op == "SLL") {
        # only the low 5 bits of rs2 select the shift distance
        shiftamt = and(regread(rs2(inst)), 0x1f)
        regwrite(rd(inst), lshift(regread(rs1(inst)), shiftamt))
    } else if (op == "SRL") {
        shiftamt = and(regread(rs2(inst)), 0x1f)
        regwrite(rd(inst), rshift(regread(rs1(inst)), shiftamt))
    } else if (op == "SRA") {
        # masking also keeps 31-shiftamt, the sign bit position after
        # the shift, from going negative
        shiftamt = and(regread(rs2(inst)), 0x1f)
        regwrite(rd(inst), signextend(and(rshift(regread(rs1(inst)), shiftamt), 0xffffffff), 31-shiftamt))

    # unconditional jumps
    } else if (op == "JAL") {
        regwrite(rd(inst), PC+4)
        PC_NEXT = PC + two2dec(lshift(immJ(inst), 1))
    } else if (op == "JALR") {
        # Read rs1 before writing the link register: when rd == rs1 the
        # write would otherwise replace the base address. dec2two() brings
        # a negative sum back to an unsigned 32-bit value before and(),
        # which does not accept negative operands.
        target = and(dec2two(two2dec(immI(inst)) + two2dec(regread(rs1(inst)))), 0xfffffffe)
        regwrite(rd(inst), PC+4)
        PC_NEXT = target

    # conditional branches
    } else if (op == "BEQ") {
        if (regread(rs1(inst)) == regread(rs2(inst))) { PC_NEXT = branchtarget }
    } else if (op == "BNE") {
        if (regread(rs1(inst)) != regread(rs2(inst))) { PC_NEXT = branchtarget }
    } else if (op == "BLT") {
        if (two2dec(regread(rs1(inst))) < two2dec(regread(rs2(inst)))) { PC_NEXT = branchtarget }
    } else if (op == "BLTU") {
        if (regread(rs1(inst)) < regread(rs2(inst))) { PC_NEXT = branchtarget }
    } else if (op == "BGE") {
        if (two2dec(regread(rs1(inst))) >= two2dec(regread(rs2(inst)))) { PC_NEXT = branchtarget }
    } else if (op == "BGEU") {
        if (regread(rs1(inst)) >= regread(rs2(inst))) { PC_NEXT = branchtarget }

    # load and store; the effective address wraps modulo 2^32
    } else if (op == "LW") {
        regwrite(rd(inst), memread(and(regread(rs1(inst)) + immI(inst), 0xffffffff)))
    } else if (op == "SW") {
        memwrite(and(regread(rs1(inst)) + immS(inst), 0xffffffff), regread(rs2(inst)))
    }

    PC = PC_NEXT
}
#### DIRECTIVES ###############################################################
# Establish default settings before the first directive is read. mode, PC and
# asmcursor may be preset on the command line with -v; the trace flags for
# registers, instructions and memory are always reset to off here.
BEGIN {
    if (mode == "") {
        # not overridden with -v
        mode="normal"
    }

    # display all register reads and writes
    traceregs=0

    # display each instruction executed
    traceinst=0

    # display memory reads and writes
    tracemem=0

    # debug label resolve. The flag consulted by the "traceresolve" and
    # "resolve" directives is spelled without a trailing 's'. It is only
    # defaulted when unset, so a value supplied with -v keeps working.
    if (traceresolve == "") {
        traceresolve = 0
    }

    # "regs" and "mem" used for register and memory state, but not
    # initialized explicitly for efficiency, since undefined values have
    # a value of 0 in AWK.

    # if the PC is not initialized via -v, it should compare with the empty
    # string, if it is explicitly 0, this if statement will fail
    if (PC == "") {
        PC=0
    }

    if (asmcursor == "") {
        asmcursor=0
    }

    assert_errors = 0
    assemble_errors = 0
}
# A line whose first field starts with '#' is a comment: ignore it.
substr($1, 1, 1) == "#" {
    next
}

# "mode <name>" selects how every following line is interpreted.
$1 == "mode" {
    mode = $2
    next
}

# In disasm mode each line is an instruction word to be disassembled.
mode == "disasm" {
    print(disasm(strtonum($1)))
    next
}

# In assemble mode each line is handed to the assembler.
mode == "assemble" {
    assemble()
    next
}
# Single-line directives: trace switches, and peek/poke access to memory,
# registers and the program counter. Numeric arguments go through strtonum().
# A line that matches none of these falls through to the rules below.
{
    if ($1 == "traceregs") {
        traceregs = strtonum($2)
        next
    }
    if ($1 == "traceinst") {
        traceinst = strtonum($2)
        next
    }
    if ($1 == "tracemem") {
        tracemem = strtonum($2)
        next
    }
    if ($1 == "traceresolve") {
        traceresolve = strtonum($2)
        next
    }
    if ($1 == "poke") {
        memwrite(strtonum($2), strtonum($3))
        next
    }
    if ($1 == "peek") {
        printf("0x%08x\n", memread(strtonum($2)))
        next
    }
    if ($1 == "rpoke") {
        regwrite(strtonum($2), strtonum($3))
        next
    }
    if ($1 == "rpeek") {
        printf("0x%08x\n", regread(strtonum($2)))
        next
    }
    if ($1 == "pcpeek") {
        printf("0x%08x\n", PC)
        next
    }
    if ($1 == "pcpoke") {
        PC = strtonum($2)
        next
    }
}
# "assert reg <regnum> <expected>": compare register <regnum> with <expected>.
# Both sides are passed through dec2two() before comparing, so a negative
# expectation such as -1 can be written directly. On a mismatch a message is
# printed and assert_errors is incremented. The message shows the same
# normalised expectation that was compared, not the raw input.
$1 == "assert" && $2 == "reg" {
    assert_val = dec2two(regread(strtonum($3)))
    assert_want = dec2two(strtonum($4))
    if (assert_val != assert_want) {
        printf("# ASSERTION FAILURE: register x%d was 0x%08x, should have been 0x%08x\n", strtonum($3), assert_val, assert_want)
        assert_errors ++
    }
    next
}

# "assert mem <addr> <expected>": same check against the 32-bit word that
# memread() returns for <addr>.
$1 == "assert" && $2 == "mem" {
    assert_val = dec2two(memread(strtonum($3)))
    assert_want = dec2two(strtonum($4))
    if (assert_val != assert_want) {
        printf("# ASSERTION FAILURE: memory at 0x%08x was 0x%08x, should have been 0x%08x\n", strtonum($3), assert_val, assert_want)
        assert_errors ++
    }
    next
}
# "step <n>": advance the simulated CPU by n ticks. A count that strtonum()
# turns into zero or less runs nothing.
$1 == "step" {
    howlong = strtonum($2)
    while (howlong > 0) {
        nextstate()
        howlong--
    }
    next
}
# "resolve": patch every instruction recorded in unresolved[] with the
# address of its label.
#
# unresolved[label] holds the address of the instruction waiting for that
# label, and labels[label] the address the label stands for. The distance
# from the instruction to the label is packed as a J-type immediate when
# type() reports "J" for the stored word, otherwise as a B-type immediate,
# and OR-ed into the word. A label missing from labels[] is reported, counted
# in assemble_errors and left in unresolved[].
$1 == "resolve" {
    for (resolve_label in unresolved) {
        if (traceresolve)
            printf("# resolving label '%s'\n", resolve_label)

        resolve_addr = unresolved[resolve_label]
        resolve_memval = memread(resolve_addr)

        if (resolve_label in labels) {
            if (traceresolve) {
                printf("# \tcurrent value: 0x%08x at mem[0x%08x]\n", resolve_memval, resolve_addr)
                printf("# \tpacking type: %s\n", type(resolve_memval))
                printf("# \tresolves to: 0x%08x\n", labels[resolve_label])
            }

            # pick the immediate encoding from the instruction's own type
            resolve_kind = type(resolve_memval)
            resolve_offset = dec2two(labels[resolve_label] - resolve_addr)
            if (resolve_kind == "J")
                resolve_memval = or(resolve_memval, packJimm(resolve_offset))
            else
                resolve_memval = or(resolve_memval, packBimm(resolve_offset))

            if (traceresolve)
                printf("# \tupdate memory value to: 0x%08x\n", resolve_memval)

            memwrite(resolve_addr, resolve_memval)
            delete unresolved[resolve_label]
        } else {
            printf("# RESOLVE ERROR: unknown label '%s'\n", resolve_label)
            assemble_errors ++
        }
    }
}
# "showregs": print all 32 registers as four rows of eight. Each row is a
# line of register names followed by a line with their values, taken
# straight from regs[].
$1 == "showregs" {
    for (showregs_base = 0; showregs_base < 32; showregs_base += 8) {
        # every row after the first begins by ending the previous line
        if (showregs_base == 0) {
            printf("# register: ")
        } else {
            printf("\n# register: ")
        }
        for (i = showregs_base; i < showregs_base + 8; i++) {
            printf(" x%-8d ", i)
        }

        printf("\n# = ")
        for (i = showregs_base; i < showregs_base + 8; i++) {
            printf(" 0x%08x ", regs[i])
        }
    }
    printf("\n")
    next
}
# "dump": print the machine state as pcpoke/rpoke/poke directives, bracketed
# by BEGINNING/END marker comments. Register and memory tracing is switched
# off while the dump is produced and restored afterwards.
#
#  - Registers and memory words are emitted in ascending numeric order, so
#    two dumps of the same state are textually identical.
#  - Array indices handed out by for-in are strings. They are forced numeric
#    before being passed to regread(), whose "> 31" range check would
#    otherwise compare as text and report registers 4 to 9 as zero.
#  - Every byte present in mem[] is covered: each is attributed to the
#    aligned 32-bit word containing it and that word is emitted once. This
#    keeps bytes that were stored at an unaligned address.
#  - Registers and words whose value is 0 are omitted.
$1 == "dump" {
    printf("# BEGINNING OF RISCV.AWK DUMP\n")
    printf("pcpoke 0x%08x\n", PC)

    savtraceregs = traceregs
    savtracemem = tracemem
    traceregs = 0
    tracemem = 0

    # ask gawk for numerically sorted traversal, remembering any setting
    # that was already in force
    dump_had_sorted = ("sorted_in" in PROCINFO)
    if (dump_had_sorted) {
        dump_savsorted = PROCINFO["sorted_in"]
    }
    PROCINFO["sorted_in"] = "@ind_num_asc"

    for (addr in regs) {
        dump_val = regread(addr + 0)
        if (dump_val != 0) {
            printf("rpoke %d 0x%08x\n", addr, dump_val)
        }
    }

    delete dump_seen
    for (addr in mem) {
        # start of the aligned word that contains this byte
        dump_base = addr - (addr % 4)
        if (dump_base in dump_seen) {
            continue
        }
        dump_seen[dump_base] = 1
        dump_val = memread(dump_base)
        if (dump_val != 0) {
            printf("poke 0x%08x 0x%08x # %s\n", dump_base, dump_val, disasm(dump_val))
        }
    }
    delete dump_seen

    if (dump_had_sorted) {
        PROCINFO["sorted_in"] = dump_savsorted
    } else {
        delete PROCINFO["sorted_in"]
    }

    printf("# END OF RISCV.AWK DUMP\n")
    traceregs = savtraceregs
    tracemem = savtracemem
    next
}
# "debug <helper> <word>": run one of the internal decoding helpers on <word>
# and print what it returns, to assist in debugging and automated testing.
# All results are printed as 8-digit hex except "disasm", which prints text.
# An unrecognised helper name is not consumed here.
$1 == "debug" {
    debug_word = strtonum($3)

    if ($2 == "funct7") { printf("0x%08x\n", funct7(debug_word)); next }
    if ($2 == "rs2")    { printf("0x%08x\n", rs2(debug_word)); next }
    if ($2 == "rs1")    { printf("0x%08x\n", rs1(debug_word)); next }
    if ($2 == "funct3") { printf("0x%08x\n", funct3(debug_word)); next }
    if ($2 == "rd")     { printf("0x%08x\n", rd(debug_word)); next }
    if ($2 == "opcode") { printf("0x%08x\n", opcode(debug_word)); next }
    if ($2 == "immI")   { printf("0x%08x\n", immI(debug_word)); next }
    if ($2 == "immU")   { printf("0x%08x\n", immU(debug_word)); next }
    if ($2 == "shamt")  { printf("0x%08x\n", shamt(debug_word)); next }
    if ($2 == "arith")  { printf("0x%08x\n", arith(debug_word)); next }
    if ($2 == "immB")   { printf("0x%08x\n", immB(debug_word)); next }
    if ($2 == "immJ")   { printf("0x%08x\n", immJ(debug_word)); next }
    if ($2 == "immS")   { printf("0x%08x\n", immS(debug_word)); next }
    if ($2 == "disasm") { printf("%s\n", disasm(debug_word)); next }
}
# Exit with the number of failed assertions plus assembler errors, so a
# clean run exits 0. Only the low 8 bits of an exit status reach the parent
# process, so the count is capped at 255; otherwise exactly 256 errors would
# be reported as success.
END {
    exit_total = assert_errors + assemble_errors
    if (exit_total > 255) {
        exit_total = 255
    }
    exit(exit_total)
}