~charles/awk-riscv

a2eb961e4d1a26d332e6e21d404b241535343dbd — Charles Daniels 1 year, 2 months ago
initial commit
A  => README.md +9 -0
@@ 1,9 @@
# Tests

Tests can be run using the `./run_tests.sh` script.

## Unit tests

Unit tests are organized as a directory of TSV files in the `tests/unit/` folder.
The first column of each file is an input to riscv.awk, and the second column
is the expected output. Each TSV file corresponds to one test case.

A  => fibo.hex +21 -0
@@ 1,21 @@
0xf0002573
0x00100593
0x00000613
0x00100693
0x00000713
0x00a6ce63
0xf0201073
0xf0301073
0x02c0006f
0xf0269073
0xf0369073
0x0200006f
0x00d60733
0x00d00633
0x00e006b3
0x00158593
0xf0269073
0xf0359073
0xfea5c4e3
0x00000013
0xffdff06f

A  => instructions.txt +56 -0
@@ 1,56 @@
LUI	RV32I			0110111	U
AUIPC	RV32I			0010111	U
JAL	RV32I			1101111	J
JALR	RV32I		000	1100111	I
BEQ	RV32I		000	1100011	B
BNE	RV32I		001	1100011	B
BLT	RV32I		100	1100011	B
BGE	RV32I		101	1100011	B
BLTU	RV32I		110	1100011	B
BGEU	RV32I		111	1100011	B
LB	RV32I		000	0000011	I
LH	RV32I		001	0000011	I
LW	RV32I		010	0000011	I
LBU	RV32I		100	0000011	I
LHU	RV32I		101	0000011	I
SB	RV32I		000	0100011	S
SH	RV32I		001	0100011	S
SW	RV32I		010	0100011	S
ADDI	RV32I		000	0010011	I
SLTI	RV32I		010	0010011	I
SLTIU	RV32I		011	0010011	I
XORI	RV32I		100	0010011	I
ORI	RV32I		110	0010011	I
ANDI	RV32I		111	0010011	I
SLLI	RV32I	0000000	001	0010011	R
SRLI	RV32I	0000000	101	0010011	R
SRAI	RV32I	0100000	101	0010011	R
ADD	RV32I	0000000	000	0110011	R
SUB	RV32I	0100000	000	0110011	R
SLL	RV32I	0000000	001	0110011	R
SLT	RV32I	0000000	010	0110011	R
SLTU	RV32I	0000000	011	0110011	R
XOR	RV32I	0000000	100	0110011	R
SRL	RV32I	0000000	101	0110011	R
SRA	RV32I	0100000	101	0110011	R
OR	RV32I	0000000	110	0110011	R
AND	RV32I	0000000	111	0110011	R
FENCE	RV32I		000	0001111	I
FENCE.I	RV32I		001	0001111	I
ECALL	RV32I		000	1110011	I
EBREAK	RV32I		000	1110011	I
CSRRW	RV32I		001	1110011	I
CSRRS	RV32I		010	1110011	I
CSRRC	RV32I		011	1110011	I
CSRRWI	RV32I		101	1110011	I
CSRRSI	RV32I		110	1110011	I
CSRRCI	RV32I		111	1110011	I
MUL	RV32M	0000001	000	0110011	R
MULH	RV32M	0000001	001	0110011	R
MULHSU	RV32M	0000001	010	0110011	R
MULHU	RV32M	0000001	011	0110011	R
DIV	RV32M	0000001	100	0110011	R
DIVU	RV32M	0000001	101	0110011	R
REM	RV32M	0000001	110	0110011	R
REMU	RV32M	0000001	111	0110011	R


A  => make_op2str.py +23 -0
@@ 1,23 @@
#!/usr/bin/env python3

# This script reads instructions.txt and generates a list of AWK if statements
# for converting the opcode to a string.

import sys

def awk_condition(fields):
    """Return the AWK ``if`` statement for one row of instructions.txt.

    *fields* is the whitespace-split row: mnemonic, extension, optional
    funct7, optional funct3, opcode and instruction type, with the bit
    fields written in binary.  Because empty columns vanish when splitting
    on whitespace, the number of fields tells which optional columns are
    present: 4 (opcode only), 5 (funct3 + opcode) or 6 (funct7 + funct3 +
    opcode).  Any other width returns ``None`` so the caller can skip it.
    """
    mnemonic = fields[0]
    opcode = int(fields[-2], 2)

    if len(fields) == 4:
        return 'if (opcode(v) == 0x{:x}) {{ return "{}" }}'.format(
            opcode, mnemonic)

    if len(fields) == 5:
        funct3 = int(fields[2], 2)
        return (
            'if ((opcode(v) == 0x{:x}) && (funct3(v) == 0x{:x})) '
            '{{return "{}"}}'
        ).format(opcode, funct3, mnemonic)

    if len(fields) == 6:
        funct7 = int(fields[2], 2)
        funct3 = int(fields[3], 2)
        return (
            'if ((opcode(v) == 0x{:x}) && (funct7(v) == 0x{:x}) '
            '&& (funct3(v) == 0x{:x})) {{return "{}"}}'
        ).format(opcode, funct7, funct3, mnemonic)

    return None


def main(lines=None):
    """Print one AWK statement per usable row read from *lines*.

    *lines* defaults to standard input.  Blank rows are ignored.
    """
    for raw in (sys.stdin if lines is None else lines):
        fields = raw.split()
        if not fields:
            continue
        statement = awk_condition(fields)
        if statement is not None:
            print(statement)


if __name__ == "__main__":
    main()

A  => make_type.py +18 -0
@@ 1,18 @@
#!/usr/bin/env python3

# This script reads instructions.txt and generates a list of AWK if statements
# for checking the return type.

import sys

def type_conditions(lines):
    """Yield one AWK ``if`` statement per distinct opcode found in *lines*.

    Each non-blank line is split on whitespace; the last field is the
    instruction type and the one before it is the opcode in binary.  Only
    the first row seen for a given opcode produces a statement, so the type
    reported for an opcode is the type of its first row.
    """
    seen = set()
    for raw in lines:
        fields = raw.split()
        if not fields:
            continue
        kind = fields[-1]
        opcode = int(fields[-2], 2)
        if opcode in seen:
            continue
        seen.add(opcode)
        yield 'if (opcode(v) == 0x{:x}) {{ return "{}" }}'.format(opcode, kind)


def main():
    """Read instruction rows from standard input and print the statements."""
    for statement in type_conditions(sys.stdin):
        print(statement)


if __name__ == "__main__":
    main()

A  => riscv.awk +167 -0
@@ 1,167 @@
# Implementation of RV32I
# (https://riscv.org//wp-content/uploads/2017/05/riscv-spec-v2.2.pdf)
# in AWK.
#
# Copyright 2020 Charles A. Daniels

# Sign-extend v to 32 bits, treating bit i as the sign bit.
#
# If bit i of v is clear, v is returned unchanged. Otherwise every bit from
# i up to bit 31 is set in the result, which stays a non-negative number
# holding the 32-bit two's-complement pattern.
#
# temp and j are function-local scratch variables (AWK declares locals as
# extra parameters); callers pass only v and i. Declaring j here keeps the
# loop index from leaking into the global namespace.
function signextend(v, i,            temp, j) {
	temp = v

	# sign bit clear: nothing to extend
	if (and(v, lshift(1, i)) == 0) { return v }

	for (j = i ; j < 32  ; j++) {
		temp = or(temp, lshift(1, j))
	}
	return temp
}

# masks to pull relevant values out of an instruction
# funct7: instruction bits 31..25, returned as a 7-bit value
function funct7(v) { return and(rshift(v, 25), 0x7f) }
# rs2: instruction bits 24..20, returned as a 5-bit value
function rs2(v) { return and(rshift(v, 20), 0x1f) }
# rs1: instruction bits 19..15, returned as a 5-bit value
function rs1(v) { return and(rshift(v, 15), 0x1f) }
# funct3: instruction bits 14..12, returned as a 3-bit value
function funct3(v) { return and(rshift(v, 12), 0x7) }
# rd: instruction bits 11..7, returned as a 5-bit value
function rd(v) { return and(rshift(v, 7), 0x1f) }
# opcode: instruction bits 6..0
function opcode(v) { return and(v, 0x7f) }
# immI: instruction bits 31..20 as a 12-bit value (zero-extended, i.e. the
# result is not sign-extended)
function immI(v) { return and(rshift(v, 20), 0xfff) }
# immU: instruction bits 31..12, left in place (low 12 bits cleared)
function immU(v) { return and(v, 0xfffff000) }
# immS: the 12-bit S-type (store) immediate, zero-extended like immI.
#
# The immediate is split across the instruction word:
#   imm[11:5] = instruction bits 31..25
#   imm[4:0]  = instruction bits 11..7
# The low half must come from bits 11..7; bits 4..0 of the instruction are
# part of the opcode and contribute nothing to the immediate.
function immS(v)   { return or( \
	rshift(and(v, 0xfe000000), 25-5), \
	rshift(and(v, 0x00000f80), 7-0))}
# immB: the B-type (branch) offset, sign-extended from bit 12.
#
# The offset is a multiple of 2, so bit 0 is always zero and is not encoded:
#   imm[12]   = instruction bit  31
#   imm[10:5] = instruction bits 30..25
#   imm[4:1]  = instruction bits 11..8
#   imm[11]   = instruction bit  7
# Each shift below is written as (source bit - destination bit).
function immB(v)   { return signextend(or( \
	rshift(and(v, 0x80000000), 31-12), \
	rshift(and(v, 0x7e000000), 25-5),  \
	lshift(and(v, 0x00000080), 11-7),  \
	rshift(and(v, 0x00000f00), 8-1)), 12) }
# immJ: the J-type (JAL) offset, sign-extended from bit 20.
#
# The offset is a multiple of 2, so bit 0 is always zero and is not encoded:
#   imm[20]    = instruction bit  31
#   imm[10:1]  = instruction bits 30..21
#   imm[11]    = instruction bit  20
#   imm[19:12] = instruction bits 19..12 (already in position)
# Each shift below is written as (source bit - destination bit).
function immJ(v)   { return signextend(or( \
	rshift(and(v, 0x80000000), 31-20), \
	rshift(and(v, 0x7fe00000), 21-1),  \
	rshift(and(v, 0x00100000), 20-11), \
	and(v, 0x000ff000)), 20)}
# Classify instruction v by its opcode field.
#
# Returns the instruction format as one of "R", "I", "S", "B", "U", "J".
# For an opcode that is not listed, a diagnostic is written to /dev/stderr
# and "E" is returned.
function type(v) {
	switch (opcode(v)) {
	case 0x37:
	case 0x17:
		return "U"
	case 0x6f:
		return "J"
	case 0x63:
		return "B"
	case 0x23:
		return "S"
	case 0x33:
		return "R"
	case 0x67:
	case 0x3:
	case 0x13:
	case 0xf:
	case 0x73:
		return "I"
	}

	printf("don't know type of opcode 0x%x of instruction 0x%x", opcode(v), v) > "/dev/stderr"

	return "E"
}


# Return the mnemonic of instruction v (RV32I plus the RV32M multiply/divide
# instructions), selected by opcode and, where needed, funct3 and funct7.
#
# If no entry matches, a diagnostic is written to /dev/stderr and "ERROR" is
# returned.
#
# op, f3, f7 are function-local scratch variables; callers pass only v.
function op2str(v,            op, f3, f7) {
	op = opcode(v)
	f3 = funct3(v)
	f7 = funct7(v)

	# upper-immediate and jumps
	if (op == 0x37) { return "LUI" }
	if (op == 0x17) { return "AUIPC" }
	if (op == 0x6f) { return "JAL" }
	if ((op == 0x67) && (f3 == 0x0)) { return "JALR" }

	# conditional branches
	if ((op == 0x63) && (f3 == 0x0)) { return "BEQ" }
	if ((op == 0x63) && (f3 == 0x1)) { return "BNE" }
	if ((op == 0x63) && (f3 == 0x4)) { return "BLT" }
	if ((op == 0x63) && (f3 == 0x5)) { return "BGE" }
	if ((op == 0x63) && (f3 == 0x6)) { return "BLTU" }
	if ((op == 0x63) && (f3 == 0x7)) { return "BGEU" }

	# loads
	if ((op == 0x3) && (f3 == 0x0)) { return "LB" }
	if ((op == 0x3) && (f3 == 0x1)) { return "LH" }
	if ((op == 0x3) && (f3 == 0x2)) { return "LW" }
	if ((op == 0x3) && (f3 == 0x4)) { return "LBU" }
	if ((op == 0x3) && (f3 == 0x5)) { return "LHU" }

	# stores
	if ((op == 0x23) && (f3 == 0x0)) { return "SB" }
	if ((op == 0x23) && (f3 == 0x1)) { return "SH" }
	if ((op == 0x23) && (f3 == 0x2)) { return "SW" }

	# register-immediate arithmetic
	if ((op == 0x13) && (f3 == 0x0)) { return "ADDI" }
	if ((op == 0x13) && (f3 == 0x2)) { return "SLTI" }
	if ((op == 0x13) && (f3 == 0x3)) { return "SLTIU" }
	if ((op == 0x13) && (f3 == 0x4)) { return "XORI" }
	if ((op == 0x13) && (f3 == 0x6)) { return "ORI" }
	if ((op == 0x13) && (f3 == 0x7)) { return "ANDI" }
	if ((op == 0x13) && (f7 == 0x0) && (f3 == 0x1)) { return "SLLI" }
	if ((op == 0x13) && (f7 == 0x0) && (f3 == 0x5)) { return "SRLI" }
	if ((op == 0x13) && (f7 == 0x20) && (f3 == 0x5)) { return "SRAI" }

	# register-register arithmetic
	if ((op == 0x33) && (f7 == 0x0) && (f3 == 0x0)) { return "ADD" }
	if ((op == 0x33) && (f7 == 0x20) && (f3 == 0x0)) { return "SUB" }
	if ((op == 0x33) && (f7 == 0x0) && (f3 == 0x1)) { return "SLL" }
	if ((op == 0x33) && (f7 == 0x0) && (f3 == 0x2)) { return "SLT" }
	if ((op == 0x33) && (f7 == 0x0) && (f3 == 0x3)) { return "SLTU" }
	if ((op == 0x33) && (f7 == 0x0) && (f3 == 0x4)) { return "XOR" }
	if ((op == 0x33) && (f7 == 0x0) && (f3 == 0x5)) { return "SRL" }
	if ((op == 0x33) && (f7 == 0x20) && (f3 == 0x5)) { return "SRA" }
	if ((op == 0x33) && (f7 == 0x0) && (f3 == 0x6)) { return "OR" }
	if ((op == 0x33) && (f7 == 0x0) && (f3 == 0x7)) { return "AND" }

	# memory ordering
	if ((op == 0xf) && (f3 == 0x0)) { return "FENCE" }
	if ((op == 0xf) && (f3 == 0x1)) { return "FENCE.I" }

	# system: ECALL and EBREAK share opcode and funct3 and differ only in
	# the 12-bit immediate field (0 for ECALL, 1 for EBREAK), so EBREAK has
	# to be tested first. Every other immediate value keeps reporting ECALL.
	if ((op == 0x73) && (f3 == 0x0) && (immI(v) == 0x1)) { return "EBREAK" }
	if ((op == 0x73) && (f3 == 0x0)) { return "ECALL" }
	if ((op == 0x73) && (f3 == 0x1)) { return "CSRRW" }
	if ((op == 0x73) && (f3 == 0x2)) { return "CSRRS" }
	if ((op == 0x73) && (f3 == 0x3)) { return "CSRRC" }
	if ((op == 0x73) && (f3 == 0x5)) { return "CSRRWI" }
	if ((op == 0x73) && (f3 == 0x6)) { return "CSRRSI" }
	if ((op == 0x73) && (f3 == 0x7)) { return "CSRRCI" }

	# RV32M multiply / divide
	if ((op == 0x33) && (f7 == 0x1) && (f3 == 0x0)) { return "MUL" }
	if ((op == 0x33) && (f7 == 0x1) && (f3 == 0x1)) { return "MULH" }
	if ((op == 0x33) && (f7 == 0x1) && (f3 == 0x2)) { return "MULHSU" }
	if ((op == 0x33) && (f7 == 0x1) && (f3 == 0x3)) { return "MULHU" }
	if ((op == 0x33) && (f7 == 0x1) && (f3 == 0x4)) { return "DIV" }
	if ((op == 0x33) && (f7 == 0x1) && (f3 == 0x5)) { return "DIVU" }
	if ((op == 0x33) && (f7 == 0x1) && (f3 == 0x6)) { return "REM" }
	if ((op == 0x33) && (f7 == 0x1) && (f3 == 0x7)) { return "REMU" }

	printf("don't know mnemonic of instruction 0x%x", v) > "/dev/stderr"
	return "ERROR"
}

# Return a very rudimentary disassembly of the instruction v.
#
# The result starts with the instruction word in hex, followed by the
# mnemonic and the operands appropriate to the instruction format:
#   R: rd rs1 rs2        I: rd rs1 imm       S: rs1 rs2 imm
#   B: rs1 rs2 imm       U: rd imm           J: rd imm
# When the format is unknown only the hex word is returned.
#
# The format is looked up once and kept in the local t: type() reports
# unknown opcodes on stderr, so calling it per branch would repeat that
# message for every comparison.
function disasm(v,            t) {
	t = type(v)

	if (t == "R") {
		return sprintf("0x%08x: %s x%d x%d x%d", v, op2str(v), rd(v), rs1(v), rs2(v))
	}

	if (t == "I") {
		return sprintf("0x%08x: %s x%d x%d %d", v, op2str(v), rd(v), rs1(v), immI(v))
	}

	if (t == "S") {
		return sprintf("0x%08x: %s x%d x%d %d", v, op2str(v), rs1(v), rs2(v), immS(v))
	}

	if (t == "B") {
		return sprintf("0x%08x: %s x%d x%d 0x%x", v, op2str(v), rs1(v), rs2(v), immB(v))
	}

	if (t == "U") {
		return sprintf("0x%08x: %s x%d 0x%x", v, op2str(v), rd(v), immU(v))
	}

	if (t == "J") {
		return sprintf("0x%08x: %s x%d 0x%x", v, op2str(v), rd(v), immJ(v))
	}

	return sprintf("0x%08x:", v)
}

# These rules expose some of the internal functions to assist in debugging
# and automated testing. Input lines have the form
#
#   debug <function> <value> [<bit>]
#
# <value> is parsed with strtonum(), so hex such as 0x00001000 is accepted.
# The result is printed as a zero-padded 8-digit hex number.
$1 == "debug" && $2 == "funct7" { printf("0x%08x\n", funct7(strtonum($3))) }
$1 == "debug" && $2 == "rs2" { printf("0x%08x\n", rs2(strtonum($3))) }
$1 == "debug" && $2 == "rs1" { printf("0x%08x\n", rs1(strtonum($3))) }
$1 == "debug" && $2 == "funct3" { printf("0x%08x\n", funct3(strtonum($3))) }
$1 == "debug" && $2 == "rd" { printf("0x%08x\n", rd(strtonum($3))) }
$1 == "debug" && $2 == "opcode" { printf("0x%08x\n", opcode(strtonum($3))) }
$1 == "debug" && $2 == "immI" { printf("0x%08x\n", immI(strtonum($3))) }
$1 == "debug" && $2 == "immU" { printf("0x%08x\n", immU(strtonum($3))) }
$1 == "debug" && $2 == "immS" { printf("0x%08x\n", immS(strtonum($3))) }
$1 == "debug" && $2 == "immB" { printf("0x%08x\n", immB(strtonum($3))) }
$1 == "debug" && $2 == "immJ" { printf("0x%08x\n", immJ(strtonum($3))) }
# signextend needs the sign-bit index as well; it is read from the optional
# fourth field. When that field is absent strtonum() yields 0, so bit 0 is
# used as the sign bit.
$1 == "debug" && $2 == "signextend" { printf("0x%08x\n", signextend(strtonum($3), strtonum($4))) }

A  => run_tests.sh +29 -0
@@ 1,29 @@
#!/bin/sh

# Run every unit test case in tests/unit/ against riscv.awk.
#
# Each *.tsv file is one test case. Every line with exactly two
# tab-separated fields is a check: the first field is fed to riscv.awk on
# stdin and the output must equal the second field. Comment lines and lines
# with any other number of fields are ignored. The script stops with exit
# status 1 at the first failing check.

# all paths below are relative to the directory holding this script
cd "$(dirname "$0")" || exit 1

echo '#### running unit tests...'
for f in tests/unit/*.tsv ; do
	# an unmatched glob is left as the literal pattern; skip it
	[ -e "$f" ] || continue

	echo "------ test case $(basename "$f" .tsv)"
	while read -r line ; do
		# skip lines with the wrong number of fields
		# (printf rather than echo: echo may interpret backslashes)
		if [ "$(printf '%s\n' "$line" | awk -F '\t' '{print(NF)}' )" != 2 ] ; then
			continue
		fi

		# skip comments
		if printf '%s\n' "$line" | grep -Eq '^[[:space:]]*#' ; then
			continue
		fi

		input="$(printf '%s\n' "$line" | cut -f 1)"
		expect="$(printf '%s\n' "$line" | cut -f 2)"

		actual="$(printf '%s\n' "$input" | awk -f riscv.awk)"
		if [ "$actual" != "$expect" ] ; then
			echo "test failed, input was '$input', expected '$expect' but got '$actual'"
			exit 1
		fi

	done < "$f"
done

A  => tests/unit/funct3.tsv +34 -0
@@ 1,34 @@
# tests funct3

debug funct3 0x80000000	0x00000000
debug funct3 0x40000000	0x00000000
debug funct3 0x20000000	0x00000000
debug funct3 0x10000000	0x00000000
debug funct3 0x08000000	0x00000000
debug funct3 0x04000000	0x00000000
debug funct3 0x02000000	0x00000000
debug funct3 0x01000000	0x00000000
debug funct3 0x00800000	0x00000000
debug funct3 0x00400000	0x00000000
debug funct3 0x00200000	0x00000000
debug funct3 0x00100000	0x00000000
debug funct3 0x00080000	0x00000000
debug funct3 0x00040000	0x00000000
debug funct3 0x00020000	0x00000000
debug funct3 0x00010000	0x00000000
debug funct3 0x00008000	0x00000000
debug funct3 0x00004000	0x00000004
debug funct3 0x00002000	0x00000002
debug funct3 0x00001000	0x00000001
debug funct3 0x00000800	0x00000000
debug funct3 0x00000400	0x00000000
debug funct3 0x00000200	0x00000000
debug funct3 0x00000100	0x00000000
debug funct3 0x00000080	0x00000000
debug funct3 0x00000040	0x00000000
debug funct3 0x00000020	0x00000000
debug funct3 0x00000010	0x00000000
debug funct3 0x00000008	0x00000000
debug funct3 0x00000004	0x00000000
debug funct3 0x00000002	0x00000000
debug funct3 0x00000001	0x00000000

A  => tests/unit/funct7.tsv +34 -0
@@ 1,34 @@
# tests funct7

debug funct7 0x80000000	0x00000040
debug funct7 0x40000000	0x00000020
debug funct7 0x20000000	0x00000010
debug funct7 0x10000000	0x00000008
debug funct7 0x08000000	0x00000004
debug funct7 0x04000000	0x00000002
debug funct7 0x02000000	0x00000001
debug funct7 0x01000000	0x00000000
debug funct7 0x00800000	0x00000000
debug funct7 0x00400000	0x00000000
debug funct7 0x00200000	0x00000000
debug funct7 0x00100000	0x00000000
debug funct7 0x00080000	0x00000000
debug funct7 0x00040000	0x00000000
debug funct7 0x00020000	0x00000000
debug funct7 0x00010000	0x00000000
debug funct7 0x00008000	0x00000000
debug funct7 0x00004000	0x00000000
debug funct7 0x00002000	0x00000000
debug funct7 0x00001000	0x00000000
debug funct7 0x00000800	0x00000000
debug funct7 0x00000400	0x00000000
debug funct7 0x00000200	0x00000000
debug funct7 0x00000100	0x00000000
debug funct7 0x00000080	0x00000000
debug funct7 0x00000040	0x00000000
debug funct7 0x00000020	0x00000000
debug funct7 0x00000010	0x00000000
debug funct7 0x00000008	0x00000000
debug funct7 0x00000004	0x00000000
debug funct7 0x00000002	0x00000000
debug funct7 0x00000001	0x00000000

A  => tests/unit/rd.tsv +34 -0
@@ 1,34 @@
# tests rd

debug rd 0x80000000	0x00000000
debug rd 0x40000000	0x00000000
debug rd 0x20000000	0x00000000
debug rd 0x10000000	0x00000000
debug rd 0x08000000	0x00000000
debug rd 0x04000000	0x00000000
debug rd 0x02000000	0x00000000
debug rd 0x01000000	0x00000000
debug rd 0x00800000	0x00000000
debug rd 0x00400000	0x00000000
debug rd 0x00200000	0x00000000
debug rd 0x00100000	0x00000000
debug rd 0x00080000	0x00000000
debug rd 0x00040000	0x00000000
debug rd 0x00020000	0x00000000
debug rd 0x00010000	0x00000000
debug rd 0x00008000	0x00000000
debug rd 0x00004000	0x00000000
debug rd 0x00002000	0x00000000
debug rd 0x00001000	0x00000000
debug rd 0x00000800	0x00000010
debug rd 0x00000400	0x00000008
debug rd 0x00000200	0x00000004
debug rd 0x00000100	0x00000002
debug rd 0x00000080	0x00000001
debug rd 0x00000040	0x00000000
debug rd 0x00000020	0x00000000
debug rd 0x00000010	0x00000000
debug rd 0x00000008	0x00000000
debug rd 0x00000004	0x00000000
debug rd 0x00000002	0x00000000
debug rd 0x00000001	0x00000000

A  => tests/unit/rs1.tsv +34 -0
@@ 1,34 @@
# tests rs1

debug rs1 0x80000000	0x00000000
debug rs1 0x40000000	0x00000000
debug rs1 0x20000000	0x00000000
debug rs1 0x10000000	0x00000000
debug rs1 0x08000000	0x00000000
debug rs1 0x04000000	0x00000000
debug rs1 0x02000000	0x00000000
debug rs1 0x01000000	0x00000000
debug rs1 0x00800000	0x00000000
debug rs1 0x00400000	0x00000000
debug rs1 0x00200000	0x00000000
debug rs1 0x00100000	0x00000000
debug rs1 0x00080000	0x00000010
debug rs1 0x00040000	0x00000008
debug rs1 0x00020000	0x00000004
debug rs1 0x00010000	0x00000002
debug rs1 0x00008000	0x00000001
debug rs1 0x00004000	0x00000000
debug rs1 0x00002000	0x00000000
debug rs1 0x00001000	0x00000000
debug rs1 0x00000800	0x00000000
debug rs1 0x00000400	0x00000000
debug rs1 0x00000200	0x00000000
debug rs1 0x00000100	0x00000000
debug rs1 0x00000080	0x00000000
debug rs1 0x00000040	0x00000000
debug rs1 0x00000020	0x00000000
debug rs1 0x00000010	0x00000000
debug rs1 0x00000008	0x00000000
debug rs1 0x00000004	0x00000000
debug rs1 0x00000002	0x00000000
debug rs1 0x00000001	0x00000000