~demindiro/dynasm_experiments

07c47bdcebbbaa480cb7ced0718bddf49a1fe4d2 — David Hoppenbrouwers 1 year, 7 months ago
Initial working JIT & interpreter

Conclusions at time of writing: JIT is _always_ worth it. Even if it is
not much faster it saves massively on executed instructions (~5.6x with
loop.ft) and reduces load on the front-end, which is beneficial on
systems with SMT.
9 files changed, 384 insertions(+), 0 deletions(-)

A .gitignore
A Cargo.lock
A Cargo.toml
A README.md
A bench.sh
A hello.ft
A loop.ft
A rustfmt.toml
A src/main.rs
A  => .gitignore +1 -0
@@ 1,1 @@
/target

A  => Cargo.lock +134 -0
@@ 1,134 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3

[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"

[[package]]
name = "byteorder"
version = "1.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610"

[[package]]
name = "dynasm"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "47b1801e630bd336d0bbbdbf814de6cc749c9a400c7e3d995e6adfd455d0c83c"
dependencies = [
 "bitflags",
 "byteorder",
 "lazy_static",
 "proc-macro-error",
 "proc-macro2",
 "quote",
 "syn",
]

[[package]]
name = "dynasm_experiments"
version = "0.1.0"
dependencies = [
 "dynasmrt",
]

[[package]]
name = "dynasmrt"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d428afc93ad288f6dffc1fa5f4a78201ad2eec33c5a522e51c181009eb09061"
dependencies = [
 "byteorder",
 "dynasm",
 "memmap2",
]

[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"

[[package]]
name = "libc"
version = "0.2.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0005d08a8f7b65fb8073cb697aa0b12b631ed251ce73d862ce50eeb52ce3b50"

[[package]]
name = "memmap2"
version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fe3179b85e1fd8b14447cbebadb75e45a1002f541b925f0bfec366d56a81c56d"
dependencies = [
 "libc",
]

[[package]]
name = "proc-macro-error"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
dependencies = [
 "proc-macro-error-attr",
 "proc-macro2",
 "quote",
 "syn",
 "version_check",
]

[[package]]
name = "proc-macro-error-attr"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
dependencies = [
 "proc-macro2",
 "quote",
 "version_check",
]

[[package]]
name = "proc-macro2"
version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7342d5883fbccae1cc37a2353b09c87c9b0f3afd73f5fb9bba687a1f733b029"
dependencies = [
 "unicode-xid",
]

[[package]]
name = "quote"
version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "864d3e96a899863136fc6e99f3d7cae289dafe43bf2c5ac19b70df7210c0a145"
dependencies = [
 "proc-macro2",
]

[[package]]
name = "syn"
version = "1.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a65b3f4ffa0092e9887669db0eae07941f023991ab58ea44da8fe8e2d511c6b"
dependencies = [
 "proc-macro2",
 "quote",
 "unicode-xid",
]

[[package]]
name = "unicode-xid"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"

[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"

A  => Cargo.toml +9 -0
@@ 1,9 @@
[package]
name = "dynasm_experiments"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
dynasmrt = "*"

A  => README.md +25 -0
@@ 1,25 @@
# JIT compiler & interpreter for ´

This program is a quick experiment to (naïvely) test the performance difference
between interpreters and JIT compilers for simple programs.

## ´

´ is a derivative of [\`]. It is modified such that finite loops are possible.

Instructions still use backticks since forward ticks are not ASCII and hence annoying.

## Instructions

A and B are integer constants, \[X\] denotes an address dereference,  V is the last assigned
value, P is the instruction pointer.

| Syntax |       Function      |
| ------ | ------------------- |
| A`+B   | [A] += B, V = [A]   |
| A`B    | [A] += [B], V = [A] |
| +A`+B  | [A] != V ? P += B   |
| +A`B   | [A] != V ? P += [B] |
| other  | ignored             |

[\`]: https://esolangs.org/wiki/%60

A  => bench.sh +4 -0
@@ 1,4 @@
#!/usr/bin/env bash
# Builds the release binary, then runs the program file given as $1 under
# `perf stat`, first with the interpreter and then with the JIT.
# The argument is quoted so paths containing spaces or glob characters work.
cargo b --release || exit $?
perf stat ./target/release/dynasm_experiments interpreter "$1" || exit $?
perf stat ./target/release/dynasm_experiments jit "$1" || exit $?

A  => hello.ft +5 -0
@@ 1,5 @@
0`+72
0`+29
0`+7
0`+0
0`+3

A  => loop.ft +3 -0
@@ 1,3 @@
1`+200000000
1`+-1
+0`+-1

A  => rustfmt.toml +1 -0
@@ 1,1 @@
hard_tabs=true

A  => src/main.rs +202 -0
@@ 1,202 @@
//! # JIT compiler & interpreter for ´
//!
//! This program is a quick experiment to (naïvely) test the performance difference
//! between interpreters and JIT compilers for simple programs.
//!
//! ## ´
//!
//! ´ is a derivative of [\`]. It is modified such that finite loops are possible.
//!
//! Instructions still use backticks since forward ticks are not ASCII and hence annoying.
//!
//! ## Instructions
//!
//! A and B are integer constants, \[X\] denotes an address dereference,  V is the last assigned
//! value, P is the instruction pointer.
//!
//! | Syntax |       Function      |
//! | ------ | ------------------- |
//! | A`+B   | [A] += B, V = [A]   |
//! | A`B    | [A] += [B], V = [A] |
//! | +A`+B  | [A] != V ? P += B   |
//! | +A`B   | [A] != V ? P += [B] |
//! | other  | ignored             |
//!
//! [\`]: https://esolangs.org/wiki/%60

use dynasmrt::{dynasm, DynasmApi, DynasmLabelApi};

/// A single parsed instruction.
///
/// Both fields are the integer constants `A` and `B` of the source syntax, in
/// that order. The `V` variants use `B` as an immediate value, the `A`
/// variants use `B` as a tape address.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Op {
	/// ``A`+B``: add the constant `B` to cell `A`.
	SetV(isize, isize),
	/// ``A`B``: add the contents of cell `B` to cell `A`.
	SetA(isize, isize),
	/// ``+A`+B``: conditional relative jump by the constant `B`.
	JmpV(isize, isize),
	/// ``+A`B``: conditional relative jump by the contents of cell `B`.
	JmpA(isize, isize),
}

/// Consumes an optionally negated decimal integer from the top of `code`.
///
/// `code` is used as a stack: the next input byte is the *last* element.
/// A single leading `-` is consumed if present, followed by every ASCII digit
/// up to the first non-digit byte, which is left on the stack. When no digits
/// are present the result is `0`.
///
/// The function currently has no failure path and always returns `Ok`.
fn parse_int(code: &mut Vec<u8>) -> Result<isize, ()> {
	let negative = code.last() == Some(&b'-');
	if negative {
		code.pop();
	}

	let mut magnitude: isize = 0;
	while let Some(&byte) = code.last() {
		if !byte.is_ascii_digit() {
			break;
		}
		code.pop();
		magnitude = magnitude * 10 + isize::from(byte - b'0');
	}

	Ok(if negative { -magnitude } else { magnitude })
}

/// Parses the raw bytes of a program into a list of instructions.
///
/// An instruction starting with `+` is a jump, one starting with `-` or a
/// digit is an assignment. Any other byte is skipped. Malformed instructions
/// (no backtick after the first operand) are dropped without an error.
fn parse(mut code: Vec<u8>) -> Vec<Op> {
	/// Reads the "A`B" / "A`+B" part of an instruction from the stack.
	///
	/// Returns `(a, b, by_address)` where `by_address` is `true` unless the
	/// backtick is directly followed by `+`. Returns `None` when the byte
	/// after the first operand is not a backtick (that byte is consumed).
	fn operands(code: &mut Vec<u8>) -> Option<(isize, isize, bool)> {
		let a = parse_int(code).ok()?;
		if code.pop() != Some(b'`') {
			return None;
		}
		let by_address = match code.pop() {
			Some(b'+') => false,
			Some(other) => {
				// Not a `+`: it belongs to the second operand, put it back.
				code.push(other);
				true
			}
			None => true,
		};
		let b = parse_int(code).ok()?;
		Some((a, b, by_address))
	}

	// Reverse once so that `pop` yields the bytes in source order.
	code.reverse();
	let mut ops = Vec::new();
	while let Some(byte) = code.pop() {
		let is_jump = match byte {
			b'+' => true,
			b'-' | b'0'..=b'9' => {
				// The byte is part of the first operand; hand it back.
				code.push(byte);
				false
			}
			_ => continue,
		};
		if let Some((a, b, by_address)) = operands(&mut code) {
			ops.push(match (is_jump, by_address) {
				(true, true) => Op::JmpA(a, b),
				(true, false) => Op::JmpV(a, b),
				(false, true) => Op::SetA(a, b),
				(false, false) => Op::SetV(a, b),
			});
		}
	}
	ops
}

/// Writes `v` to stdout as a single UTF-8 encoded character.
///
/// `v` is truncated to 32 bits; values that are not a valid Unicode scalar
/// value are printed as U+FFFD. Write errors are deliberately ignored.
///
/// Declared `extern "C"` so it can be called from JIT-generated code.
extern "C" fn print(v: isize) {
	use std::io::Write;
	let mut buf = [0; 4];
	let encoded = char::from_u32(v as u32)
		.unwrap_or('\u{fffd}')
		.encode_utf8(&mut buf);
	// `write` may emit only part of a multi-byte character; `write_all`
	// retries until the whole encoding has been written.
	let _ = std::io::stdout().write_all(encoded.as_bytes());
}

/// Interprets `ops` on a zero-initialised tape of `0x10000` cells.
///
/// `V` (the last assigned value) starts at 0. After every assignment to
/// cell 0 the new value is passed to `print`. A jump is taken when cell `A`
/// differs from `V`; the next instruction is then the jump's own index plus
/// the offset. Execution ends as soon as the instruction index leaves the
/// program. Cell arithmetic wraps on overflow.
///
/// # Panics
///
/// Panics before executing anything if an instruction refers to a tape
/// address outside `0..0x10000`.
fn run(ops: Vec<Op>) {
	const TAPE_LEN: usize = 0x10000;
	let ops = &ops[..]; // This is faster. Don't ask me why.

	// Every tape address is a constant of the program, so one pass up front
	// is enough to make the unchecked accesses in the hot loop sound.
	let valid = |addr: isize| (0..TAPE_LEN as isize).contains(&addr);
	for op in ops {
		let ok = match *op {
			Op::SetV(a, _) | Op::JmpV(a, _) => valid(a),
			Op::SetA(a, b) | Op::JmpA(a, b) => valid(a) && valid(b),
		};
		assert!(ok, "tape address out of range (valid: 0..{})", TAPE_LEN);
	}

	let mut tape = [0isize; TAPE_LEN];
	let mut v = 0isize;
	let mut i = 0usize;
	// SAFETY (all `unsafe` blocks below): each index is an address operand
	// that the validation pass above proved to be within `0..TAPE_LEN`.
	while let Some(op) = ops.get(i) {
		i += 1;
		let (a, b) = match *op {
			Op::SetV(a, b) => (a, b),
			Op::SetA(a, b) => (a, unsafe { *tape.get_unchecked(b as usize) }),
			Op::JmpV(a, b) => {
				if unsafe { *tape.get_unchecked(a as usize) } != v {
					// `i` already points past this instruction, hence the -1.
					// Wrapping keeps negative offsets from overflowing; a
					// target outside the program simply ends the loop.
					i = i.wrapping_add(b as usize).wrapping_sub(1);
				}
				continue;
			}
			Op::JmpA(a, b) => {
				if unsafe { *tape.get_unchecked(a as usize) } != v {
					let offset = unsafe { *tape.get_unchecked(b as usize) };
					i = i.wrapping_add(offset as usize).wrapping_sub(1);
				}
				continue;
			}
		};
		let cell = unsafe { tape.get_unchecked_mut(a as usize) };
		*cell = cell.wrapping_add(b);
		v = *cell;
		if a == 0 {
			print(v);
		}
	}
}

/// Compiles `ops` to x86-64 machine code and runs it on a zero-initialised
/// tape of `0x10000` cells.
///
/// Register use in the generated code: `rbx` holds the tape base pointer and
/// `rdi` holds `V`, the last assigned value (initially 0). The tape pointer
/// is taken from `rdi` on entry and the argument to `print` is passed in
/// `rdi`, i.e. the code assumes the System V AMD64 calling convention.
///
/// A jump is taken when cell `A` differs from `V`; its target is the jump's
/// own index plus the offset, and a target outside the program leaves the
/// generated function.
///
/// # Panics
///
/// Panics if an instruction refers to a tape address outside `0..0x10000`,
/// if the assembler cannot be created or finalised, or if the program
/// contains `Op::JmpA`, which the JIT does not implement yet.
fn jit(ops: Vec<Op>) {
	const TAPE_LEN: usize = 0x10000;

	// Byte displacement of a tape cell relative to `rbx`; rejects addresses
	// that would make the generated code access memory outside the tape.
	let cell_offset = |addr: isize| -> i32 {
		assert!(
			(0..TAPE_LEN as isize).contains(&addr),
			"tape address out of range (valid: 0..{})",
			TAPE_LEN
		);
		(addr * 8) as i32
	};

	let mut jit = dynasmrt::x64::Assembler::new().unwrap();
	let labels = core::iter::repeat_with(|| jit.new_dynamic_label())
		.take(ops.len())
		.collect::<Box<_>>();
	// Target for jumps that leave the program.
	let end = jit.new_dynamic_label();
	dynasm!(jit
		; push rbx
		; mov rbx, rdi
		// V starts at 0; without this `rdi` would still hold the tape pointer.
		; xor edi, edi
	);
	for (i, (op, &lbl)) in ops.into_iter().zip(labels.iter()).enumerate() {
		match op {
			Op::SetV(a, b) => {
				let a_off = cell_offset(a);
				let imm = b as i64;
				dynasm!(jit
					; =>lbl
					; mov rdi, QWORD imm
					; add rdi, [rbx + a_off]
					; mov [rbx + a_off], rdi
				);
				if a == 0 {
					dynasm!(jit
						; mov rax, QWORD print as _
						; call rax
						// `rdi` is caller-saved: restore V after the call.
						; mov rdi, [rbx + a_off]
					);
				}
			}
			Op::SetA(a, b) => {
				let a_off = cell_offset(a);
				let b_off = cell_offset(b);
				dynasm!(jit
					; =>lbl
					; mov rdi, [rbx + b_off]
					; add rdi, [rbx + a_off]
					; mov [rbx + a_off], rdi
				);
				if a == 0 {
					dynasm!(jit
						; mov rax, QWORD print as _
						; call rax
						// `rdi` is caller-saved: restore V after the call.
						; mov rdi, [rbx + a_off]
					);
				}
			}
			Op::JmpV(a, b) => {
				let a_off = cell_offset(a);
				// Same target as the interpreter: own index plus offset.
				let target = (i as isize)
					.checked_add(b)
					.filter(|t| *t >= 0)
					.and_then(|t| labels.get(t as usize).copied())
					.unwrap_or(end);
				dynasm!(jit
					; =>lbl
					; mov rax, [rbx + a_off]
					; cmp rdi, rax
					; jne =>target
				);
			}
			Op::JmpA(..) => {
				todo!("JmpA (jump by tape contents) is not implemented in the JIT")
			}
		}
	}
	dynasm!(jit
		; =>end
		; pop rbx
		; ret
	);
	let buffer = jit.finalize().unwrap();
	// SAFETY: offset 0 is the start of the function emitted above, which
	// takes the tape pointer as its only argument and returns nothing.
	// `buffer` stays alive until the end of this function.
	let f: extern "C" fn(*mut isize) =
		unsafe { core::mem::transmute(buffer.ptr(dynasmrt::AssemblyOffset(0))) };
	let mut tape = [0isize; TAPE_LEN];
	f(tape.as_mut_ptr());
}

/// Entry point: `<interpreter|jit> <file>`.
///
/// Reads and parses the program file, then executes it with the selected
/// back end. Missing arguments or an unknown mode yield the usage message as
/// the error; a failure to read the file yields the I/O error.
fn main() -> Result<(), Box<dyn std::error::Error>> {
	const USAGE: &str = "usage: <interpreter|jit> <file>";

	let mut argv = std::env::args().skip(1);
	let mode = argv.next().ok_or(USAGE)?;
	let path = argv.next().ok_or(USAGE)?;

	// The file is read and parsed before the mode is checked.
	let source = std::fs::read(path)?;
	let program = parse(source);

	match mode.as_str() {
		"interpreter" => run(program),
		"jit" => jit(program),
		_ => return Err(USAGE.into()),
	}
	Ok(())
}