From 4487809648352f80d257dccb9174b9ea9276c076 Mon Sep 17 00:00:00 2001 From: David Hoppenbrouwers Date: Thu, 27 Jan 2022 14:11:32 +0100 Subject: [PATCH] Add JIT with more optimization Backend stalls are practically gone and it's VERY fast. --- bench.sh | 1 + disasm.sh | 2 + src/main.rs | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 118 insertions(+), 4 deletions(-) create mode 100755 disasm.sh diff --git a/bench.sh b/bench.sh index d791b90..10f19a4 100755 --- a/bench.sh +++ b/bench.sh @@ -2,3 +2,4 @@ cargo b --release || exit $? perf stat ./target/release/dynasm_experiments interpreter $1 || exit $? perf stat ./target/release/dynasm_experiments jit $1 || exit $? +perf stat ./target/release/dynasm_experiments jit2 $1 || exit $? diff --git a/disasm.sh b/disasm.sh new file mode 100755 index 0000000..1eef633 --- /dev/null +++ b/disasm.sh @@ -0,0 +1,2 @@ +#!/bin/sh +exec objdump -D -b binary -Mintel,x86-64 -m i386 $1 diff --git a/src/main.rs b/src/main.rs index 124d675..9f8e835 100644 --- a/src/main.rs +++ b/src/main.rs @@ -25,6 +25,7 @@ //! [\`]: https://esolangs.org/wiki/%60 use dynasmrt::{dynasm, DynasmApi, DynasmLabelApi}; +use std::time::Instant; pub enum Op { SetV(isize, isize), @@ -109,6 +110,7 @@ fn run(ops: Vec) { let mut i = 0; let mut tape = [0; 0x10000]; let mut v = 0; + let t = Instant::now(); unsafe { while let Some(op) = ops.get(i) { i += 1; @@ -133,6 +135,7 @@ fn run(ops: Vec) { (a == 0).then(|| print(v)); } } + eprintln!("{:?}", Instant::now() - t); } fn jit(ops: Vec) { @@ -172,7 +175,7 @@ fn jit(ops: Vec) { ; jne =>labels[i - b as usize - 2] ); } - Op::JmpA(a, b) => { + Op::JmpA(_a, _b) => { todo!() } } @@ -182,21 +185,129 @@ fn jit(ops: Vec) { ; ret ); let f = jit.finalize().unwrap(); + std::fs::write("/tmp/jit.out", &f[..]).unwrap(); let f: extern "C" fn(*mut isize) = unsafe { core::mem::transmute(f.ptr(dynasmrt::AssemblyOffset(0))) }; + let t = Instant::now(); f([0; 0x10000].as_mut_ptr()); + eprintln!("{:?}", Instant::now() - t); +} + +/// JIT with heavier optimization +/// +/// This JIT actually doesn't generate correct code in all cases but w/e. +fn jit2(ops: Vec) { + let mut jit = dynasmrt::x64::Assembler::new().unwrap(); + let labels = core::iter::repeat_with(|| jit.new_dynamic_label()) + .take(ops.len()) + .collect::>(); + dynasm!(jit + ; push rbx + ; push rbp + ; push r15 + ; push r14 + ; push r13 + ; push r12 + ; mov rbx, rdi + ); + + use dynasmrt::{Register, x64::Rq, x64::X64Relocation, Assembler}; + let mut tape2reg = std::collections::HashMap::::new(); + let mut reg2tape = std::collections::HashMap::::new(); + let mut regqueue = std::collections::VecDeque::from([Rq::R12, Rq::R13, Rq::R14, Rq::R15, Rq::RBP]); + + let mut load_tape = |t, jit: &mut Assembler| { + if let Some(r) = tape2reg.get(&t) { + return *r; + } + let r = regqueue.pop_front().unwrap(); + regqueue.push_back(r); + if let Some(i) = reg2tape.remove(&r) { + tape2reg.remove(&i).unwrap(); + dynasm!(jit ; mov [rbx + (i * 8).try_into().unwrap()], Rq(r.code())); + } + dynasm!(jit ; mov Rq(r.code()), QWORD [rbx + (t * 8).try_into().unwrap()]); + tape2reg.insert(t, r); + reg2tape.insert(r, t); + r + }; + let call_print = |jit: &mut Assembler, reg: Rq| { + dynasm!(jit + ; mov rdi, Rq(reg.code()) + ; mov rax, QWORD print as _ + ; call rax + ); + }; + + let mut last_reg = None; + + for (i, (op, &lbl)) in ops.into_iter().zip(labels.iter()).enumerate() { + match op { + Op::SetV(a, b) => { + dynasm!(jit ; =>lbl); + let r = load_tape(a, &mut jit); + if let Ok(b) = i8::try_from(b) { + dynasm!(jit ; add Rq(r.code()), BYTE b); + } else if let Ok(b) = i32::try_from(b) { + dynasm!(jit ; add Rq(r.code()), DWORD b); + } else { + todo!(); + } + dynasm!(jit ; mov [rbx + (a * 8).try_into().unwrap()], Rq(r.code())); + (a == 0).then(|| call_print(&mut jit, r)); + last_reg = Some(r); + } + Op::SetA(a, b) => { + dynasm!(jit ; =>lbl); + let r = load_tape(a, &mut jit); + dynasm!(jit ; add Rq(r.code()), [rbx + (b * 8).try_into().unwrap()]); + dynasm!(jit ; mov [rbx + (a * 8).try_into().unwrap()], Rq(r.code())); + (a == 0).then(|| call_print(&mut jit, r)); + last_reg = Some(r); + } + Op::JmpV(a, b) => { + let a = load_tape(a, &mut jit); + dynasm!(jit + ; =>lbl + ; cmp Rq(a.code()), Rq(last_reg.unwrap().code()) + ; jne =>labels[i - b as usize - 2] + ); + } + Op::JmpA(_a, _b) => { + todo!() + } + } + } + dynasm!(jit + ; pop r12 + ; pop r13 + ; pop r14 + ; pop r15 + ; pop rbp + ; pop rbx + ; ret + ); + let f = jit.finalize().unwrap(); + std::fs::write("/tmp/jit2.out", &f[..]).unwrap(); + let f: extern "C" fn(*mut isize) = + unsafe { core::mem::transmute(f.ptr(dynasmrt::AssemblyOffset(0))) }; + let t = Instant::now(); + f([0; 0x10000].as_mut_ptr()); + eprintln!("{:?}", Instant::now() - t); } fn main() -> Result<(), Box> { + const USAGE: &str = "usage: "; let mut args = std::env::args().skip(1); - let mode = args.next().ok_or("usage: ")?; - let f = args.next().ok_or("usage: ")?; + let mode = args.next().ok_or(USAGE)?; + let f = args.next().ok_or(USAGE)?; let f = std::fs::read(f)?; let f = parse(f); match &*mode { "interpreter" => run(f), "jit" => jit(f), - _ => Err("usage: ")?, + "jit2" => jit2(f), + _ => Err(USAGE)?, } Ok(()) } -- 2.45.2