~tim/scheme-vm

d1eac914a1b0fb085613cdef0216323faa54c673 — Tim Morgan 3 years ago e4dd824
Use pest instead of rust-peg

The code is ugly, needs refactoring, but it works! And it's faster!

BUT, while optimizing this code, I realized I was doing something really
slow to find the line and column indices for each atom. That was
probably a big part of the slowness with my rust-peg implementation. So
I'd like to go back to the rust-peg implementation and see if I can make
some optimizations to get the speed closer to my implementation using pest.
6 files changed, 89 insertions(+), 99 deletions(-)

M Cargo.toml
D build.rs
M src/atom.rs
M src/lib.rs
A src/lisp.pest
D src/lisp.rustpeg
M Cargo.toml => Cargo.toml +2 -4
@@ 2,7 2,6 @@
name = "scheme-vm"
version = "0.1.0"
authors = ["Tim Morgan <tim@timmorgan.org>"]
build = "build.rs"

[lib]
crate-type = ["dylib"]


@@ 11,6 10,5 @@ crate-type = ["dylib"]
libc = "0.2.37"
ruby-sys = "0.3.0"
lazy_static = "1.0.0"

[build-dependencies]
peg = { git = "https://github.com/kevinmehall/rust-peg", branch = "master" }
pest = "^1.0"
pest_derive = "^1.0"

D build.rs => build.rs +0 -5
@@ 1,5 0,0 @@
extern crate peg;

fn main() {
    peg::cargo_build("src/lisp.rustpeg");
}

M src/atom.rs => src/atom.rs +1 -4
@@ 1,10 1,7 @@
use rb;
use rb::{Value, RB_NIL};

pub fn atom(name: &str, filename: &str, offset: usize, input: &str) -> Value {
    let line = int2rbnum!(&input[..offset].matches("\n").count() + 1);
    let newline_index = *&input[..offset].match_indices("\n").last().unwrap_or((0, "\n")).0;
    let column = int2rbnum!(offset - newline_index);
pub fn atom(name: &str, filename: &str, offset: usize, line: Value, column: Value) -> Value {
    let name_str = rb::str_new(&name.to_string());
    let filename_str = rb::str_new(&filename.to_string());
    let offset_num = int2rbnum!(offset);

M src/lib.rs => src/lib.rs +49 -26
@@ 2,6 2,9 @@ extern crate libc;
extern crate ruby_sys;
#[macro_use]
extern crate lazy_static;
extern crate pest;
#[macro_use]
extern crate pest_derive;

#[macro_use] mod rb;
use rb::{CallbackPtr, Value, RB_NIL};


@@ 9,37 12,57 @@ use rb::{CallbackPtr, Value, RB_NIL};
mod atom;
mod quotes;

mod lisp {
    include!(concat!(env!("OUT_DIR"), "/lisp.rs"));
use pest::Parser;
use atom::atom;
use quotes::QUOTES;

pub mod lisp {
    #[derive(Parser)]
    #[grammar = "lisp.pest"]
    pub struct LispParser;
}

use lisp::*;

/// Recursively converts a pest parse-tree node into a Ruby value.
///
/// * `pair` - the parse-tree node to convert.
/// * `filename` - forwarded unchanged to `atom` for every atom produced.
/// * `newlines` - byte offsets of every `"\n"` in the source, in ascending
///   order. Used to derive the line and column of each atom.
///
/// Returns `None` for comments (so they are dropped from the enclosing array)
/// and `Some(value)` for everything else: a Ruby array for `program` and the
/// sexp/quoted rules, an atom for identifiers, and a Ruby string for strings
/// and quote tokens.
///
/// # Panics
///
/// Panics if a quote token has no entry in `QUOTES`, or if `pair` has a rule
/// that is not handled here.
fn build_ast(pair: pest::iterators::Pair<lisp::Rule>, filename: &str, newlines: &Vec<usize>) -> Option<Value> {
    match pair.as_rule() {
        lisp::Rule::program | lisp::Rule::simple_sexp | lisp::Rule::quoted_sexp | lisp::Rule::quoted_atom => {
            let mut array = rb::ary_new();
            for child in pair.into_inner() {
                // Children that yield `None` (comments) are skipped.
                if let Some(ast) = build_ast(child, filename, newlines) {
                    array = rb::ary_push(array, ast);
                }
            }
            Some(array)
        }
        lisp::Rule::simple_atom | lisp::Rule::delimited_identifier_inner => {
            let span = pair.into_span();
            let start = span.start();
            // `newlines` is sorted, so a binary search yields the number of
            // newline offsets strictly below `start` without scanning or
            // allocating. Both `Ok(i)` and `Err(i)` mean exactly `i` smaller
            // elements, because each offset occurs only once.
            let preceding = match newlines.binary_search(&start) {
                Ok(count) | Err(count) => count,
            };
            let line = int2rbnum!(preceding + 1);
            // Columns are measured from the last newline before the atom, or
            // from offset 0 on the first line.
            // NOTE(review): this makes columns 0-based on the first line and
            // 1-based on later lines; kept as-is, confirm whether callers rely
            // on it before changing.
            let line_start = if preceding == 0 { 0 } else { newlines[preceding - 1] };
            let column = int2rbnum!(start - line_start);
            Some(atom(span.as_str(), filename, start, line, column))
        }
        lisp::Rule::string => Some(rb::str_new(pair.into_span().as_str())),
        lisp::Rule::quote => {
            let q = pair.into_span().as_str();
            // Map the quote token to the string stored for it in `QUOTES`.
            Some(rb::str_new(&QUOTES.get(&q).unwrap().to_string()))
        }
        lisp::Rule::comment => None,
        _ => {
            println!("{:?} is unknown", pair.as_rule());
            unreachable!()
        }
    }
}

fn parse_native(rself: Value) -> Value {
    let program_str = rbstr2str!(&rb::ivar_get(&rself, "@code"));
    let code = rbstr2str!(&rb::ivar_get(&rself, "@code"));
    let filename = rbstr2str!(&rb::ivar_get(&rself, "@filename"));
    let newlines = code.match_indices("\n").map(|(i, s)| i).collect();
    rb::gc_disable();
    match lisp::program(&program_str, &filename) {
        Ok(ast) => {
            rb::gc_enable();
            ast
        },
        Err(err) => {
            rb::gc_enable();
            //let expected = rb::vec2rbarr(
                //err.expected.iter().cloned().map(|e| rb::str_new(&e.to_string())).collect()
            //);
            println!("{}", err.line);
            println!("{}", err.column);
            println!("{:?}", err.expected);
            println!("{:?}", &program_str);
            println!("{:?}", &program_str[err.column..]);
            let c_parser = rb::const_get("Parser", &RB_NIL);
            let c_parse_error = rb::const_get("ParseError", &c_parser);
            let line = int2rbnum!(err.line);
            let error = rb::class_new_instance(&c_parse_error, vec![line]);
            rb::raise_instance(&error);
            RB_NIL
        }
    }
    let pairs = LispParser::parse(lisp::Rule::program, code).unwrap_or_else(|e| panic!("{}", e));
    build_ast(pairs.into_iter().next().unwrap(), filename, &newlines).expect("error parsing")
}

#[no_mangle]

A src/lisp.pest => src/lisp.pest +37 -0
@@ 0,0 1,37 @@
// Grammar for the Lisp/Scheme reader.
//
// Whitespace is matched explicitly throughout: every rule that contains a
// sequence is atomic (`$` / `@`) or spells out `whitespace*` itself.

whitespace = _{ "\t" | "\n" | " " }

escape = _{ "\\" ~ any }

// A double-quoted string; the surrounding quotes are part of the match.
string = ${ "\"" ~ ((!("\"" | "\\") ~ any) | escape)* ~ "\"" }

// The text between the bars of a |delimited identifier|.
delimited_identifier_inner = ${ (!"|" ~ any)+ }

delimited_identifier = _{ "|" ~ delimited_identifier_inner ~ "|" }

simple_atom = ${ (!("(" | ")" | " " | "\t" | "\n" | "[" | "]" | "{" | "}" | "|") ~ any)+ }

// A quote token followed by an atom. Delimited identifiers are accepted here
// too, so that '|foo bar| is read as one quoted identifier instead of a bare
// ' atom followed by a separate identifier.
quoted_atom = ${ quote ~ (delimited_identifier | simple_atom) }

atom = _{ quoted_atom | delimited_identifier | simple_atom }

sexp = _{ quoted_sexp | simple_sexp }

comment = { block_comment | line_comment | datum_comment }

expression = _{ string | comment | sexp | atom }

quote = !{ "'" | ",@" | "," | "`" }

quoted_sexp = ${ quote ~ simple_sexp }

simple_sexp = ${ "(" ~ whitespace* ~ expressions* ~ whitespace* ~ ")" }

block_comment = @{ "#|" ~ (!"|#" ~ any)* ~ "|#" }

line_comment = @{ ";" ~ (!"\n" ~ any)* }

datum_comment = @{ "#;" ~ " "? ~ (sexp | atom) }

expressions = _{ expression ~ (whitespace* ~ expression)* }

// `expressions` is optional so that empty or whitespace-only source parses to
// an empty program instead of being rejected.
program = { soi ~ whitespace* ~ expressions? ~ whitespace* ~ eoi }

D src/lisp.rustpeg => src/lisp.rustpeg +0 -60
@@ 1,60 0,0 @@
#![arguments(filename: &str)]

use rb;
use rb::Value;
use atom::atom;
use quotes::QUOTES;

whitespace
	= [ \t\n]*

escape
	= "\\" .

string -> Option<Value>
	= s:$("\"" (escape / [^"])* "\"") { Some(rb::str_new(&s.to_string())) }

delimited_identifier -> &'input str
	= "|" i:$([^|]+) "|" {i}

simple_atom -> Value
	= p:#position a:(delimited_identifier / $([^\(\) \t\n\[\]\{\}\|"]+)) { atom(&a, &filename, p, __input) }

quoted_atom -> Value
	= q:quote a:simple_atom { rb::vec2rbarr(vec![q, a]) }

atom -> Option<Value>
	= a:(quoted_atom / simple_atom) { Some(a) }

sexp -> Option<Value>
	= n:(quoted_sexp / simple_sexp) { Some(n) }

comment -> Option<Value>
	= (block_comment / line_comment / datum_comment) { None }

expression -> Option<Value>
	= string / comment / sexp / atom

quote -> Value
	= q:$("'" / ",@" / "," / "`") { rb::str_new(&QUOTES.get(&q).unwrap().to_string()) }

quoted_sexp -> Value
	= q:quote s:simple_sexp { rb::vec2rbarr(vec![q, s]) }

simple_sexp -> Value
	= "(" s:expressions ")" {s}

block_comment
	= "#|" (!"|#" .)* "|#"

line_comment
	= ";" [^\n]*

datum_comment
	= "#;" " "? (atom / sexp)

expressions -> Value
  = whitespace s:(expression ** whitespace) whitespace { rb::vec2rbarr(s.into_iter().filter_map(|i| i).collect()) }

pub program -> Value
	= expressions