~jojo/microcrisp

1b5b33b97a75c891c3763d8addb1e692a2fdaed2 — JoJo 8 months ago
initial commit
3 files changed, 358 insertions(+), 0 deletions(-)

A .gitignore
A Cargo.toml
A src/lib.rs
A  => .gitignore +2 -0
@@ 1,2 @@
/target
/Cargo.lock

A  => Cargo.toml +9 -0
@@ 1,9 @@
[package]
name = "microcrisp"
authors = ["JoJo <jo@jo.zone>"]
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]

A  => src/lib.rs +347 -0
@@ 1,347 @@
//! Microcrisp -- a simplistic LISP for scripting in embedded contexts
//!
//! A microcrisp is an English microchip that rhymes with lisp.

use std::{
    borrow::Cow,
    cmp::{max, min},
    fmt, str,
};

/// Alphabet of a symbol. The index of a character in this string is its digit value in the
/// packed encoding; index 0 (`\0`) is never a legal symbol character and marks the end of a name.
///
/// The ten decimal digits occupy the last ten positions so that the first character of a symbol,
/// which may not be a digit, can be encoded in the smaller radix `SYMBOL_CHARS.len() - 10`.
// NOTE(review): the letter `w` does not appear in this alphabet -- confirm that this is intended.
const SYMBOL_CHARS: &str = "\0abcdefghijklmnopqrstuvxyz+-*?0123456789";

/// Maximum number of characters in a symbol: one leading character in the reduced radix plus as
/// many full-radix characters as are guaranteed to fit in a `u128`.
const SYMBOL_MAX_LEN: usize =
    1 + (u128::MAX / (SYMBOL_CHARS.len() - 10) as u128).ilog(SYMBOL_CHARS.len() as u128) as usize;

/// A short identifier packed into a single `u128`, so it is `Copy` and compares as an integer.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Symbol(u128);

impl Symbol {
    /// Packs the bytes of `s` into a [`Symbol`]; see [`Symbol::pack`] for the rules.
    pub fn pack_str(s: &str) -> Option<Self> {
        Self::pack(s.as_bytes())
    }

    /// Packs a byte string into a [`Symbol`].
    ///
    /// Returns `None` when `s` is empty, is longer than `SYMBOL_MAX_LEN`, starts with an ASCII
    /// digit, or contains any byte (including NUL) that is not a legal symbol character.
    pub fn pack(s: &[u8]) -> Option<Self> {
        // Digit value of one byte. NUL is reserved as the terminator, so it is rejected even
        // though it has an index in `SYMBOL_CHARS`.
        fn digit(b: u8) -> Option<u128> {
            if b == 0 {
                return None;
            }
            SYMBOL_CHARS.find(b as char).map(|ix| ix as u128)
        }

        let (&head, tail) = s.split_first()?;
        if s.len() > SYMBOL_MAX_LEN || head.is_ascii_digit() {
            return None;
        }

        let head_radix = (SYMBOL_CHARS.len() - 10) as u128;
        let tail_radix = SYMBOL_CHARS.len() as u128;
        // The head is the least significant digit; every later character is weighted by the
        // product of the radixes of all characters before it.
        let (packed, _) =
            tail.iter().try_fold((digit(head)?, head_radix), |(acc, weight), &b| {
                Some((acc + weight * digit(b)?, weight * tail_radix))
            })?;
        Some(Symbol(packed))
    }

    /// Decodes the symbol back into the text it was packed from.
    ///
    /// # Panics
    ///
    /// Panics if the stored value decodes to more than `SYMBOL_MAX_LEN` characters; values
    /// produced by [`Symbol::pack`] never do.
    pub fn unpack_str(self) -> String {
        let alphabet = SYMBOL_CHARS.as_bytes();
        let mut remaining = self.0;
        // Only the first character uses the reduced radix.
        let mut radix = (SYMBOL_CHARS.len() - 10) as u128;
        let mut decoded = [0u8; SYMBOL_MAX_LEN + 1];
        for slot in decoded.iter_mut() {
            *slot = alphabet[(remaining % radix) as usize];
            remaining /= radix;
            radix = SYMBOL_CHARS.len() as u128;
        }
        // The first zero digit terminates the name.
        let end = decoded.iter().position(|&b| b == 0).expect("symbol has a terminator");
        String::from_utf8(decoded[..end].to_vec()).expect("symbol alphabet is ASCII")
    }
}

/// A parsed LISP value.
#[derive(Debug, Clone, PartialEq)]
pub enum Lisp {
    /// Integer literal (decimal, `0x` hexadecimal or `0b` binary), optionally negated.
    Int(i64),
    /// Decimal literal containing a `.`, optionally negated.
    Float(f64),
    /// Identifier, stored in packed form.
    Symbol(Symbol),
    /// Parenthesised sequence of values.
    List(Vec<Lisp>),
    /// Double-quoted string literal with its escape sequences already resolved.
    String(String),
}

impl Lisp {
    /// Borrows the elements if this value is a [`Lisp::List`], otherwise returns `None`.
    pub fn as_list(&self) -> Option<&[Lisp]> {
        if let Lisp::List(items) = self {
            Some(items.as_slice())
        } else {
            None
        }
    }

    /// Returns the integer if this value is a [`Lisp::Int`], otherwise returns `None`.
    pub fn as_int(&self) -> Option<i64> {
        if let Lisp::Int(value) = self {
            Some(*value)
        } else {
            None
        }
    }

    /// Borrows the text if this value is a [`Lisp::String`], otherwise returns `None`.
    pub fn as_string(&self) -> Option<&str> {
        if let Lisp::String(text) = self {
            Some(text.as_str())
        } else {
            None
        }
    }
}

/// Renders a value using the same surface syntax that [`parse`] reads.
///
/// * Numbers are delegated to their own `Display` impls, so formatter flags apply to them. Note
///   that a float with an integral value prints without a decimal point (`1.0` prints as `1`).
/// * Symbols print as their unpacked name.
/// * Lists print as their elements separated by single spaces inside parentheses.
/// * Strings print in double quotes, with exactly the characters the parser knows escapes for
///   (`\t`, `\r`, `\n`, `"`, `\`) written as escape sequences.
impl fmt::Display for Lisp {
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        match self {
            // Fully-qualified calls: the file only imports the `fmt` module, not the trait.
            Lisp::Int(x) => fmt::Display::fmt(x, f),
            Lisp::Float(x) => fmt::Display::fmt(x, f),
            Lisp::Symbol(symbol) => f.write_str(&symbol.unpack_str()),
            Lisp::List(xs) => {
                f.write_str("(")?;
                for (n, x) in xs.iter().enumerate() {
                    if n > 0 {
                        f.write_str(" ")?;
                    }
                    fmt::Display::fmt(x, f)?;
                }
                f.write_str(")")
            }
            Lisp::String(s) => {
                f.write_str("\"")?;
                for c in s.chars() {
                    match c {
                        '\t' => f.write_str("\\t")?,
                        '\r' => f.write_str("\\r")?,
                        '\n' => f.write_str("\\n")?,
                        '"' => f.write_str("\\\"")?,
                        '\\' => f.write_str("\\\\")?,
                        _ => fmt::Write::write_char(f, c)?,
                    }
                }
                f.write_str("\"")
            }
        }
    }
}

/// Result of an internal parsing step. Both variants carry a byte index into the parsed input:
/// on success the index just past the consumed input, on failure the index the error refers to.
type PResult<T, E> = Result<(usize, T), (usize, E)>;

/// Parses exactly one LISP expression from `src`.
///
/// Whitespace around the expression is allowed; any other trailing input is an error.
///
/// # Errors
///
/// Returns a human-readable message containing the byte index of the problem, up to 20 bytes of
/// input on either side of it, and either what the parser expected there or, for trailing
/// input, the value that had already been parsed.
pub fn parse(src: &[u8]) -> Result<Lisp, String> {
    /// Lossily decoded input surrounding index `i`: up to 20 bytes before it and 20 bytes
    /// starting at it.
    fn context(src: &[u8], i: usize) -> (Cow<str>, Cow<str>) {
        let start = max(20, i) - 20;
        let end = min(src.len(), i + 20);
        let (before, after) = src[start..end].split_at(i - start);
        (String::from_utf8_lossy(before), String::from_utf8_lossy(after))
    }

    match parse_(src) {
        Err((i, e)) => {
            let (s0, s1) = context(src, i);
            Err(format!("parsing lisp failed at index {i} of input. Context: \"{s0}<HERE>{s1}\". Expected: {e}"))
        }
        Ok((i, x)) => {
            // Only whitespace may follow the parsed expression.
            if i + consume_whitespace(&src[i..]) == src.len() {
                return Ok(x);
            }
            let (s0, s1) = context(src, i);
            Err(format!("Leftover (non-whitespace) input after parsing lisp. \"Finished\" at index {} < {}. Context: \"{}<HERE>{}\". Parsed: {:?}", i, src.len(), s0, s1, x))
        }
    }
}

/// Parses one expression from the start of `src`, skipping leading whitespace.
///
/// On success returns the number of bytes consumed (leading whitespace included) and the parsed
/// value. On failure returns the index within `src` the error refers to, together with a
/// description of what was expected there. Trailing input is left for the caller to deal with.
fn parse_(src: &[u8]) -> PResult<Lisp, &'static str> {
    let mut i = 0;
    i += consume_whitespace(&src[i..]);
    match &src[i..] {
        [] => Err((i, "nonempty input")),
        // A minus sign directly followed by a digit negates the number that follows. A minus
        // followed by anything else falls through to the symbol arm below.
        [b'-', b'0'..=b'9', ..] => {
            i += 1;
            map_ok(offset(i, parse_(&src[i..])), |x| match x {
                Lisp::Int(x) => Lisp::Int(-x),
                Lisp::Float(x) => Lisp::Float(-x),
                _ => x,
            })
        }
        // Hexadecimal integer: `0x` followed by at least one hex digit.
        [b'0', b'x', rest @ ..] => {
            i += 2;
            let j = rest.iter().position(|b| !b.is_ascii_hexdigit()).unwrap_or(rest.len());
            // `rest[..j]` holds only ASCII hex digits, so it is valid UTF-8.
            match i64::from_str_radix(str::from_utf8(&rest[..j]).unwrap(), 0x10) {
                Ok(x) => Ok((i + j, Lisp::Int(x))),
                Err(_) => Err((i, "hex digits")),
            }
        }
        // Binary integer: `0b` followed by at least one binary digit.
        [b'0', b'b', rest @ ..] => {
            i += 2;
            let j = rest.iter().position(|&b| b != b'0' && b != b'1').unwrap_or(rest.len());
            // `rest[..j]` holds only ASCII `0`/`1`, so it is valid UTF-8.
            match i64::from_str_radix(str::from_utf8(&rest[..j]).unwrap(), 0b10) {
                Ok(x) => Ok((i + j, Lisp::Int(x))),
                Err(_) => Err((i, "binary digits")),
            }
        }
        // Decimal number: digits with at most one `.`; the presence of a dot makes it a float.
        [b'0'..=b'9', rest @ ..] => {
            let i0 = i;
            i += 1;
            let mut dot = false;
            for b in rest {
                match b {
                    b'0'..=b'9' => {}
                    b'.' if !dot => dot = true,
                    _ => break,
                }
                i += 1;
            }
            // Only ASCII digits and dots were accepted, so the slice is valid UTF-8.
            let s = str::from_utf8(&src[i0..i]).unwrap();
            let parsed = if dot {
                s.parse().map(Lisp::Float).map_err(|_| "float literal")
            } else {
                s.parse().map(Lisp::Int).map_err(|_| "int literal")
            };
            parsed.map(|x| (i, x)).map_err(|e| (i0, e))
        }
        // Symbol: the longest run of symbol characters. Digits were handled above, and
        // `Symbol::pack` rejects NUL bytes and over-long names.
        [b, ..] if SYMBOL_CHARS.contains(*b as char) => {
            let s = src[i..].split(|b| !SYMBOL_CHARS.contains(*b as char)).next().unwrap();
            match Symbol::pack(s) {
                Some(symbol) => Ok((i + s.len(), Lisp::Symbol(symbol))),
                None => Err((i, "valid symbol")),
            }
        }
        // List: parse elements until one fails, then require the closing parenthesis.
        [b'(', ..] => {
            i += 1;
            let mut xs = Vec::new();
            let err = loop {
                match parse_(&src[i..]) {
                    Ok((n, x)) => {
                        i += n;
                        xs.push(x)
                    }
                    // The nested index is relative to `src[i..]`; rebase it onto `src` so the
                    // reported position points at the real location of the failure.
                    Err((n, e)) => break (i + n, e),
                }
            };
            i += consume_whitespace(&src[i..]);
            if src[i..].starts_with(&[b')']) {
                // The element parser merely stopped at the end of the list; not an error.
                Ok((i + 1, Lisp::List(xs)))
            } else {
                Err(err)
            }
        }
        // String literal: bytes up to the closing quote, resolving backslash escapes.
        [b'"', ..] => {
            let i0 = i;
            i += 1;
            let mut unescaped = Vec::new();
            while i < src.len() {
                if src[i] == b'"' {
                    return match String::from_utf8(unescaped) {
                        Ok(s) => Ok((i + 1, Lisp::String(s))),
                        Err(_) => Err((i0, "valid utf-8 string literal")),
                    };
                } else if src[i] == b'\\' && i == src.len() - 1 {
                    // Backslash is the very last byte of the input: nothing to escape.
                    return Err((i + 1, "escapee of \\ in string literal"));
                } else if src[i] == b'\\' {
                    unescaped.push(match src[i + 1] {
                        b't' => b'\t',
                        b'r' => b'\r',
                        b'n' => b'\n',
                        b'"' => b'"',
                        b'\\' => b'\\',
                        _ => return Err((i + 1, "valid escapee of \\ in string literal")),
                    });
                    i += 2;
                } else {
                    unescaped.push(src[i]);
                    i += 1;
                }
            }
            Err((src.len(), "end quote for string literal"))
        }
        // Report the offending byte itself, i.e. the position after the skipped whitespace.
        _ => Err((i, "expression")),
    }
}

/// Returns the length of the run of ASCII whitespace at the start of `s`.
fn consume_whitespace(s: &[u8]) -> usize {
    // The index of the first non-whitespace byte is the number of bytes to skip; if there is no
    // such byte, the whole slice is whitespace.
    s.iter().position(|b| !b.is_ascii_whitespace()).unwrap_or(s.len())
}

/// Shifts the index carried by either variant of `r` forward by `n`, turning an index relative
/// to a sub-slice into one relative to the slice it was taken from.
fn offset<T, E>(n: usize, r: PResult<T, E>) -> PResult<T, E> {
    r.map(|(i, value)| (i + n, value)).map_err(|(i, error)| (i + n, error))
}

/// Applies `f` to the value of a successful result, leaving its index and any error untouched.
fn map_ok<T, U, E>(r: PResult<T, E>, f: impl FnOnce(T) -> U) -> PResult<U, E> {
    match r {
        Ok((n, value)) => Ok((n, f(value))),
        Err(error) => Err(error),
    }
}

// Unit tests for symbol packing and for each literal form the parser accepts.
#[cfg(test)]
mod test {
    use super::*;

    // Pins the computed symbol capacity for the current alphabet.
    #[test]
    fn symbol_len_is_24() {
        assert_eq!(SYMBOL_MAX_LEN, 24)
    }

    // Basic packing: `a` has digit value 1; empty, digit-leading and NUL inputs are rejected.
    #[test]
    fn symbol_pack() {
        assert_eq!(Symbol::pack_str("a"), Some(Symbol(1)));
        assert_eq!(Symbol::pack_str(""), None);
        assert_eq!(Symbol::pack_str("0"), None);
        assert_eq!(Symbol::pack(&[0]), None);
    }

    // Packing followed by unpacking yields the original name, up to the maximum length.
    #[test]
    fn symbol_pack_unpack_identity() {
        assert_eq!(Symbol::pack_str("hello").map(Symbol::unpack_str).as_deref(), Some("hello"));
        assert_eq!(Symbol::pack_str("foo-bar").map(Symbol::unpack_str).as_deref(), Some("foo-bar"));
        assert_eq!(
            Symbol::pack_str("this-one-is-24-char-long").map(Symbol::unpack_str).as_deref(),
            Some("this-one-is-24-char-long")
        );
        assert_eq!(Symbol::pack_str("0-cant-begin-with-num").map(Symbol::unpack_str), None);
        assert_eq!(Symbol::pack_str("this-one-is-just-too-long").map(Symbol::unpack_str), None);
    }

    // Decimal integers, with surrounding whitespace and negation; `1f` leaves trailing input.
    #[test]
    fn parse_decimal_int() {
        assert_eq!(parse(b"0"), Ok(Lisp::Int(0)));
        assert_eq!(parse(b"  1000  "), Ok(Lisp::Int(1000)));
        assert_eq!(parse(b"1234"), Ok(Lisp::Int(1234)));
        assert_eq!(parse(b"-0"), Ok(Lisp::Int(0)));
        assert_eq!(parse(b"-10\n"), Ok(Lisp::Int(-10)));
        assert!(parse(b"1f").is_err());
    }

    // `0x` literals in either digit case, including negated ones; `0x` needs at least one digit.
    #[test]
    fn parse_hexadecimal_int() {
        assert_eq!(parse(b"0x0"), Ok(Lisp::Int(0)));
        assert_eq!(parse(b"0x0F"), Ok(Lisp::Int(15)));
        assert_eq!(parse(b"0x1000"), Ok(Lisp::Int(0x1000)));
        assert_eq!(parse(b"0x1234"), Ok(Lisp::Int(0x1234)));
        assert_eq!(parse(b"0xabc123def"), Ok(Lisp::Int(0xabc123def)));
        assert_eq!(parse(b"\n-0x0"), Ok(Lisp::Int(0)));
        assert_eq!(parse(b"-0x10"), Ok(Lisp::Int(-0x10)));
        assert_eq!(parse(b"-0xff"), Ok(Lisp::Int(-0xff)));
        assert!(parse(b"0xg").is_err());
    }

    // `0b` literals, including negated ones; `0b` needs at least one binary digit.
    #[test]
    fn parse_binary_int() {
        assert_eq!(parse(b"0b0"), Ok(Lisp::Int(0)));
        assert_eq!(parse(b"0b01"), Ok(Lisp::Int(1)));
        assert_eq!(parse(b"0b1000"), Ok(Lisp::Int(0b1000)));
        assert_eq!(parse(b"-0b0"), Ok(Lisp::Int(0)));
        assert_eq!(parse(b"-0b11"), Ok(Lisp::Int(-0b11)));
        assert!(parse(b"0b2").is_err());
    }

    // Symbols may contain digits and punctuation after the first character. A minus that is not
    // followed by a digit starts a symbol. Uppercase letters are not symbol characters.
    #[test]
    fn parse_symbol() {
        assert_eq!(parse(b"a"), Ok(Lisp::Symbol(Symbol(1))));
        assert_eq!(parse(b" abc "), Ok(Lisp::Symbol(Symbol::pack_str("abc").unwrap())));
        assert_eq!(parse(b"x123"), Ok(Lisp::Symbol(Symbol::pack_str("x123").unwrap())));
        assert_eq!(parse(b"foo-bar"), Ok(Lisp::Symbol(Symbol::pack_str("foo-bar").unwrap())));
        assert_eq!(parse(b"-x"), Ok(Lisp::Symbol(Symbol::pack_str("-x").unwrap())));
        assert!(parse(b"A").is_err());
        assert!(parse(b"aA").is_err());
    }

    // String literals: escapes are resolved and raw newlines are kept. Unterminated literals,
    // unknown escapes and invalid UTF-8 are rejected.
    #[test]
    fn parse_string() {
        assert_eq!(parse(b"\"\""), Ok(Lisp::String("".to_owned())));
        assert_eq!(parse(b"\"hello world!\""), Ok(Lisp::String("hello world!".to_owned())));
        assert_eq!(parse(r#"  "\"nested!\""   "#.as_bytes()), Ok(Lisp::String("\"nested!\"".to_owned())));
        assert_eq!(parse(r#" "\tfoo" "#.as_bytes()), Ok(Lisp::String("\tfoo".to_owned())));
        assert_eq!(parse(b"\n\"\nfoo\n\"\n"), Ok(Lisp::String("\nfoo\n".to_owned())));
        assert!(parse(b"\"").is_err());
        assert!(parse(b"\"foo").is_err());
        assert!(parse(r#" "\y" "#.as_bytes()).is_err());
        assert!(parse(&[b'"', 128, 128, b'"']).is_err());
    }

    // Lists: empty, flat, nested, and with parentheses inside string elements. Unbalanced
    // parentheses are rejected.
    #[test]
    fn parse_list() {
        assert_eq!(parse(b" () "), Ok(Lisp::List(vec![])));
        assert_eq!(parse(b"(1 2)"), Ok(Lisp::List(vec![Lisp::Int(1), Lisp::Int(2)])));
        assert_eq!(
            parse(b"(\na\nb\n)"),
            Ok(Lisp::List(vec![
                Lisp::Symbol(Symbol::pack_str("a").unwrap()),
                Lisp::Symbol(Symbol::pack_str("b").unwrap())
            ]))
        );
        assert_eq!(parse(b"(()()())"), Ok(Lisp::List(vec![Lisp::List(vec![]); 3])));
        assert_eq!(parse(b"(\"(\")"), Ok(Lisp::List(vec![Lisp::String("(".to_owned())])));
        assert_eq!(parse(b"(\")\")"), Ok(Lisp::List(vec![Lisp::String(")".to_owned())])));
        assert!(parse(b"(").is_err());
        assert!(parse(b")").is_err());
        assert!(parse(b"(\")\"").is_err());
    }
}